From 69588d7ed59a019a5272a9cc391e30c47d006aee Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 9 Oct 2009 11:29:33 +0100
Subject: llvmpipe: Eliminate constant mapping/unmapping.

---
 src/gallium/drivers/llvmpipe/lp_context.h     |  3 --
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 50 ---------------------------
 src/gallium/drivers/llvmpipe/lp_state_fs.c    | 20 +++++++++--
 3 files changed, 17 insertions(+), 56 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 8d5a0d4f1f..7df340554e 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -88,9 +88,6 @@ struct llvmpipe_context {
    /** Mapped vertex buffers */
    ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS];
    
-   /** Mapped constant buffers */
-   void *mapped_constants[PIPE_SHADER_TYPES];
-
    /** Vertex format */
    struct vertex_info vertex_info;
    struct vertex_info vertex_info_vbuf;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 89772e62d3..0aa13a1fc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,54 +45,6 @@
 
 
-static void
-llvmpipe_map_constant_buffers(struct llvmpipe_context *lp)
-{
-   struct pipe_screen *screen = lp->pipe.screen;
-   uint i, size;
-
-   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
-      if (lp->constants[i].buffer && lp->constants[i].buffer->size)
-         lp->mapped_constants[i] = screen->buffer_map(screen, lp->constants[i].buffer,
-                                                      PIPE_BUFFER_USAGE_CPU_READ);
-   }
-
-   if (lp->constants[PIPE_SHADER_VERTEX].buffer)
-      size = lp->constants[PIPE_SHADER_VERTEX].buffer->size;
-   else
-      size = 0;
-
-   lp->jit_context.constants = lp->mapped_constants[PIPE_SHADER_FRAGMENT];
-
-   draw_set_mapped_constant_buffer(lp->draw,
-                                   lp->mapped_constants[PIPE_SHADER_VERTEX],
-                                   size);
-}
-
-
-static void
-llvmpipe_unmap_constant_buffers(struct llvmpipe_context *lp)
-{
-   struct pipe_screen *screen = lp->pipe.screen;
-   uint i;
-
-   /* really need to flush all prims since the vert/frag shaders const buffers
-    * are going away now.
-    */
-   draw_flush(lp->draw);
-
-   draw_set_mapped_constant_buffer(lp->draw, NULL, 0);
-
-   lp->jit_context.constants = NULL;
-
-   for (i = 0; i < 2; i++) {
-      if (lp->constants[i].buffer && lp->constants[i].buffer->size)
-         screen->buffer_unmap(screen, lp->constants[i].buffer);
-      lp->mapped_constants[i] = NULL;
-   }
-}
-
-
 boolean
 llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
                      unsigned start, unsigned count)
@@ -124,7 +76,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
       llvmpipe_update_derived( lp );
 
    llvmpipe_map_transfers(lp);
-   llvmpipe_map_constant_buffers(lp);
 
    /*
     * Map vertex buffers
@@ -163,7 +114,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
 
 
    /* Note: leave drawing surfaces mapped */
-   llvmpipe_unmap_constant_buffers(lp);
 
    lp->dirty_render_cache = TRUE;
    
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b00be0cc32..7728ba6076 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -83,6 +83,7 @@
 #include "lp_bld_debug.h"
 #include "lp_screen.h"
 #include "lp_context.h"
+#include "lp_buffer.h"
 #include "lp_state.h"
 #include "lp_quad.h"
 #include "lp_tex_sample.h"
@@ -671,16 +672,29 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 void
 llvmpipe_set_constant_buffer(struct pipe_context *pipe,
                              uint shader, uint index,
-                             const struct pipe_constant_buffer *buf)
+                             const struct pipe_constant_buffer *constants)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct pipe_buffer *buffer = constants ? constants->buffer : NULL;
+   unsigned size = buffer ? buffer->size : 0;
+   const void *data = buffer ? llvmpipe_buffer(buffer)->data : NULL;
 
    assert(shader < PIPE_SHADER_TYPES);
    assert(index == 0);
 
+   if(shader == PIPE_SHADER_VERTEX)
+      draw_flush(llvmpipe->draw);
+
    /* note: reference counting */
-   pipe_buffer_reference(&llvmpipe->constants[shader].buffer,
-			 buf ? buf->buffer : NULL);
+   pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
+
+   if(shader == PIPE_SHADER_FRAGMENT) {
+      llvmpipe->jit_context.constants = data;
+   }
+
+   if(shader == PIPE_SHADER_VERTEX) {
+      draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+   }
 
    llvmpipe->dirty |= LP_NEW_CONSTANTS;
 }
-- 
cgit v1.2.3


From f36123323c9d696fec6e54882242cab15247ab0d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 8 Oct 2009 13:00:37 -0600
Subject: softpipe: restore/fix print_vertex() debug helper

---
 src/gallium/drivers/softpipe/sp_setup.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index e55e209fd1..00fb52a64f 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -106,6 +106,7 @@ struct setup_context {
 #endif
 
    unsigned winding;		/* which winding to cull */
+   unsigned nr_vertex_attrs;
 };
 
 
@@ -268,8 +269,8 @@ static void print_vertex(const struct setup_context *setup,
                          const float (*v)[4])
 {
    int i;
-   debug_printf("   Vertex: (%p)\n", v);
-   for (i = 0; i < setup->quad[0].nr_attrs; i++) {
+   debug_printf("   Vertex: (%p)\n", (void *) v);
+   for (i = 0; i < setup->nr_vertex_attrs; i++) {
       debug_printf("     %d: %f %f %f %f\n",  i,
               v[i][0], v[i][1], v[i][2], v[i][3]);
       if (util_is_inf_or_nan(v[i][0])) {
@@ -1254,6 +1255,9 @@ void sp_setup_prepare( struct setup_context *setup )
       softpipe_update_derived(sp);
    }
 
+   /* Note: nr_vertex_attrs is only used for debugging (vertex printing) */
+   setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw);
+
    sp->quad.first->begin( sp->quad.first );
 
    if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES &&
-- 
cgit v1.2.3


From a74e53ddba246b1f6604c6120b63a923fd9c60d5 Mon Sep 17 00:00:00 2001
From: Cooper Yuan <cooperyuan@gmail.com>
Date: Sat, 10 Oct 2009 14:41:44 +0800
Subject: r300g: add video surface create and destroy functions

---
 src/gallium/drivers/r300/r300_texture.c | 52 +++++++++++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_texture.h | 14 ++++++++-
 2 files changed, 65 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index ce60ded7ca..7ea4c33fa9 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -215,6 +215,55 @@ static struct pipe_texture*
     return (struct pipe_texture*)tex;
 }
 
+static struct pipe_video_surface *
+r300_video_surface_create(struct pipe_screen *screen,
+                          enum pipe_video_chroma_format chroma_format,
+                          unsigned width, unsigned height)
+{
+    struct r300_video_surface *r300_vsfc;
+    struct pipe_texture template;
+
+    assert(screen);
+    assert(width && height);
+
+    r300_vsfc = CALLOC_STRUCT(r300_video_surface);
+    if (!r300_vsfc)
+       return NULL;
+
+    pipe_reference_init(&r300_vsfc->base.reference, 1);
+    r300_vsfc->base.screen = screen;
+    r300_vsfc->base.chroma_format = chroma_format;
+    r300_vsfc->base.width = width;
+    r300_vsfc->base.height = height;
+
+    memset(&template, 0, sizeof(struct pipe_texture));
+    template.target = PIPE_TEXTURE_2D;
+    template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+    template.last_level = 0;
+    template.width[0] = util_next_power_of_two(width);
+    template.height[0] = util_next_power_of_two(height);
+    template.depth[0] = 1;
+    pf_get_block(template.format, &template.block);
+    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
+                         PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+    r300_vsfc->tex = screen->texture_create(screen, &template);
+    if (!r300_vsfc->tex)
+    {
+        FREE(r300_vsfc);
+        return NULL;
+    }
+
+    return &r300_vsfc->base;
+}
+
+static void r300_video_surface_destroy(struct pipe_video_surface *vsfc)
+{
+    struct r300_video_surface *r300_vsfc = r300_video_surface(vsfc);
+    pipe_texture_reference(&r300_vsfc->tex, NULL);
+    FREE(r300_vsfc);
+}
+
 void r300_init_screen_texture_functions(struct pipe_screen* screen)
 {
     screen->texture_create = r300_texture_create;
@@ -222,6 +271,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen)
     screen->get_tex_surface = r300_get_tex_surface;
     screen->tex_surface_destroy = r300_tex_surface_destroy;
     screen->texture_blanket = r300_texture_blanket;
+
+    screen->video_surface_create = r300_video_surface_create;
+    screen->video_surface_destroy= r300_video_surface_destroy;
 }
 
 boolean r300_get_texture_buffer(struct pipe_texture* texture,
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index bd87790bc3..e5182d31b4 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -24,7 +24,7 @@
 #define R300_TEXTURE_H
 
 #include "pipe/p_screen.h"
-
+#include "pipe/p_video_state.h"
 #include "util/u_math.h"
 
 #include "r300_context.h"
@@ -91,6 +91,18 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
     return 0;
 }
 
+struct r300_video_surface
+{
+    struct pipe_video_surface   base;
+    struct pipe_texture         *tex;
+};
+
+static INLINE struct r300_video_surface *
+r300_video_surface(struct pipe_video_surface *pvs)
+{
+    return (struct r300_video_surface *)pvs;
+}
+
 #ifndef R300_WINSYS_H
 
 boolean r300_get_texture_buffer(struct pipe_texture* texture,
-- 
cgit v1.2.3


From 768481ed40cb7530fdbadbf4d6dc00b74209adf1 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 10 Oct 2009 09:18:14 -0600
Subject: softpipe: revert 564df9dc5f6335eb8dc68f3c69cf054d2142663c

This change silenced valgrind warnings but broke progs/tests/drawbuffers.
The problem is we don't know the surface's state when we start caching it
(it may or may not be initialized/cleared/etc).  So "clearing" it here was
presumptuous.  Leaving the code in place (but disabled) for reference and
when using valgrind.

Fixes bug 24401
---
 src/gallium/drivers/softpipe/sp_tile_cache.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 5f7864e671..b2195ec6b5 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -131,7 +131,12 @@ sp_create_tile_cache( struct pipe_screen *screen )
          tc->entries[pos].y = -1;
       }
 
-#if TILE_CLEAR_OPTIMIZATION
+      /* XXX this code prevents valgrind warnings about use of uninitialized
+       * memory in programs that don't clear the surface before rendering.
+       * However, it breaks clearing in other situations (such as in
+       * progs/tests/drawbuffers, see bug 24401).
+       */
+#if 0 && TILE_CLEAR_OPTIMIZATION
       /* set flags to indicate all the tiles are cleared */
       memset(tc->clear_flags, 255, sizeof(tc->clear_flags));
 #endif
-- 
cgit v1.2.3


From 39daa763b59cc80d862709e99ee3619bd0f7a14d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Sat, 10 Oct 2009 09:12:00 -0600
Subject: softpipe: fix multi-drawbuffers regression

This is part of the fix for bug 24401.
---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index e243c63fa2..0ad0b98654 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -946,15 +946,15 @@ choose_blend_quad(struct quad_stage *qs,
       qs->run = blend_noop;
    }
    else if (!softpipe->blend->logicop_enable &&
-            softpipe->blend->colormask == 0xf) 
+            softpipe->blend->colormask == 0xf &&
+            softpipe->framebuffer.nr_cbufs == 1)
    {
       if (!blend->blend_enable) {
          qs->run = single_output_color;
       }
       else if (blend->rgb_src_factor == blend->alpha_src_factor &&
                blend->rgb_dst_factor == blend->alpha_dst_factor &&
-               blend->rgb_func == blend->alpha_func &&
-               softpipe->framebuffer.nr_cbufs == 1)
+               blend->rgb_func == blend->alpha_func)
       {
          if (blend->alpha_func == PIPE_BLEND_ADD) {
             if (blend->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
-- 
cgit v1.2.3


From 3611d01a44d5d3cd2c132e685836b1ea9c8b9922 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Sun, 11 Oct 2009 19:12:24 +1000
Subject: r300g: fix blending default state + alpha separate.

This makes the default state the same as r300.
---
 src/gallium/drivers/r300/r300_state.c | 41 +++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 88cb9af6fb..3cef285dee 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -46,23 +46,46 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
 {
     struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
 
+    {
+	unsigned eqRGB = state->rgb_func;
+	unsigned srcRGB = state->rgb_src_factor;
+	unsigned dstRGB = state->rgb_dst_factor;
+
+	unsigned eqA = state->alpha_func;
+	unsigned srcA = state->alpha_src_factor;
+	unsigned dstA = state->alpha_dst_factor;
+
+	if (srcA != srcRGB ||
+	    dstA != dstRGB ||
+	    eqA != eqRGB) {
+	    blend->alpha_blend_control =
+		r300_translate_blend_function(eqA) |
+		(r300_translate_blend_factor(srcA) <<
+                    R300_SRC_BLEND_SHIFT) |
+                (r300_translate_blend_factor(dstA) <<
+		 R300_DST_BLEND_SHIFT);
+	    blend->blend_control |= R300_ALPHA_BLEND_ENABLE |
+		R300_SEPARATE_ALPHA_ENABLE;
+	} else {
+	    blend->alpha_blend_control = R300_COMB_FCN_ADD_CLAMP |
+		(R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+		(R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+	}
+    }
     if (state->blend_enable) {
         /* XXX for now, always do separate alpha...
          * is it faster to do it with one reg? */
-        blend->blend_control = R300_ALPHA_BLEND_ENABLE |
-                R300_SEPARATE_ALPHA_ENABLE |
-                R300_READ_ENABLE |
+        blend->blend_control |= R300_READ_ENABLE |
                 r300_translate_blend_function(state->rgb_func) |
                 (r300_translate_blend_factor(state->rgb_src_factor) <<
                     R300_SRC_BLEND_SHIFT) |
                 (r300_translate_blend_factor(state->rgb_dst_factor) <<
                     R300_DST_BLEND_SHIFT);
-        blend->alpha_blend_control =
-                r300_translate_blend_function(state->alpha_func) |
-                (r300_translate_blend_factor(state->alpha_src_factor) <<
-                    R300_SRC_BLEND_SHIFT) |
-                (r300_translate_blend_factor(state->alpha_dst_factor) <<
-                    R300_DST_BLEND_SHIFT);
+    } else {
+	blend->blend_control = 
+	    R300_COMB_FCN_ADD_CLAMP |
+	    (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+	    (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
     }
 
     /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
-- 
cgit v1.2.3


From f096cc7dc1cdae1698eb7a340cd8c7f5ea0b1166 Mon Sep 17 00:00:00 2001
From: Nicolai Hähnle <nhaehnle@gmail.com>
Date: Sun, 11 Oct 2009 12:40:07 +0200
Subject: r300g: Fix fragment program constants upload on R300
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nicolai Hähnle <nhaehnle@gmail.com>
---
 src/gallium/drivers/r300/r300_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 77ce431cdc..570b4c5ef7 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -212,7 +212,7 @@ void r300_emit_fragment_program_code(struct r300_context* r300,
     }
 
     if (constants->Count) {
-        OUT_CS_ONE_REG(R300_PFS_PARAM_0_X, constants->Count * 4);
+        OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4);
         for(i = 0; i < constants->Count; ++i) {
             const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
             OUT_CS(pack_float24(data[0]));
-- 
cgit v1.2.3


From a5348d435da7d06478adc003a07e388915a8b346 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Mon, 12 Oct 2009 21:03:26 +0200
Subject: Add support for more 8 and 16 bits formats

---
 src/gallium/drivers/nv04/nv04_surface_2d.c | 11 ++++++++++-
 src/gallium/drivers/nv30/nv30_miptree.c    |  5 +++++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index b2ab50ee21..8c7eb367e2 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -13,10 +13,13 @@ nv04_surface_format(enum pipe_format format)
 {
 	switch (format) {
 	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
 		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
 	case PIPE_FORMAT_R16_SNORM:
 	case PIPE_FORMAT_R5G6B5_UNORM:
 	case PIPE_FORMAT_Z16_UNORM:
+	case PIPE_FORMAT_A8L8_UNORM:
 		return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
 	case PIPE_FORMAT_X8R8G8B8_UNORM:
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -36,6 +39,7 @@ nv04_rect_format(enum pipe_format format)
 	case PIPE_FORMAT_A8_UNORM:
 		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
 	case PIPE_FORMAT_R5G6B5_UNORM:
+	case PIPE_FORMAT_A8L8_UNORM:
 	case PIPE_FORMAT_Z16_UNORM:
 		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -51,6 +55,10 @@ static INLINE int
 nv04_scaled_image_format(enum pipe_format format)
 {
 	switch (format) {
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
 	case PIPE_FORMAT_A1R5G5B5_UNORM:
 		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -59,6 +67,7 @@ nv04_scaled_image_format(enum pipe_format format)
 		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
 	case PIPE_FORMAT_R5G6B5_UNORM:
 	case PIPE_FORMAT_R16_SNORM:
+	case PIPE_FORMAT_A8L8_UNORM:
 		return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
 	default:
 		return -1;
@@ -131,7 +140,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	OUT_RING  (chan, nv04_surface_format(dst->format) |
 	                 log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
 	                 log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
- 
+
 	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
 	OUT_RELOCo(chan, src_bo,
 	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 7f8054de73..17acca61ab 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -96,6 +96,11 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 		case PIPE_FORMAT_A8R8G8B8_UNORM:
 		case PIPE_FORMAT_X8R8G8B8_UNORM:
 		case PIPE_FORMAT_R16_SNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_A8L8_UNORM:
+		case PIPE_FORMAT_A8_UNORM:
+		case PIPE_FORMAT_L8_UNORM:
+		case PIPE_FORMAT_I8_UNORM:
 		{
 			if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE))
 				mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
-- 
cgit v1.2.3


From 05fc9cdfdfceaf7ca1db64bf1feccf649fe4c907 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Tue, 6 Oct 2009 15:30:39 -0700
Subject: r300g: Clean up texture formats.

---
 src/gallium/drivers/r300/r300_screen.c        |  4 +---
 src/gallium/drivers/r300/r300_state_inlines.h |  1 -
 src/gallium/drivers/r300/r300_texture.h       | 13 ++++++++-----
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 81d01b1320..e8d991586f 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -215,10 +215,8 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
                  PIPE_TEXTURE_USAGE_PRIMARY |
                  PIPE_TEXTURE_USAGE_SAMPLER);
 
-        /* Z buffer */
+        /* Z buffer or texture */
         case PIPE_FORMAT_Z16_UNORM:
-            return usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL;
-
         /* Z buffer with stencil or texture */
         case PIPE_FORMAT_Z24S8_UNORM:
             return usage &
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 88eb66b79e..d7b57e1b22 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -295,7 +295,6 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
         case PIPE_FORMAT_X8R8G8B8_UNORM:
         case PIPE_FORMAT_R8G8B8A8_UNORM:
         case PIPE_FORMAT_R8G8B8X8_UNORM:
-        case PIPE_FORMAT_Z24S8_UNORM:
             return R300_COLOR_FORMAT_ARGB8888;
         /* XXX Not in pipe_format
         case PIPE_FORMAT_A32R32G32B32:
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index e5182d31b4..992dad77ab 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -43,6 +43,14 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
         /* X8 */
         case PIPE_FORMAT_I8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+        /* X16 */
+        case PIPE_FORMAT_R16_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+        case PIPE_FORMAT_R16_SNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, X, X16) |
+                R300_TX_FORMAT_SIGNED;
+        case PIPE_FORMAT_Z16_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, X, X16);
         /* W8Z8Y8X8 */
         case PIPE_FORMAT_A8R8G8B8_UNORM:
             return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
@@ -76,11 +84,6 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
         /* W24_FP */
         case PIPE_FORMAT_Z24S8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
-	/* Z5_Y6_X5 */
-        case PIPE_FORMAT_R16_SNORM:
-            return R300_EASY_TX_FORMAT(X, X, X, X, Z5Y6X5);
-        case PIPE_FORMAT_Z16_UNORM:
-	    return R300_EASY_TX_FORMAT(X, X, X, X, X16);
         default:
             debug_printf("r300: Implementation error: "
                 "Got unsupported texture format %s in %s\n",
-- 
cgit v1.2.3


From 36ccdf09b8483305c7fa1366de9df2dea2fd6985 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Tue, 6 Oct 2009 16:00:27 -0700
Subject: r300g: Prevent multiple-use textures from getting incorrectly
 approved.

---
 src/gallium/drivers/r300/r300_screen.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index e8d991586f..7d154576e0 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -182,16 +182,19 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param)
 static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
                                    boolean is_r500)
 {
+    uint32_t retval = 0;
+
     switch (format) {
         /* Supported formats. */
         /* Colorbuffer */
         case PIPE_FORMAT_A4R4G4B4_UNORM:
         case PIPE_FORMAT_R5G6B5_UNORM:
         case PIPE_FORMAT_A1R5G5B5_UNORM:
-            return usage &
+            retval = usage &
                 (PIPE_TEXTURE_USAGE_RENDER_TARGET |
                  PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
                  PIPE_TEXTURE_USAGE_PRIMARY);
+            break;
 
         /* Texture */
         case PIPE_FORMAT_A8R8G8B8_SRGB:
@@ -201,7 +204,8 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
         case PIPE_FORMAT_DXT3_RGBA:
         case PIPE_FORMAT_DXT5_RGBA:
         case PIPE_FORMAT_YCBCR:
-            return usage & PIPE_TEXTURE_USAGE_SAMPLER;
+            retval = usage & PIPE_TEXTURE_USAGE_SAMPLER;
+            break;
 
         /* Colorbuffer or texture */
         case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -209,19 +213,21 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
         case PIPE_FORMAT_R8G8B8A8_UNORM:
         case PIPE_FORMAT_R8G8B8X8_UNORM:
         case PIPE_FORMAT_I8_UNORM:
-            return usage &
+            retval = usage &
                 (PIPE_TEXTURE_USAGE_RENDER_TARGET |
                  PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
                  PIPE_TEXTURE_USAGE_PRIMARY |
                  PIPE_TEXTURE_USAGE_SAMPLER);
+            break;
 
         /* Z buffer or texture */
         case PIPE_FORMAT_Z16_UNORM:
         /* Z buffer with stencil or texture */
         case PIPE_FORMAT_Z24S8_UNORM:
-            return usage &
+            retval = usage &
                 (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
                  PIPE_TEXTURE_USAGE_SAMPLER);
+            break;
 
         /* Definitely unsupported formats. */
         /* Non-usable Z buffer/stencil formats. */
@@ -259,7 +265,13 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
             break;
     }
 
-    return FALSE;
+    /* If usage was a mask that contained multiple bits, and not all of them
+     * are supported, this will catch that and return FALSE.
+     * e.g. usage = 2 | 4; retval = 4; (retval >= usage) == FALSE
+     *
+     * This also returns FALSE for any unknown formats (when usage != 0).
+     */
+    return (retval >= usage);
 }
 
 /* XXX moar targets */
-- 
cgit v1.2.3


From 95a05621eb750c07e5c7a5eb64b8458d202192b3 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 12 Oct 2009 20:47:00 -0700
Subject: r300g: Fallback on surfaces we can't render to or from.

Still not sure why st keeps handing down things we can't render to.
---
 src/gallium/drivers/r300/r300_surface.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index cc6288cb51..4d0ccd6b0f 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -113,9 +113,10 @@ static void r300_surface_fill(struct pipe_context* pipe,
         dest, x, y, w, h, pixpitch, color);
 
     /* Fallback? */
-    if (FALSE) {
+    if (!pipe->screen->is_format_supported(pipe->screen, dest->format,
+        PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
-        debug_printf("r300: Falling back on surface clear...");
+        debug_printf("r300: Falling back on surface clear...\n");
         util_surface_fill(pipe, dest, x, y, w, h, color);
         return;
     }
@@ -245,10 +246,18 @@ static void r300_surface_copy(struct pipe_context* pipe,
     if ((srctex->buffer == desttex->buffer) &&
             ((destx < srcx + w) || (srcx < destx + w)) &&
             ((desty < srcy + h) || (srcy < desty + h))) {
+        goto fallback;
+    }
+
+    if (!pipe->screen->is_format_supported(pipe->screen, src->format,
+            PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0) ||
+            !pipe->screen->is_format_supported(pipe->screen, dest->format,
+            PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
         debug_printf("r300: Falling back on surface_copy\n");
         util_surface_copy(pipe, FALSE, dest, destx, desty, src,
                 srcx, srcy, w, h);
+        return;
     }
 
     /* Add our target BOs to the list. */
-- 
cgit v1.2.3


From a4a4f7abc2137754646a811007696321c7714f1b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 12 Oct 2009 20:55:57 -0700
Subject: r300g: Surface debug.

It gets really annoying watching r300g tell me how it's filling surfaces.
Or falling back during filling surfaces.
---
 src/gallium/drivers/r300/r300_context.h |  1 +
 src/gallium/drivers/r300/r300_debug.c   |  1 +
 src/gallium/drivers/r300/r300_surface.c | 12 ++++++------
 3 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 52b1c9a6b2..a817459ee3 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -312,6 +312,7 @@ void r300_init_surface_functions(struct r300_context* r300);
 #define DBG_VP      0x0000004
 #define DBG_CS      0x0000008
 #define DBG_DRAW    0x0000010
+#define DBG_SURF    0x0000020
 /*@}*/
 
 static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 85d69c0747..4a55a0c5b1 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -37,6 +37,7 @@ static struct debug_option debug_options[] = {
     { "vp", DBG_VP, "Vertex program handling" },
     { "cs", DBG_CS, "Command submissions" },
     { "draw", DBG_DRAW, "Draw and emit" },
+    { "surf", DBG_SURF, "Surface drawing" },
 
     { "all", ~0, "Convenience option that enables all debug flags" },
 
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index 4d0ccd6b0f..a263b26512 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -108,7 +108,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
     r = (float)((color >> 16) & 0xff) / 255.0f;
     g = (float)((color >>  8) & 0xff) / 255.0f;
     b = (float)((color >>  0) & 0xff) / 255.0f;
-    debug_printf("r300: Filling surface %p at (%d,%d),"
+    DBG(r300, DBG_SURF, "r300: Filling surface %p at (%d,%d),"
         " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
         dest, x, y, w, h, pixpitch, color);
 
@@ -116,7 +116,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
     if (!pipe->screen->is_format_supported(pipe->screen, dest->format,
         PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
-        debug_printf("r300: Falling back on surface clear...\n");
+        DBG(r300, DBG_SURF, "r300: Falling back on surface clear...\n");
         util_surface_fill(pipe, dest, x, y, w, h, color);
         return;
     }
@@ -131,7 +131,7 @@ validate:
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
         if (invalid) {
-            debug_printf("r300: Stuck in validation loop, gonna fallback.");
+            DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback.");
             goto fallback;
         }
         invalid = TRUE;
@@ -239,7 +239,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
     float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty;
     CS_LOCALS(r300);
 
-    debug_printf("r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
+    DBG(r300, DBG_SURF, "r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
         " dimensions %dx%d (pixel pitch %d)\n",
         src, srcx, srcy, dest, destx, desty, w, h, pixpitch);
 
@@ -254,7 +254,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
             !pipe->screen->is_format_supported(pipe->screen, dest->format,
             PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
-        debug_printf("r300: Falling back on surface_copy\n");
+        DBG(r300, DBG_SURF, "r300: Falling back on surface_copy\n");
         util_surface_copy(pipe, FALSE, dest, destx, desty, src,
                 srcx, srcy, w, h);
         return;
@@ -275,7 +275,7 @@ validate:
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
         if (invalid) {
-            debug_printf("r300: Stuck in validation loop, gonna fallback.");
+            DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback.");
             goto fallback;
         }
         invalid = TRUE;
-- 
cgit v1.2.3


From ca8cafda0b996167647d724ea3da3ec568a9e42f Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 12 Oct 2009 21:26:46 -0700
Subject: r300g: More debug flags.

---
 src/gallium/drivers/r300/r300_context.h | 2 ++
 src/gallium/drivers/r300/r300_debug.c   | 2 ++
 src/gallium/drivers/r300/r300_surface.c | 9 +++++----
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index a817459ee3..086633f732 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -313,6 +313,8 @@ void r300_init_surface_functions(struct r300_context* r300);
 #define DBG_CS      0x0000008
 #define DBG_DRAW    0x0000010
 #define DBG_SURF    0x0000020
+#define DBG_TEX     0x0000040
+#define DBG_FALL    0x0000080
 /*@}*/
 
 static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 4a55a0c5b1..bfd4ab018a 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -38,6 +38,8 @@ static struct debug_option debug_options[] = {
     { "cs", DBG_CS, "Command submissions" },
     { "draw", DBG_DRAW, "Draw and emit" },
     { "surf", DBG_SURF, "Surface drawing" },
+    { "tex", DBG_TEX, "Textures" },
+    { "fall", DBG_FALL, "Fallbacks" },
 
     { "all", ~0, "Convenience option that enables all debug flags" },
 
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index a263b26512..d72e734ff0 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -116,7 +116,8 @@ static void r300_surface_fill(struct pipe_context* pipe,
     if (!pipe->screen->is_format_supported(pipe->screen, dest->format,
         PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
-        DBG(r300, DBG_SURF, "r300: Falling back on surface clear...\n");
+        DBG(r300, DBG_SURF | DBG_FALL,
+            "r300: Falling back on surface clear...\n");
         util_surface_fill(pipe, dest, x, y, w, h, color);
         return;
     }
@@ -131,7 +132,7 @@ validate:
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
         if (invalid) {
-            DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback.");
+            DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop.");
             goto fallback;
         }
         invalid = TRUE;
@@ -254,7 +255,7 @@ static void r300_surface_copy(struct pipe_context* pipe,
             !pipe->screen->is_format_supported(pipe->screen, dest->format,
             PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
 fallback:
-        DBG(r300, DBG_SURF, "r300: Falling back on surface_copy\n");
+        DBG(r300, DBG_SURF | DBG_FALL, "r300: Falling back on surface_copy\n");
         util_surface_copy(pipe, FALSE, dest, destx, desty, src,
                 srcx, srcy, w, h);
         return;
@@ -275,7 +276,7 @@ validate:
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
         if (invalid) {
-            DBG(r300, DBG_SURF, "r300: Stuck in validation loop, gonna fallback.");
+            DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop.");
             goto fallback;
         }
         invalid = TRUE;
-- 
cgit v1.2.3


From cf33aaf8fe2b1d22e394f431735b76f3ab04b854 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Tue, 13 Oct 2009 22:53:32 +0200
Subject: nouveau: nv30: use texture width,height for render target dimensions

---
 src/gallium/drivers/nv30/nv30_state_fb.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 44b6a74715..2729dcec7c 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -40,10 +40,9 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		for (i = 1; i < fb->nr_cbufs; i++)
 			assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
 
-		/* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */
 		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-		log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ |
-		log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/;
+			    (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+			    (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
 	}
 	else
 		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
-- 
cgit v1.2.3


From 23c0c820e2767324546d450d2a7aa7bf1f70c36f Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 11:42:05 +1000
Subject: r300g: fix case where texture unit 0 is disabled but unit 1 is
 enabled.

To reproduce, start texrect and disable texture 0 in the menu.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r300/r300_emit.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 570b4c5ef7..99deb50400 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -683,7 +683,8 @@ validate:
     /* ...textures... */
     for (i = 0; i < r300->texture_count; i++) {
         tex = r300->textures[i];
-        assert(tex && tex->buffer && "texture is marked, but NULL!");
+        if (!tex)
+	    continue;
         if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
                     RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
             r300->context.flush(&r300->context, 0, NULL);
@@ -770,12 +771,13 @@ validate:
     if (r300->dirty_state &
             (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) {
         for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
-            if (r300->dirty_state &
-                    ((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
-                r300_emit_texture(r300,
-                        r300->sampler_states[i],
-                        r300->textures[i],
-                        i);
+  	    if (r300->dirty_state &
+		((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
+		if (r300->textures[i]) 
+		    r300_emit_texture(r300,
+				      r300->sampler_states[i],
+				      r300->textures[i],
+				      i);
                 r300->dirty_state &=
                     ~((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i));
                 dirty_tex++;
-- 
cgit v1.2.3


From 210481ae16e966865dcf9f1fd5f5dfabf4dc28bc Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 15:13:25 +1000
Subject: r300g: attempt to make bo space check sane.

This attempts to make r300g do proper bo space checking as opposed
to whatever it was doing before.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 src/gallium/drivers/r300/r300_context.c          |  9 +++++++++
 src/gallium/drivers/r300/r300_emit.c             |  3 +++
 src/gallium/drivers/r300/r300_winsys.h           |  6 ++++++
 src/gallium/winsys/drm/radeon/core/radeon_r300.c | 23 ++++++++++++++++++++---
 4 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 9cc455135d..e6bc80e48f 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -136,6 +136,13 @@ r300_is_buffer_referenced( struct pipe_context *pipe,
    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
 }
 
+static void r300_flush_cb(void *data)
+{
+    struct r300_context* const cs_context_copy = data;
+
+    cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
+}
+
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
                                          struct r300_winsys* r300_winsys)
 {
@@ -190,6 +197,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300_init_state_functions(r300);
 
     r300_emit_invariant_state(r300);
+
+    r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
     r300->dirty_state = R300_NEW_KITCHEN_SINK;
     r300->dirty_hw++;
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 99deb50400..64748ad8f8 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -658,6 +658,9 @@ void r300_emit_dirty_state(struct r300_context* r300)
 
     r300_update_derived_state(r300);
 
+    /* Clean out BOs. */
+    r300->winsys->reset_bos(r300->winsys);
+
     /* XXX check size */
 validate:
     /* Color buffers... */
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index f18ad75a47..540f8eca92 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -92,6 +92,12 @@ struct r300_winsys {
 
     /* Flush the CS. */
     void (*flush_cs)(struct r300_winsys* winsys);
+
+    /* winsys flush - callback from winsys when flush required */
+    void (*set_flush_cb)(struct r300_winsys *winsys,
+			 void (*flush_cb)(void *), void *data);
+
+    void (*reset_bos)(struct r300_winsys *winsys);
 };
 
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index d2d84f1a8f..3587892e00 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -22,6 +22,17 @@
 
 #include "radeon_r300.h"
 
+static void radeon_r300_set_flush_cb(struct r300_winsys *winsys,
+				     void (*flush_cb)(void *),
+				     void *data)
+{
+    struct radeon_winsys_priv* priv =
+        (struct radeon_winsys_priv*)winsys->radeon_winsys;
+
+    radeon_cs_space_set_flush(priv->cs, flush_cb,
+			      data);
+}
+
 static boolean radeon_r300_add_buffer(struct r300_winsys* winsys,
                                       struct pipe_buffer* pbuffer,
                                       uint32_t rd,
@@ -95,6 +106,13 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys,
     }
 }
 
+static void radeon_r300_reset_bos(struct r300_winsys *winsys)
+{
+    struct radeon_winsys_priv* priv =
+        (struct radeon_winsys_priv*)winsys->radeon_winsys;
+    radeon_cs_space_reset_bos(priv->cs);
+}
+
 static void radeon_r300_end_cs(struct r300_winsys* winsys,
                                const char* file,
                                const char* function,
@@ -119,9 +137,6 @@ static void radeon_r300_flush_cs(struct r300_winsys* winsys)
         radeon_cs_print(priv->cs, stderr);
     }
 
-    /* Clean out BOs. */
-    radeon_cs_space_reset_bos(priv->cs);
-
     /* Reset CS.
      * Someday, when we care about performance, we should really find a way
      * to rotate between two or three CS objects so that the GPU can be
@@ -203,6 +218,8 @@ radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys)
     winsys->write_cs_reloc = radeon_r300_write_cs_reloc;
     winsys->end_cs = radeon_r300_end_cs;
     winsys->flush_cs = radeon_r300_flush_cs;
+    winsys->reset_bos = radeon_r300_reset_bos;
+    winsys->set_flush_cb = radeon_r300_set_flush_cb;
 
     memcpy(winsys, old_winsys, sizeof(struct radeon_winsys));
 
-- 
cgit v1.2.3


From c1bee7bdea470b6b5dcebef9aacc8fe4feca687c Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 16:53:12 +1000
Subject: r300g: fixup arb occlusion query support.

1: add rv530 support
   - num z pipes cap
   - add proper start/finish query options for rv530

2: convert to use linked list properly.

3: add flushing required check.

4: initial Z top disabling support.

TODO:
make it actually work on my rv530.
---
 src/gallium/drivers/r300/r300_chipset.h          |  2 +
 src/gallium/drivers/r300/r300_context.c          | 10 ++--
 src/gallium/drivers/r300/r300_context.h          |  4 +-
 src/gallium/drivers/r300/r300_emit.c             | 69 ++++++++++++++++++++----
 src/gallium/drivers/r300/r300_flush.c            | 10 +++-
 src/gallium/drivers/r300/r300_query.c            | 42 ++++++++-------
 src/gallium/drivers/r300/r300_reg.h              | 15 ++++--
 src/gallium/drivers/r300/r300_screen.c           |  1 +
 src/gallium/drivers/r300/r300_state.c            | 11 ++--
 src/gallium/drivers/r300/r300_winsys.h           |  3 ++
 src/gallium/winsys/drm/radeon/core/radeon_r300.c | 10 ++++
 11 files changed, 134 insertions(+), 43 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index 322d4a57e4..f015a4243d 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -36,6 +36,8 @@ struct r300_capabilities {
     int num_vert_fpus;
     /* The number of fragment pipes */
     int num_frag_pipes;
+    /* The number of z pipes */
+    int num_z_pipes;
     /* Whether or not TCL is physically present */
     boolean has_tcl;
     /* Whether or not this is an RV515 or newer; R500s have many differences
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e6bc80e48f..7a9c098e30 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -99,11 +99,9 @@ static void r300_destroy_context(struct pipe_context* context) {
     context->screen->buffer_destroy(r300->oqbo);
 
     /* If there are any queries pending or not destroyed, remove them now. */
-    if (r300->query_list) {
-        foreach_s(query, temp, r300->query_list) {
-            remove_from_list(query);
-            FREE(query);
-        }
+    foreach_s(query, temp, &r300->query_list) {
+        remove_from_list(query);
+        FREE(query);
     }
 
     FREE(r300->blend_color_state);
@@ -201,6 +199,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
     r300->dirty_state = R300_NEW_KITCHEN_SINK;
     r300->dirty_hw++;
-
+    make_empty_list(&r300->query_list);
     return &r300->context;
 }
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 086633f732..9b0094b63c 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -172,6 +172,8 @@ struct r300_query {
     unsigned int count;
     /* The offset of this query into the query buffer, in bytes. */
     unsigned offset;
+    /* if we've flushed the query */
+    boolean flushed;
     /* Linked list members. */
     struct r300_query* prev;
     struct r300_query* next;
@@ -237,7 +239,7 @@ struct r300_context {
     /* Occlusion query buffer. */
     struct pipe_buffer* oqbo;
     /* Query list. */
-    struct r300_query* query_list;
+    struct r300_query query_list;
 
     /* Various CSO state objects. */
     /* Blend state. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 64748ad8f8..3d28249c16 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -323,28 +323,30 @@ void r300_emit_fb_state(struct r300_context* r300,
 void r300_emit_query_begin(struct r300_context* r300,
                            struct r300_query* query)
 {
+    struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
 
     /* XXX This will almost certainly not return good results
      * for overlapping queries. */
-    BEGIN_CS(2);
+    BEGIN_CS(4);
+    if (caps->family == CHIP_FAMILY_RV530) {
+	OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+    } else {
+	OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+    }
     OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
     END_CS;
 }
 
-void r300_emit_query_end(struct r300_context* r300,
-                         struct r300_query* query)
+
+static void r300_emit_query_finish(struct r300_context *r300,
+				   struct r300_query *query)
 {
     struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
 
-    if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
-                0, RADEON_GEM_DOMAIN_GTT)) {
-        debug_printf("r300: There wasn't room for the OQ buffer!?"
-                " Oh noes!\n");
-    }
-
     assert(caps->num_frag_pipes);
+
     BEGIN_CS(6 * caps->num_frag_pipes + 2);
     /* I'm not so sure I like this switch, but it's hard to be elegant
      * when there's so many special cases...
@@ -394,6 +396,55 @@ void r300_emit_query_end(struct r300_context* r300,
 
 }
 
+static void rv530_emit_query_single(struct r300_context *r300,
+				    struct r300_query *query)
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(8);
+    OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+    OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+    OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+    END_CS;
+}
+
+static void rv530_emit_query_double(struct r300_context *r300,
+				    struct r300_query *query)
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(14);
+    OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+    OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+    OUT_CS_RELOC(r300->oqbo, query->offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+    OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+    OUT_CS_RELOC(r300->oqbo, query->offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+    OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+    END_CS;
+}
+
+void r300_emit_query_end(struct r300_context* r300,
+                         struct r300_query* query)
+{
+    struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
+
+    if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
+                0, RADEON_GEM_DOMAIN_GTT)) {
+        debug_printf("r300: There wasn't room for the OQ buffer!?"
+                " Oh noes!\n");
+    }
+
+    if (caps->family == CHIP_FAMILY_RV530) {
+	if (caps->num_z_pipes == 2)
+	    rv530_emit_query_double(r300, query);
+	else
+	    rv530_emit_query_single(r300, query);
+    } else 
+        r300_emit_query_finish(r300, query);
+}
+
 void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
 {
     CS_LOCALS(r300);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 0dff1c6f4f..a8ab0d7212 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -26,9 +26,10 @@ static void r300_flush(struct pipe_context* pipe,
                        unsigned flags,
                        struct pipe_fence_handle** fence)
 {
-    struct r300_context* r300 = r300_context(pipe);
-    CS_LOCALS(r300);
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_query *query;
 
+    CS_LOCALS(r300);
     /* We probably need to flush Draw, but we may have been called from
      * within Draw. This feels kludgy, but it might be the best thing. */
     if (!r300->draw->flushing) {
@@ -41,8 +42,13 @@ static void r300_flush(struct pipe_context* pipe,
         r300->dirty_state = R300_NEW_KITCHEN_SINK;
         r300->dirty_hw = 0;
     }
+    /* mark every query in the list as flushed */
+    foreach(query, &r300->query_list) {
+        query->flushed = TRUE;
+    }
 }
 
+
 void r300_init_flush_functions(struct r300_context* r300)
 {
     r300->context.flush = r300_flush;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 2880d34877..b01313648b 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -24,13 +24,13 @@
 
 #include "r300_emit.h"
 
-static struct pipe_query* r300_create_query(struct pipe_context* pipe,
+static struct pipe_query *r300_create_query(struct pipe_context *pipe,
                                             unsigned query_type)
 {
-    struct r300_context* r300 = r300_context(pipe);
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    unsigned query_size = r300screen->caps->num_frag_pipes * 4;
-    struct r300_query* q, * qptr;
+    struct r300_context *r300 = r300_context(pipe);
+    struct r300_screen *r300screen = r300_screen(r300->context.screen);
+    unsigned query_size;
+    struct r300_query *q, *qptr;
 
     q = CALLOC_STRUCT(r300_query);
 
@@ -39,13 +39,16 @@ static struct pipe_query* r300_create_query(struct pipe_context* pipe,
 
     q->active = FALSE;
 
-    if (!r300->query_list) {
-        r300->query_list = q;
-    } else if (!is_empty_list(r300->query_list)) {
-        qptr = last_elem(r300->query_list);
+    if (r300screen->caps->family == CHIP_FAMILY_RV530)
+	query_size = r300screen->caps->num_z_pipes * sizeof(uint32_t);
+    else
+	query_size = r300screen->caps->num_frag_pipes * sizeof(uint32_t);
+
+    if (!is_empty_list(&r300->query_list)) {
+        qptr = last_elem(&r300->query_list);
         q->offset = qptr->offset + query_size;
-        insert_at_tail(r300->query_list, q);
     }
+    insert_at_tail(&r300->query_list, q);
 
     /* XXX */
     if (q->offset >= 4096) {
@@ -74,9 +77,10 @@ static void r300_begin_query(struct pipe_context* pipe,
     map = pipe->screen->buffer_map(pipe->screen, r300->oqbo,
             PIPE_BUFFER_USAGE_CPU_WRITE);
     map += q->offset / 4;
-    *map = ~0;
+    *map = ~0U;
     pipe->screen->buffer_unmap(pipe->screen, r300->oqbo);
 
+    q->flushed = FALSE;
     r300_emit_dirty_state(r300);
     r300_emit_query_begin(r300, q);
 }
@@ -98,28 +102,30 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct r300_query* q = (struct r300_query*)query;
+    struct r300_query *q = (struct r300_query*)query;
     unsigned flags = PIPE_BUFFER_USAGE_CPU_READ;
     uint32_t* map;
-    uint32_t temp;
+    uint32_t temp = 0;
     unsigned i;
 
-    if (wait) {
+    if (q->flushed == FALSE)
         pipe->flush(pipe, 0, NULL);
-    } else {
+    if (!wait) {
         flags |= PIPE_BUFFER_USAGE_DONTBLOCK;
     }
 
     map = pipe->screen->buffer_map(pipe->screen, r300->oqbo, flags);
+    if (!map)
+        return FALSE;
     map += q->offset / 4;
     for (i = 0; i < r300screen->caps->num_frag_pipes; i++) {
-        if (*map == ~0) {
+        if (*map == ~0U) {
             /* Looks like our results aren't ready yet. */
             if (wait) {
                 debug_printf("r300: Despite waiting, OQ results haven't"
                         " come in yet.\n");
             }
-            temp = ~0;
+            temp = ~0U;
             break;
         }
         temp += *map;
@@ -127,7 +133,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
     }
     pipe->screen->buffer_unmap(pipe->screen, r300->oqbo);
 
-    if (temp == ~0) {
+    if (temp == ~0U) {
         /* Our results haven't been written yet... */
         return FALSE;
     }
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 3abff5db62..ae94bb9b9f 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1172,6 +1172,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 /* SU Depth Offset value */
 #define R300_SU_DEPTH_OFFSET                0x42c4
 
+#define R300_SU_REG_DEST		    0x42c8
+#	define R300_RASTER_PIPE_SELECT_0	(1 << 0)
+#	define R300_RASTER_PIPE_SELECT_1	(1 << 1)
+#	define R300_RASTER_PIPE_SELECT_2	(1 << 2)
+#	define R300_RASTER_PIPE_SELECT_3	(1 << 3)
+#	define R300_RASTER_PIPE_SELECT_ALL	0xf
+
 
 /* BEGIN: Rasterization / Interpolators - many guesses */
 
@@ -2095,6 +2102,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R500_FG_ALPHA_VALUE                0x4be0
 #	define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
 
+#define RV530_FG_ZBREG_DEST                 0x4be8
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_0             (1 << 0)
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_1             (1 << 1)
+#	define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL           (3 << 0)
 /* gap */
 
 /* Fragment program parameters in 7.16 floating point */
@@ -3313,10 +3324,6 @@ enum {
 
 #define R200_3D_DRAW_IMMD_2      0xC0003500
 
-/* XXX Oh look, stuff not brought over from docs yet */
-
-#define R300_SU_REG_DEST                    0x42C8
-
 #endif /* _R300_REG_H */
 
 /* *INDENT-ON* */
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 7d154576e0..5381651c77 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -395,6 +395,7 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
 
     caps->pci_id = r300_winsys->pci_id;
     caps->num_frag_pipes = r300_winsys->gb_pipes;
+    caps->num_z_pipes = r300_winsys->z_pipes;
 
     r300_parse_chipset(caps);
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 3cef285dee..d8533ac168 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -190,6 +190,7 @@ static void*
         r300_create_dsa_state(struct pipe_context* pipe,
                               const struct pipe_depth_stencil_alpha_state* state)
 {
+    struct r300_context* r300 = r300_context(pipe);
     struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
 
     /* Depth test setup. */
@@ -247,11 +248,15 @@ static void*
             R300_FG_ALPHA_FUNC_ENABLE;
         dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f,
                                      0, 1023);
-    } else {
-        /* XXX need to fix this to be dynamically set
-        dsa->z_buffer_top = R300_ZTOP_ENABLE; */
     }
 
+    dsa->z_buffer_top = R300_ZTOP_ENABLE;
+    /* XXX TODO: add frag prog rules for ztop disable */
+    if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS)
+	dsa->z_buffer_top = R300_ZTOP_DISABLE;
+    if (!is_empty_list(&r300->query_list))
+	dsa->z_buffer_top = R300_ZTOP_DISABLE;
+
     return (void*)dsa;
 }
 
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 540f8eca92..864a6146b2 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -48,6 +48,9 @@ struct r300_winsys {
     /* GB pipe count */
     uint32_t gb_pipes;
 
+    /* Z pipe count (rv530 only) */
+    uint32_t z_pipes;
+
     /* GART size. */
     uint32_t gart_size;
 
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index 3587892e00..7ea5d1fb4e 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -164,6 +164,16 @@ static void do_ioctls(struct r300_winsys* winsys, int fd)
     }
     winsys->gb_pipes = target;
 
+    /* get Z pipes */
+    info.request = RADEON_INFO_NUM_Z_PIPES;
+    retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
+    if (retval) {
+        fprintf(stderr, "%s: Failed to get GB pipe count, "
+                "error number %d\n", __FUNCTION__, retval);
+        exit(1);
+    }
+    winsys->z_pipes = target;
+
     /* Then, get PCI ID */
     info.request = RADEON_INFO_DEVICE_ID;
     retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
-- 
cgit v1.2.3


From 47791697ab6eb6965f0ba8ba3f20373b3753ca2a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 17:14:43 +1000
Subject: r300g: convert query to a state for emitting.

This means we don't emit in the begin query but when we have
to flush. Similar to classic.

TODO:
make query object actually work.
---
 src/gallium/drivers/r300/r300_context.h |  2 ++
 src/gallium/drivers/r300/r300_emit.c    | 15 ++++++++++++---
 src/gallium/drivers/r300/r300_query.c   |  8 ++++++--
 3 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 9b0094b63c..3a01869ba1 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -141,6 +141,7 @@ struct r300_viewport_state {
 #define R300_NEW_VERTEX_FORMAT   0x04000000
 #define R300_NEW_VERTEX_SHADER   0x08000000
 #define R300_NEW_VIEWPORT        0x10000000
+#define R300_NEW_QUERY           0x20000000
 #define R300_NEW_KITCHEN_SINK    0x1fffffff
 
 /* The next several objects are not pure Radeon state; they inherit from
@@ -239,6 +240,7 @@ struct r300_context {
     /* Occlusion query buffer. */
     struct pipe_buffer* oqbo;
     /* Query list. */
+    struct r300_query *query_current;
     struct r300_query query_list;
 
     /* Various CSO state objects. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 3d28249c16..babbe0dd74 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -320,12 +320,16 @@ void r300_emit_fb_state(struct r300_context* r300,
     END_CS;
 }
 
-void r300_emit_query_begin(struct r300_context* r300,
-                           struct r300_query* query)
+void r300_emit_query_start(struct r300_context *r300)
+
 {
-    struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
+    struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
+    struct r300_query *query = r300->query_current;
     CS_LOCALS(r300);
 
+    if (!query)
+	return;
+
     /* XXX This will almost certainly not return good results
      * for overlapping queries. */
     BEGIN_CS(4);
@@ -772,6 +776,11 @@ validate:
         goto validate;
     }
 
+    if (r300->dirty_state & R300_NEW_QUERY) {
+        r300_emit_query_start(r300);
+        r300->dirty_state &= ~R300_NEW_QUERY;
+    }
+
     if (r300->dirty_state & R300_NEW_BLEND) {
         r300_emit_blend_state(r300, r300->blend_state);
         r300->dirty_state &= ~R300_NEW_BLEND;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index b01313648b..fb4340ff3d 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -74,6 +74,8 @@ static void r300_begin_query(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
     struct r300_query* q = (struct r300_query*)query;
 
+    assert(r300->query_current == NULL);
+
     map = pipe->screen->buffer_map(pipe->screen, r300->oqbo,
             PIPE_BUFFER_USAGE_CPU_WRITE);
     map += q->offset / 4;
@@ -81,8 +83,8 @@ static void r300_begin_query(struct pipe_context* pipe,
     pipe->screen->buffer_unmap(pipe->screen, r300->oqbo);
 
     q->flushed = FALSE;
-    r300_emit_dirty_state(r300);
-    r300_emit_query_begin(r300, q);
+    r300->query_current = q;
+    r300->dirty_state |= R300_NEW_QUERY;
 }
 
 static void r300_end_query(struct pipe_context* pipe,
@@ -93,6 +95,8 @@ static void r300_end_query(struct pipe_context* pipe,
 
     r300_emit_dirty_state(r300);
     r300_emit_query_end(r300, q);
+
+    r300->query_current = NULL;
 }
 
 static boolean r300_get_query_result(struct pipe_context* pipe,
-- 
cgit v1.2.3


From 51d1cf55da6f8b8a215814589a189b6e5e537fe5 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 17:44:19 +1000
Subject: r300g: port over last parts of oq support.

Add support for begin/end in each CS so we don't get any other
processes rendering in between.

TODO:
blame other parts of driver for this not working like Z.
---
 src/gallium/drivers/r300/r300_context.h |  2 ++
 src/gallium/drivers/r300/r300_emit.c    | 11 +++++++++--
 src/gallium/drivers/r300/r300_emit.h    |  3 +--
 src/gallium/drivers/r300/r300_flush.c   |  2 ++
 src/gallium/drivers/r300/r300_query.c   |  7 ++-----
 5 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 3a01869ba1..7826ed1452 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -175,6 +175,8 @@ struct r300_query {
     unsigned offset;
     /* if we've flushed the query */
     boolean flushed;
+    /* if begin has been emitted */
+    boolean begin_emitted;
     /* Linked list members. */
     struct r300_query* prev;
     struct r300_query* next;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index babbe0dd74..6e616cd5b2 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -340,6 +340,7 @@ void r300_emit_query_start(struct r300_context *r300)
     }
     OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
     END_CS;
+    query->begin_emitted = TRUE;
 }
 
 
@@ -429,10 +430,16 @@ static void rv530_emit_query_double(struct r300_context *r300,
     END_CS;
 }
 
-void r300_emit_query_end(struct r300_context* r300,
-                         struct r300_query* query)
+void r300_emit_query_end(struct r300_context* r300)
 {
     struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
+    struct r300_query *query = r300->query_current;
+
+    if (!query)
+	return;
+
+    if (query->begin_emitted == FALSE)
+        return;
 
     if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
                 0, RADEON_GEM_DOMAIN_GTT)) {
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index c4002b8e5d..b62aa9fec5 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -58,8 +58,7 @@ void r300_emit_fb_state(struct r300_context* r300,
 
 void r300_emit_query_begin(struct r300_context* r300,
                            struct r300_query* query);
-void r300_emit_query_end(struct r300_context* r300,
-                         struct r300_query* query);
+void r300_emit_query_end(struct r300_context* r300);
 
 void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
 
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index a8ab0d7212..241ea71d6b 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -36,6 +36,8 @@ static void r300_flush(struct pipe_context* pipe,
         draw_flush(r300->draw);
     }
 
+    r300_emit_query_end(r300);
+
     if (r300->dirty_hw) {
         FLUSH_CS;
         r300_emit_invariant_state(r300);
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index fb4340ff3d..2b0fbfb7d2 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -88,14 +88,11 @@ static void r300_begin_query(struct pipe_context* pipe,
 }
 
 static void r300_end_query(struct pipe_context* pipe,
-                           struct pipe_query* query)
+	                   struct pipe_query* query)
 {
     struct r300_context* r300 = r300_context(pipe);
-    struct r300_query* q = (struct r300_query*)query;
-
-    r300_emit_dirty_state(r300);
-    r300_emit_query_end(r300, q);
 
+    r300_emit_query_end(r300);
     r300->query_current = NULL;
 }
 
-- 
cgit v1.2.3


From ce5cba040c34a1a70186c29a5055e9be3c85a54a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 18:05:14 +1000
Subject: r300g: add one more ZTOP disable bit.

Still missing the frag uses kill support, hopefully nha can point that out.
---
 src/gallium/drivers/r300/r300_fs.h    | 6 ++++++
 src/gallium/drivers/r300/r300_state.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 967e9f697e..04453274aa 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -48,4 +48,10 @@ struct r300_fragment_shader {
 void r300_translate_fragment_shader(struct r300_context* r300,
                                     struct r300_fragment_shader* fs);
 
+static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+{
+    if (!fs)
+	return FALSE;
+    return (fs->code.writes_depth) ? TRUE : FALSE;
+}
 #endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index d8533ac168..95e2943baa 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -252,9 +252,11 @@ static void*
 
     dsa->z_buffer_top = R300_ZTOP_ENABLE;
     /* XXX TODO: add frag prog rules for ztop disable */
+    if (r300_fragment_shader_writes_depth(r300->fs))
+	dsa->z_buffer_top = R300_ZTOP_DISABLE;
     if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS)
 	dsa->z_buffer_top = R300_ZTOP_DISABLE;
-    if (!is_empty_list(&r300->query_list))
+    if (r300->query_current)
 	dsa->z_buffer_top = R300_ZTOP_DISABLE;
 
     return (void*)dsa;
-- 
cgit v1.2.3


From fa581580b18d530b849299c38604ab0804290e49 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 18:24:05 +1000
Subject: r300g: add QUERY to KITCHEN_SINK

I missed this, thanks to Corbin for pointing it out.
---
 src/gallium/drivers/r300/r300_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 7826ed1452..d2e8875503 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -142,7 +142,7 @@ struct r300_viewport_state {
 #define R300_NEW_VERTEX_SHADER   0x08000000
 #define R300_NEW_VIEWPORT        0x10000000
 #define R300_NEW_QUERY           0x20000000
-#define R300_NEW_KITCHEN_SINK    0x1fffffff
+#define R300_NEW_KITCHEN_SINK    0x3fffffff
 
 /* The next several objects are not pure Radeon state; they inherit from
  * various Gallium classes. */
-- 
cgit v1.2.3


From 88b697fb0aaaab8479716763510f56b1053ddb37 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Wed, 14 Oct 2009 18:24:34 +1000
Subject: r300g: remove buffer add that should be unnecessary.

This should be handled fine in the emit.
---
 src/gallium/drivers/r300/r300_emit.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 6e616cd5b2..feffadd0ee 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -441,12 +441,6 @@ void r300_emit_query_end(struct r300_context* r300)
     if (query->begin_emitted == FALSE)
         return;
 
-    if (!r300->winsys->add_buffer(r300->winsys, r300->oqbo,
-                0, RADEON_GEM_DOMAIN_GTT)) {
-        debug_printf("r300: There wasn't room for the OQ buffer!?"
-                " Oh noes!\n");
-    }
-
     if (caps->family == CHIP_FAMILY_RV530) {
 	if (caps->num_z_pipes == 2)
 	    rv530_emit_query_double(r300, query);
-- 
cgit v1.2.3


From f13e507798cdbbe2fad5df33dcd581d49d6fa7ab Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 14 Oct 2009 01:58:18 -0700
Subject: r300g: Compiler warning cleanup.

---
 src/gallium/drivers/r300/r300_context.c       | 2 +-
 src/gallium/drivers/r300/r300_render.c        | 7 -------
 src/gallium/drivers/r300/r300_state_derived.c | 2 +-
 src/gallium/drivers/r300/r300_surface.c       | 3 +--
 4 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 7a9c098e30..b243f88bb5 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -152,7 +152,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->winsys = r300_winsys;
 
     r300->context.winsys = (struct pipe_winsys*)r300_winsys;
-    r300->context.screen = r300_screen(screen);
+    r300->context.screen = screen;
 
     r300_init_debug(r300);
 
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index ca44e0f661..b56f7a3d1e 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -127,7 +127,6 @@ static void r300_render_unmap_vertices(struct vbuf_render* render,
 static void r300_render_release_vertices(struct vbuf_render* render)
 {
     struct r300_render* r300render = r300_render(render);
-    struct r300_context* r300 = r300render->r300;
 
     r300render->vbo_offset += r300render->vbo_max_used;
     r300render->vbo_max_used = 0;
@@ -182,8 +181,6 @@ static void r300_prepare_render(struct r300_render* render, unsigned count)
 {
     struct r300_context* r300 = render->r300;
 
-    CS_LOCALS(r300);
-
     r300_emit_dirty_state(r300);
 }
 
@@ -213,11 +210,7 @@ static void r300_render_draw(struct vbuf_render* render,
 {
     struct r300_render* r300render = r300_render(render);
     struct r300_context* r300 = r300render->r300;
-    struct pipe_screen* screen = r300->context.screen;
-    struct pipe_buffer* index_buffer;
-    void* index_map;
     int i;
-    uint32_t index;
 
     CS_LOCALS(r300);
 
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 02b7ab9107..335b54820a 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -193,7 +193,7 @@ static void r300_vertex_psc(struct r300_context* r300,
     struct vertex_info* vinfo = &vformat->vinfo;
     int* tab = vformat->vs_tab;
     uint32_t temp;
-    int i, attrib_count;
+    unsigned i, attrib_count;
 
     /* Vertex shaders have no semantics on their inputs,
      * so PSC should just route stuff based on their info,
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
index d72e734ff0..5cf49d20aa 100644
--- a/src/gallium/drivers/r300/r300_surface.c
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -95,8 +95,7 @@ static void r300_surface_fill(struct pipe_context* pipe,
                               unsigned w, unsigned h,
                               unsigned color)
 {
-    int i;
-    float r, g, b, a, depth;
+    float r, g, b, a;
     struct r300_context* r300 = r300_context(pipe);
     struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
     struct r300_texture* tex = (struct r300_texture*)dest->texture;
-- 
cgit v1.2.3


From fd63f89e95342d7d5921d6369346e356b505b584 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 14 Oct 2009 03:09:41 -0700
Subject: r300g: Move ztop to derived state.

Need to get it into its own atom instead of piggybacking on DSA.
---
 src/gallium/drivers/r300/r300_state.c         | 10 --------
 src/gallium/drivers/r300/r300_state_derived.c | 36 +++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 95e2943baa..8359850966 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -190,7 +190,6 @@ static void*
         r300_create_dsa_state(struct pipe_context* pipe,
                               const struct pipe_depth_stencil_alpha_state* state)
 {
-    struct r300_context* r300 = r300_context(pipe);
     struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
 
     /* Depth test setup. */
@@ -250,15 +249,6 @@ static void*
                                      0, 1023);
     }
 
-    dsa->z_buffer_top = R300_ZTOP_ENABLE;
-    /* XXX TODO: add frag prog rules for ztop disable */
-    if (r300_fragment_shader_writes_depth(r300->fs))
-	dsa->z_buffer_top = R300_ZTOP_DISABLE;
-    if (state->alpha.enabled && state->alpha.func != PIPE_FUNC_ALWAYS)
-	dsa->z_buffer_top = R300_ZTOP_DISABLE;
-    if (r300->query_current)
-	dsa->z_buffer_top = R300_ZTOP_DISABLE;
-
     return (void*)dsa;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 335b54820a..5d323a26b1 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -444,6 +444,37 @@ static void r300_update_rs_block(struct r300_context* r300)
     rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
 }
 
+static void r300_update_ztop(struct r300_context* r300)
+{
+    r300->dsa_state->z_buffer_top = R300_ZTOP_ENABLE;
+
+    /* This is important enough that I felt it warranted a comment.
+     *
+     * According to the docs, these are the conditions where ZTOP must be
+     * disabled:
+     * 1) Alpha testing enabled
+     * 2) Texture kill instructions in fragment shader
+     * 3) Chroma key culling enabled
+     * 4) W-buffering enabled
+     *
+     * The docs claim that for the first three cases, if no ZS writes happen,
+     * then ZTOP can be used.
+     *
+     * Additionally, the following conditions require disabled ZTOP:
+     * ~) Depth writes in fragment shader
+     * ~) Outstanding occlusion queries
+     *
+     * ~C.
+     */
+    if (r300->dsa_state->alpha_function) {
+        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300_fragment_shader_writes_depth(r300->fs)) {
+        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300->query_current) {
+        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+    }
+}
+
 void r300_update_derived_state(struct r300_context* r300)
 {
     if (r300->dirty_state &
@@ -455,4 +486,9 @@ void r300_update_derived_state(struct r300_context* r300)
         r300_update_fs_tab(r300);
         r300_update_rs_block(r300);
     }
+
+    if (r300->dirty_state &
+            (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
+        r300_update_ztop(r300);
+    }
 }
-- 
cgit v1.2.3


From 4046c3bab4dde95d4096f26637adaa6ce6d310a9 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 14 Oct 2009 17:11:30 +0100
Subject: llvmpipe: Use ALIGN_STACK.

---
 src/gallium/drivers/llvmpipe/lp_setup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 60107214df..b14c265b7f 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -115,6 +115,7 @@ struct setup_context {
 /**
  * Execute fragment shader for the four fragments in the quad.
  */
+ALIGN_STACK
 static void
 shade_quads(struct llvmpipe_context *llvmpipe,
             struct quad_header *quads[],
-- 
cgit v1.2.3


From 96c9b39a6a9553573fcbdb5fd6db0e9d59768442 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 13 Oct 2009 15:32:04 +0100
Subject: i915g: Fix warnings

---
 src/gallium/drivers/i915/i915_debug.c         | 2 +-
 src/gallium/drivers/i915/i915_fpc_translate.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index ce92d1af9a..e6640e587b 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -880,7 +880,7 @@ i915_dump_batchbuffer( struct intel_batchbuffer *batch )
       return;
    }
    
-   debug_printf( "\n\nBATCH: (%d)\n", bytes / 4);
+   debug_printf( "\n\nBATCH: (%d)\n", (int)bytes / 4);
 
    while (!done &&
 	  stream.offset < bytes)
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 89504ced27..1fe5cda956 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -127,7 +127,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
    va_start( args, msg );  
    util_vsnprintf( buffer, sizeof(buffer), msg, args );
    va_end( args );
-   debug_printf(buffer);
+   debug_printf("%s", buffer);
    debug_printf("\n");
 
    p->error = 1;
-- 
cgit v1.2.3


From a82fc97c643c4309a10cfefb108c4c0f11a2e55a Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 14 Oct 2009 20:06:38 -0700
Subject: r300g: Move ZTOP to its own state atom.

It may seem pointless, but this avoids a fair amount of predicted CSO pain.
---
 src/gallium/drivers/r300/r300_context.h       | 8 +++++++-
 src/gallium/drivers/r300/r300_emit.c          | 2 +-
 src/gallium/drivers/r300/r300_state_derived.c | 8 ++++----
 src/gallium/drivers/r300/r300_surface.h       | 1 -
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index d2e8875503..2acce0fd4a 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -62,7 +62,6 @@ struct r300_dsa_state {
     uint32_t z_buffer_control;  /* R300_ZB_CNTL: 0x4f00 */
     uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
     uint32_t stencil_ref_mask;  /* R300_ZB_STENCILREFMASK: 0x4f08 */
-    uint32_t z_buffer_top;      /* R300_ZB_ZTOP: 0x4f14 */
     uint32_t stencil_ref_bf;    /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
 };
 
@@ -124,6 +123,10 @@ struct r300_viewport_state {
     uint32_t vte_control; /* R300_VAP_VTE_CNTL:      0x20b0 */
 };
 
+struct r300_ztop_state {
+    uint32_t z_buffer_top;      /* R300_ZB_ZTOP: 0x4f14 */
+};
+
 #define R300_NEW_BLEND           0x00000001
 #define R300_NEW_BLEND_COLOR     0x00000002
 #define R300_NEW_CLIP            0x00000004
@@ -281,6 +284,9 @@ struct r300_context {
     struct r300_vertex_shader* vs;
     /* Viewport state. */
     struct r300_viewport_state* viewport_state;
+    /* ZTOP state. */
+    struct r300_ztop_state ztop_state;
+
     /* Bitmask of dirty state objects. */
     uint32_t dirty_state;
     /* Flag indicating whether or not the HW is dirty. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index feffadd0ee..2c3bba952d 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -106,7 +106,7 @@ void r300_emit_dsa_state(struct r300_context* r300,
     OUT_CS(dsa->z_buffer_control);
     OUT_CS(dsa->z_stencil_control);
     OUT_CS(dsa->stencil_ref_mask);
-    OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top);
+    OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top);
     if (r300screen->caps->is_r500) {
         /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */
     }
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 5d323a26b1..f0861a9cf1 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -446,7 +446,7 @@ static void r300_update_rs_block(struct r300_context* r300)
 
 static void r300_update_ztop(struct r300_context* r300)
 {
-    r300->dsa_state->z_buffer_top = R300_ZTOP_ENABLE;
+    r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
 
     /* This is important enough that I felt it warranted a comment.
      *
@@ -467,11 +467,11 @@ static void r300_update_ztop(struct r300_context* r300)
      * ~C.
      */
     if (r300->dsa_state->alpha_function) {
-        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     } else if (r300_fragment_shader_writes_depth(r300->fs)) {
-        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     } else if (r300->query_current) {
-        r300->dsa_state->z_buffer_top = R300_ZTOP_DISABLE;
+        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     }
 }
 
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
index f9e98b2ec9..d5998e6e6d 100644
--- a/src/gallium/drivers/r300/r300_surface.h
+++ b/src/gallium/drivers/r300/r300_surface.h
@@ -54,7 +54,6 @@ static struct r300_dsa_state dsa_clear_state = {
     .z_buffer_control = 0x0,
     .z_stencil_control = 0x0,
     .stencil_ref_mask = R300_STENCILWRITEMASK_MASK,
-    .z_buffer_top = R300_ZTOP_ENABLE,
     .stencil_ref_bf = 0x0,
 };
 
-- 
cgit v1.2.3


From 72fd1cf292937565a182b400595816c6ad88836a Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Wed, 14 Oct 2009 11:44:09 +0200
Subject: trace: Handle transfer returning null

---
 src/gallium/drivers/trace/tr_screen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ab605c7fc8..7da9bd3866 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -366,7 +366,8 @@ trace_screen_get_tex_transfer(struct pipe_screen *_screen,
 
    trace_dump_call_end();
 
-   result = trace_transfer_create(tr_tex, result);
+   if (result)
+      result = trace_transfer_create(tr_tex, result);
 
    return result;
 }
-- 
cgit v1.2.3


From 13580aa3d142b17f936e517daf949ae228f9f14e Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Thu, 15 Oct 2009 21:58:44 +0200
Subject: nouveau: nv30: refuse binding a colour buffer with a zeta buffer with
 different bits, till the backend can tell Mesa not to do that.

---
 src/gallium/drivers/nv30/nv30_state_fb.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 2729dcec7c..9b0266fba5 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -15,6 +15,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	unsigned w = fb->width;
 	unsigned h = fb->height;
 	struct nv30_miptree *nv30mt;
+	int colour_bits = 32, zeta_bits = 32;
 
 	rt_enable = 0;
 	for (i = 0; i < fb->nr_cbufs; i++) {
@@ -54,6 +55,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		break;
 	case PIPE_FORMAT_R5G6B5_UNORM:
 		rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5;
+		colour_bits = 16;
 		break;
 	default:
 		assert(0);
@@ -62,6 +64,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	switch (zeta_format) {
 	case PIPE_FORMAT_Z16_UNORM:
 		rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16;
+		zeta_bits = 16;
 		break;
 	case PIPE_FORMAT_Z24S8_UNORM:
 	case PIPE_FORMAT_Z24X8_UNORM:
@@ -72,6 +75,10 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		assert(0);
 	}
 
+	if (colour_bits != zeta_bits) {
+		return FALSE;
+	}
+
 	if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
 		uint32_t pitch = rt[0]->pitch;
 		if (zeta) {
-- 
cgit v1.2.3


From a5a05fd782bf7bc3843e475df7b12fe6784c1b9e Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Thu, 15 Oct 2009 22:41:09 +0200
Subject: nouveau: nv30: Hack to enforce same number of bits as front buffer,
 for render targets

---
 src/gallium/drivers/nv30/nv30_screen.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 41af38450b..5b1e5cab2d 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -10,6 +10,22 @@
 #define NV34TCL_CHIPSET_3X_MASK 0x00000010
 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0
 
+/* FIXME: It seems I should not include directly ../../winsys/drm/nouveau/drm/nouveau_drm_api.h
+ * to get the pointer to the context front buffer, so I copied nouveau_winsys here.
+ * nv30_screen_surface_format_supported() can then use it to enforce creating fbo
+ * with same number of bits everywhere.
+ */
+struct nouveau_winsys {
+	struct pipe_winsys base;
+
+	struct pipe_screen *pscreen;
+
+	unsigned nr_pctx;
+	struct pipe_context **pctx;
+
+	struct pipe_surface *front;
+};
+
 static int
 nv30_screen_get_param(struct pipe_screen *pscreen, int param)
 {
@@ -83,21 +99,19 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
 				     enum pipe_texture_target target,
 				     unsigned tex_usage, unsigned geom_flags)
 {
+	struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
+
 	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
-		switch (format) {
-		case PIPE_FORMAT_A8R8G8B8_UNORM:
-		case PIPE_FORMAT_R5G6B5_UNORM:
-			return TRUE;
-		default:
-			break;
-		}
+		return (format == front->format);
 	} else
 	if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
 		switch (format) {
 		case PIPE_FORMAT_Z24S8_UNORM:
 		case PIPE_FORMAT_Z24X8_UNORM:
+			return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM)
+				|| (front->format == PIPE_FORMAT_A8R8G8B8_UNORM);
 		case PIPE_FORMAT_Z16_UNORM:
-			return TRUE;
+			return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From e4f21be13ac14edd89e865138a9e36b266425b39 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 15 Oct 2009 16:58:26 -0700
Subject: r300g: Set logical ID for each emitted texture/sampler.

multitexarray works on my r300, but texrect doesn't.
---
 src/gallium/drivers/r300/r300_emit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 2c3bba952d..f3adc0968e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -531,7 +531,8 @@ void r300_emit_texture(struct r300_context* r300,
     CS_LOCALS(r300);
 
     BEGIN_CS(16);
-    OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0);
+    OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0 |
+        (offset << 28));
     OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1);
     OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color);
 
-- 
cgit v1.2.3


From fc8a156cfc539b9c04dc3527e4fc61cb4b0b688e Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 16 Oct 2009 08:39:59 -0700
Subject: r300g: Use a hash table to look up vertex info.

Need to move rs_block to this, too.

Also, I'm getting massive amounts of flicker for some reason; I bet we've gotta
re-re-examine PSC and friends. :C
---
 src/gallium/drivers/r300/r300_context.c       | 18 ++++++-
 src/gallium/drivers/r300/r300_context.h       | 10 +++-
 src/gallium/drivers/r300/r300_emit.c          | 26 +++++-----
 src/gallium/drivers/r300/r300_render.c        |  2 +-
 src/gallium/drivers/r300/r300_state_derived.c | 72 ++++++++++++++++++++-------
 src/gallium/drivers/r300/r300_state_derived.h |  4 ++
 6 files changed, 97 insertions(+), 35 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index b243f88bb5..a1156d2de6 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -89,10 +89,23 @@ static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
     return r300_draw_elements(pipe, NULL, 0, mode, start, count);
 }
 
-static void r300_destroy_context(struct pipe_context* context) {
+static enum pipe_error r300_clear_hash_table(void* key, void* value,
+                                             void* data)
+{
+    FREE(key);
+    FREE(value);
+    return PIPE_OK;
+}
+
+static void r300_destroy_context(struct pipe_context* context)
+{
     struct r300_context* r300 = r300_context(context);
     struct r300_query* query, * temp;
 
+    u_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
+        NULL);
+    u_hash_table_destroy(r300->shader_hash_table);
+
     draw_destroy(r300->draw);
 
     /* Free the OQ BO. */
@@ -167,6 +180,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->context.is_texture_referenced = r300_is_texture_referenced;
     r300->context.is_buffer_referenced = r300_is_buffer_referenced;
 
+    r300->shader_hash_table = u_hash_table_create(r300_shader_key_hash,
+        r300_shader_key_compare);
+
     r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
     r300->rs_block = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 2acce0fd4a..2a62c67fc7 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -30,12 +30,14 @@
 
 #include "tgsi/tgsi_scan.h"
 
+#include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 
 #include "r300_clear.h"
 #include "r300_query.h"
 #include "r300_screen.h"
+#include "r300_state_derived.h"
 #include "r300_winsys.h"
 
 struct r300_fragment_shader;
@@ -248,6 +250,12 @@ struct r300_context {
     struct r300_query *query_current;
     struct r300_query query_list;
 
+    /* Shader hash table. Used to store vertex formatting information, which
+     * depends on the combination of both currently loaded shaders. */
+    struct u_hash_table* shader_hash_table;
+    /* Vertex formatting information. */
+    struct r300_vertex_format* vertex_info;
+
     /* Various CSO state objects. */
     /* Blend state. */
     struct r300_blend_state* blend_state;
@@ -278,8 +286,6 @@ struct r300_context {
     /* Vertex buffers for Gallium. */
     struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
     int vertex_buffer_count;
-    /* Vertex information. */
-    struct r300_vertex_format vertex_info;
     /* Vertex shader. */
     struct r300_vertex_shader* vs;
     /* Viewport state. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index f3adc0968e..e6092cda9b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -551,7 +551,7 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
 
     DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
             "vertex size %d\n", r300->vbo,
-            r300->vertex_info.vinfo.size);
+            r300->vertex_info->vinfo.size);
     /* Set the pointer to our vertex buffer. The emitted values are this:
      * PACKET3 [3D_LOAD_VBPNTR]
      * COUNT   [1]
@@ -562,8 +562,8 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
     BEGIN_CS(7);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
     OUT_CS(1);
-    OUT_CS(r300->vertex_info.vinfo.size |
-            (r300->vertex_info.vinfo.size << 8));
+    OUT_CS(r300->vertex_info->vinfo.size |
+            (r300->vertex_info->vinfo.size << 8));
     OUT_CS(r300->vbo_offset);
     OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_CS;
@@ -575,30 +575,30 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     CS_LOCALS(r300);
 
     BEGIN_CS(26);
-    OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info.vinfo.size);
+    OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size);
 
     OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
-    OUT_CS(r300->vertex_info.vinfo.hwfmt[0]);
-    OUT_CS(r300->vertex_info.vinfo.hwfmt[1]);
+    OUT_CS(r300->vertex_info->vinfo.hwfmt[0]);
+    OUT_CS(r300->vertex_info->vinfo.hwfmt[1]);
     OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
-    OUT_CS(r300->vertex_info.vinfo.hwfmt[2]);
-    OUT_CS(r300->vertex_info.vinfo.hwfmt[3]);
+    OUT_CS(r300->vertex_info->vinfo.hwfmt[2]);
+    OUT_CS(r300->vertex_info->vinfo.hwfmt[3]);
     /* for (i = 0; i < 4; i++) {
      *    debug_printf("hwfmt%d: 0x%08x\n", i,
-     *            r300->vertex_info.vinfo.hwfmt[i]);
+     *            r300->vertex_info->vinfo.hwfmt[i]);
      * } */
 
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
     for (i = 0; i < 8; i++) {
-        OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]);
+        OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]);
         /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i,
-         *        r300->vertex_info.vap_prog_stream_cntl[i]); */
+         *        r300->vertex_info->vap_prog_stream_cntl[i]); */
     }
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
     for (i = 0; i < 8; i++) {
-        OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]);
+        OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]);
         /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i,
-         *        r300->vertex_info.vap_prog_stream_cntl_ext[i]); */
+         *        r300->vertex_info->vap_prog_stream_cntl_ext[i]); */
     }
     END_CS;
 }
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b56f7a3d1e..4e778e1e57 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -67,7 +67,7 @@ r300_render_get_vertex_info(struct vbuf_render* render)
 
     r300_update_derived_state(r300);
 
-    return &r300->vertex_info.vinfo;
+    return &r300->vertex_info->vinfo;
 }
 
 static boolean r300_render_allocate_vertices(struct vbuf_render* render,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index f0861a9cf1..53027777d6 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -29,6 +29,27 @@
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
+struct r300_shader_key {
+    struct r300_vertex_shader* vs;
+    struct r300_fragment_shader* fs;
+};
+
+unsigned r300_shader_key_hash(void* key) {
+    struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
+    unsigned vs = (unsigned)shader_key->vs;
+    unsigned fs = (unsigned)shader_key->fs;
+
+    return (vs << 16) | (fs & 0xffff);
+}
+
+int r300_shader_key_compare(void* key1, void* key2) {
+    struct r300_shader_key* shader_key1 = (struct r300_shader_key*)key1;
+    struct r300_shader_key* shader_key2 = (struct r300_shader_key*)key2;
+
+    return (shader_key1->vs == shader_key2->vs) &&
+        (shader_key1->fs == shader_key2->fs);
+}
+
 /* Set up the vs_tab and routes. */
 static void r300_vs_tab_routes(struct r300_context* r300,
                                struct r300_vertex_format* vformat)
@@ -247,23 +268,41 @@ static void r300_vertex_psc(struct r300_context* r300,
 /* Update the vertex format. */
 static void r300_update_vertex_format(struct r300_context* r300)
 {
-    struct r300_vertex_format vformat;
+    struct r300_shader_key* key;
+    struct r300_vertex_format* vformat;
+    void* value;
     int i;
 
-    memset(&vformat, 0, sizeof(struct r300_vertex_format));
-    for (i = 0; i < 16; i++) {
-        vformat.vs_tab[i] = -1;
-        vformat.fs_tab[i] = -1;
-    }
+    key = CALLOC_STRUCT(r300_shader_key);
+    key->vs = r300->vs;
+    key->fs = r300->fs;
 
-    r300_vs_tab_routes(r300, &vformat);
+    value = u_hash_table_get(r300->shader_hash_table, (void*)key);
+    if (value) {
+        debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs,
+            key->fs);
+        vformat = (struct r300_vertex_format*)value;
+    } else {
+        debug_printf("r300: Hash table miss... vs: %p fs: %p\n", key->vs,
+            key->fs);
+        vformat = CALLOC_STRUCT(r300_vertex_format);
+
+        for (i = 0; i < 16; i++) {
+            vformat->vs_tab[i] = -1;
+            vformat->fs_tab[i] = -1;
+        }
+
+        r300_vs_tab_routes(r300, vformat);
+        r300_vertex_psc(r300, vformat);
 
-    r300_vertex_psc(r300, &vformat);
+        if (u_hash_table_set(r300->shader_hash_table, (void*)key,
+                (void*)vformat) != PIPE_OK) {
+            debug_printf("r300: Hash table insertion error!\n");
+        }
+    }
 
-    if (memcmp(&r300->vertex_info, &vformat,
-                sizeof(struct r300_vertex_format))) {
-        memcpy(&r300->vertex_info, &vformat,
-                sizeof(struct r300_vertex_format));
+    if (r300->vertex_info != vformat) {
+        r300->vertex_info = vformat;
         r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
     }
 }
@@ -271,7 +310,7 @@ static void r300_update_vertex_format(struct r300_context* r300)
 /* Set up the mappings from GB to US, for RS block. */
 static void r300_update_fs_tab(struct r300_context* r300)
 {
-    struct r300_vertex_format* vformat = &r300->vertex_info;
+    struct r300_vertex_format* vformat = r300->vertex_info;
     struct tgsi_shader_info* info = &r300->fs->info;
     int i, cols = 0, texs = 0, cols_emitted = 0;
     int* tab = vformat->fs_tab;
@@ -337,7 +376,7 @@ static void r300_update_rs_block(struct r300_context* r300)
 {
     struct r300_rs_block* rs = r300->rs_block;
     struct tgsi_shader_info* info = &r300->fs->info;
-    int* tab = r300->vertex_info.fs_tab;
+    int* tab = r300->vertex_info->fs_tab;
     int col_count = 0, fp_offset = 0, i, tex_count = 0;
     int rs_tex_comp = 0;
     memset(rs, 0, sizeof(struct r300_rs_block));
@@ -477,10 +516,7 @@ static void r300_update_ztop(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    if (r300->dirty_state &
-            (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
-        r300_update_vertex_format(r300);
-    }
+    r300_update_vertex_format(r300);
 
     if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
         r300_update_fs_tab(r300);
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
index 71a4a47b00..05ad535e2d 100644
--- a/src/gallium/drivers/r300/r300_state_derived.h
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -25,6 +25,10 @@
 
 struct r300_context;
 
+unsigned r300_shader_key_hash(void* key);
+
+int r300_shader_key_compare(void* key1, void* key2);
+
 void r300_update_derived_state(struct r300_context* r300);
 
 #endif /* R300_STATE_DERIVED_H */
-- 
cgit v1.2.3


From 67356ae04743da3137e950503ffd4a1f8fa36400 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sat, 17 Oct 2009 20:27:24 +0200
Subject: nouveau: nv30: Use same workaround as i915 for segfault related to
 vbo

---
 src/gallium/drivers/nv30/nv30_context.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index f827bdc78b..a3e65b96f7 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -10,7 +10,7 @@ nv30_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
-	
+
 	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
 		BEGIN_RING(rankine, 0x1fd8, 1);
 		OUT_RING  (2);
@@ -37,10 +37,14 @@ nv30_is_texture_referenced( struct pipe_context *pipe,
 			    unsigned face, unsigned level)
 {
    /**
-    * FIXME: Optimize.
+    * FIXME: Return the correct result. We can't always return referenced
+    *        since it causes a double flush within the vbo module.
     */
-
+#if 0
    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+   return 0;
+#endif
 }
 
 static unsigned int
@@ -48,10 +52,14 @@ nv30_is_buffer_referenced( struct pipe_context *pipe,
 			   struct pipe_buffer *buf)
 {
    /**
-    * FIXME: Optimize.
+    * FIXME: Return the correct result. We can't always return referenced
+    *        since it causes a double flush within the vbo module.
     */
-
+#if 0
    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+   return 0;
+#endif
 }
 
 struct pipe_context *
@@ -95,4 +103,3 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
 
 	return &nv30->pipe;
 }
-	
-- 
cgit v1.2.3


From 66aab9a1f6de241687a14f7aed45226061c1b84b Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sat, 17 Oct 2009 20:46:19 +0200
Subject: nouveau: nv30: Remove duplicate case. Was a typo for X8R8G8B8, but
 that will never be used for the front buffer.

---
 src/gallium/drivers/nv30/nv30_screen.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 5b1e5cab2d..bb40e1803d 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -108,8 +108,7 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
 		switch (format) {
 		case PIPE_FORMAT_Z24S8_UNORM:
 		case PIPE_FORMAT_Z24X8_UNORM:
-			return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM)
-				|| (front->format == PIPE_FORMAT_A8R8G8B8_UNORM);
+			return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM);
 		case PIPE_FORMAT_Z16_UNORM:
 			return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
 		default:
-- 
cgit v1.2.3


From 114417a2f52ab463f37fcabb5e9b0636574623dc Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sat, 17 Oct 2009 20:49:18 +0200
Subject: nouveau: nv40: Use same workaround as i915 for segfault related to
 vbo

---
 src/gallium/drivers/nv40/nv40_context.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 8eba6a43ef..4e23671202 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -10,7 +10,7 @@ nv40_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
-	
+
 	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
 		BEGIN_RING(curie, 0x1fd8, 1);
 		OUT_RING  (2);
@@ -37,10 +37,14 @@ nv40_is_texture_referenced( struct pipe_context *pipe,
 			    unsigned face, unsigned level)
 {
    /**
-    * FIXME: Optimize.
+    * FIXME: Return the correct result. We can't always return referenced
+    *        since it causes a double flush within the vbo module.
     */
-
+#if 0
    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+   return 0;
+#endif
 }
 
 static unsigned int
@@ -48,10 +52,14 @@ nv40_is_buffer_referenced( struct pipe_context *pipe,
 			   struct pipe_buffer *buf)
 {
    /**
-    * FIXME: Optimize.
+    * FIXME: Return the correct result. We can't always return referenced
+    *        since it causes a double flush within the vbo module.
     */
-
+#if 0
    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+#else
+   return 0;
+#endif
 }
 
 struct pipe_context *
@@ -95,4 +103,3 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
 
 	return &nv40->pipe;
 }
-	
-- 
cgit v1.2.3


From ce9ae4a483e7c85a9046a87005232aa09de782aa Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 17 Oct 2009 20:05:23 -0700
Subject: r300g: Fix u_hash_table rename.

---
 src/gallium/drivers/r300/r300_context.c       | 6 +++---
 src/gallium/drivers/r300/r300_context.h       | 2 +-
 src/gallium/drivers/r300/r300_state_derived.c | 8 +++-----
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index a1156d2de6..0518685200 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -102,9 +102,9 @@ static void r300_destroy_context(struct pipe_context* context)
     struct r300_context* r300 = r300_context(context);
     struct r300_query* query, * temp;
 
-    u_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
+    util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
         NULL);
-    u_hash_table_destroy(r300->shader_hash_table);
+    util_hash_table_destroy(r300->shader_hash_table);
 
     draw_destroy(r300->draw);
 
@@ -180,7 +180,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->context.is_texture_referenced = r300_is_texture_referenced;
     r300->context.is_buffer_referenced = r300_is_buffer_referenced;
 
-    r300->shader_hash_table = u_hash_table_create(r300_shader_key_hash,
+    r300->shader_hash_table = util_hash_table_create(r300_shader_key_hash,
         r300_shader_key_compare);
 
     r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 2a62c67fc7..2d608d6afc 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -252,7 +252,7 @@ struct r300_context {
 
     /* Shader hash table. Used to store vertex formatting information, which
      * depends on the combination of both currently loaded shaders. */
-    struct u_hash_table* shader_hash_table;
+    struct util_hash_table* shader_hash_table;
     /* Vertex formatting information. */
     struct r300_vertex_format* vertex_info;
 
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 53027777d6..0210d97914 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -277,7 +277,7 @@ static void r300_update_vertex_format(struct r300_context* r300)
     key->vs = r300->vs;
     key->fs = r300->fs;
 
-    value = u_hash_table_get(r300->shader_hash_table, (void*)key);
+    value = util_hash_table_get(r300->shader_hash_table, (void*)key);
     if (value) {
         debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs,
             key->fs);
@@ -295,10 +295,8 @@ static void r300_update_vertex_format(struct r300_context* r300)
         r300_vs_tab_routes(r300, vformat);
         r300_vertex_psc(r300, vformat);
 
-        if (u_hash_table_set(r300->shader_hash_table, (void*)key,
-                (void*)vformat) != PIPE_OK) {
-            debug_printf("r300: Hash table insertion error!\n");
-        }
+        util_hash_table_set(r300->shader_hash_table,
+            (void*)key, (void*)vformat);
     }
 
     if (r300->vertex_info != vformat) {
-- 
cgit v1.2.3


From 51173e4e53a64465d1498ffd6454687b7629eb59 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 17 Oct 2009 20:29:27 -0700
Subject: r300g: Also have rs_block keyed to the current shader combo.

Eliminates part of the glxgears corruption here. Need to clean up PSC more,
to get rid of the rest of it.
---
 src/gallium/drivers/r300/r300_state_derived.c | 109 ++++++++++++++------------
 1 file changed, 58 insertions(+), 51 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 0210d97914..da8c366f30 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -34,6 +34,11 @@ struct r300_shader_key {
     struct r300_fragment_shader* fs;
 };
 
+struct r300_shader_derived_value {
+    struct r300_vertex_format* vformat;
+    struct r300_rs_block* rs_block;
+};
+
 unsigned r300_shader_key_hash(void* key) {
     struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
     unsigned vs = (unsigned)shader_key->vs;
@@ -265,50 +270,10 @@ static void r300_vertex_psc(struct r300_context* r300,
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
 }
 
-/* Update the vertex format. */
-static void r300_update_vertex_format(struct r300_context* r300)
-{
-    struct r300_shader_key* key;
-    struct r300_vertex_format* vformat;
-    void* value;
-    int i;
-
-    key = CALLOC_STRUCT(r300_shader_key);
-    key->vs = r300->vs;
-    key->fs = r300->fs;
-
-    value = util_hash_table_get(r300->shader_hash_table, (void*)key);
-    if (value) {
-        debug_printf("r300: Hash table hit! vs: %p fs: %p\n", key->vs,
-            key->fs);
-        vformat = (struct r300_vertex_format*)value;
-    } else {
-        debug_printf("r300: Hash table miss... vs: %p fs: %p\n", key->vs,
-            key->fs);
-        vformat = CALLOC_STRUCT(r300_vertex_format);
-
-        for (i = 0; i < 16; i++) {
-            vformat->vs_tab[i] = -1;
-            vformat->fs_tab[i] = -1;
-        }
-
-        r300_vs_tab_routes(r300, vformat);
-        r300_vertex_psc(r300, vformat);
-
-        util_hash_table_set(r300->shader_hash_table,
-            (void*)key, (void*)vformat);
-    }
-
-    if (r300->vertex_info != vformat) {
-        r300->vertex_info = vformat;
-        r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
-    }
-}
-
 /* Set up the mappings from GB to US, for RS block. */
-static void r300_update_fs_tab(struct r300_context* r300)
+static void r300_update_fs_tab(struct r300_context* r300,
+                               struct r300_vertex_format* vformat)
 {
-    struct r300_vertex_format* vformat = r300->vertex_info;
     struct tgsi_shader_info* info = &r300->fs->info;
     int i, cols = 0, texs = 0, cols_emitted = 0;
     int* tab = vformat->fs_tab;
@@ -370,14 +335,14 @@ static void r300_update_fs_tab(struct r300_context* r300)
 /* Set up the RS block. This is the part of the chipset that actually does
  * the rasterization of vertices into fragments. This is also the part of the
  * chipset that locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300)
+static void r300_update_rs_block(struct r300_context* r300,
+                                 struct r300_vertex_format* vformat,
+                                 struct r300_rs_block* rs)
 {
-    struct r300_rs_block* rs = r300->rs_block;
     struct tgsi_shader_info* info = &r300->fs->info;
-    int* tab = r300->vertex_info->fs_tab;
+    int* tab = vformat->fs_tab;
     int col_count = 0, fp_offset = 0, i, tex_count = 0;
     int rs_tex_comp = 0;
-    memset(rs, 0, sizeof(struct r300_rs_block));
 
     if (r300_screen(r300->context.screen)->caps->is_r500) {
         for (i = 0; i < info->num_inputs; i++) {
@@ -481,6 +446,53 @@ static void r300_update_rs_block(struct r300_context* r300)
     rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
 }
 
+/* Update the vertex format. */
+static void r300_update_vertex_format(struct r300_context* r300)
+{
+    struct r300_shader_key* key;
+    struct r300_vertex_format* vformat;
+    struct r300_rs_block* rs_block;
+    struct r300_shader_derived_value* value;
+    int i;
+
+    key = CALLOC_STRUCT(r300_shader_key);
+    key->vs = r300->vs;
+    key->fs = r300->fs;
+
+    value = (struct r300_shader_derived_value*)
+        util_hash_table_get(r300->shader_hash_table, (void*)key);
+    if (value) {
+        vformat = value->vformat;
+        rs_block = value->rs_block;
+
+        FREE(key);
+    } else {
+        vformat = CALLOC_STRUCT(r300_vertex_format);
+        rs_block = CALLOC_STRUCT(r300_rs_block);
+        value = CALLOC_STRUCT(r300_shader_derived_value);
+
+        for (i = 0; i < 16; i++) {
+            vformat->vs_tab[i] = -1;
+            vformat->fs_tab[i] = -1;
+        }
+
+        r300_vs_tab_routes(r300, vformat);
+        r300_vertex_psc(r300, vformat);
+        r300_update_fs_tab(r300, vformat);
+
+        r300_update_rs_block(r300, vformat, rs_block);
+
+        value->vformat = vformat;
+        value->rs_block = rs_block;
+        util_hash_table_set(r300->shader_hash_table,
+            (void*)key, (void*)value);
+    }
+
+    r300->vertex_info = vformat;
+    r300->rs_block = rs_block;
+    r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK);
+}
+
 static void r300_update_ztop(struct r300_context* r300)
 {
     r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
@@ -516,11 +528,6 @@ void r300_update_derived_state(struct r300_context* r300)
 {
     r300_update_vertex_format(r300);
 
-    if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
-        r300_update_fs_tab(r300);
-        r300_update_rs_block(r300);
-    }
-
     if (r300->dirty_state &
             (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
         r300_update_ztop(r300);
-- 
cgit v1.2.3


From 11056ca86fce64209b7d21c87070c419a1968d28 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 17 Oct 2009 20:47:45 -0700
Subject: r300g: Use a dirty test to bring framerate back up.

This is just split out from the next commit, that's all.
---
 src/gallium/drivers/r300/r300_state_derived.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index da8c366f30..c59d446e93 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -447,7 +447,7 @@ static void r300_update_rs_block(struct r300_context* r300,
 }
 
 /* Update the vertex format. */
-static void r300_update_vertex_format(struct r300_context* r300)
+static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_shader_key* key;
     struct r300_vertex_format* vformat;
@@ -526,7 +526,10 @@ static void r300_update_ztop(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    r300_update_vertex_format(r300);
+    if (r300->dirty_state &
+        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
+        r300_update_derived_shader_state(r300);
+    }
 
     if (r300->dirty_state &
             (R300_NEW_DSA | R300_NEW_FRAGMENT_SHADER | R300_NEW_QUERY)) {
-- 
cgit v1.2.3


From bfd877e4705002d97ee8dba6fe0c1f8676582ab3 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 17 Oct 2009 20:53:19 -0700
Subject: r300g: Squash format warning.

Won't ever be supported.
---
 src/gallium/drivers/r300/r300_screen.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 5381651c77..44770d1aca 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -231,6 +231,7 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
 
         /* Definitely unsupported formats. */
         /* Non-usable Z buffer/stencil formats. */
+        case PIPE_FORMAT_Z32_UNORM:
         case PIPE_FORMAT_Z24X8_UNORM:
         case PIPE_FORMAT_S8Z24_UNORM:
         case PIPE_FORMAT_X8Z24_UNORM:
-- 
cgit v1.2.3


From bb567357bc1366df7115e0daa68c2470e3bf6ba6 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 17 Oct 2009 21:32:56 -0700
Subject: gallium: Permit surface_copy and surface_fill to be NULL.

Uf. Lots of files touched. Would people with working vega, xorg, dri1, etc.
please make sure you are not broken, and fix yourself up if you are.

There were only two or three places where the code did not have painful
fallbacks, so I would advise st maintainers to find less painful workarounds,
or consider overhauling util_surface_copy and util_surface_fill.

Per ymanton, darktama, and Dr_Jakob's suggestions, clear has been left as-is.

I will not add PIPE_CAP_BLITTER unless it is deemed necessary.
---
 src/gallium/auxiliary/util/u_blit.c             | 19 ++++++++++++++-----
 src/gallium/auxiliary/util/u_clear.h            | 16 +++++++++++++---
 src/gallium/drivers/r300/r300_context.c         |  2 +-
 src/gallium/include/pipe/p_context.h            |  3 +++
 src/gallium/state_trackers/dri/dri_drawable.c   | 22 ++++++++++++++++------
 src/gallium/state_trackers/egl/egl_surface.c    | 23 +++++++++++++++++------
 src/gallium/state_trackers/vega/renderer.c      | 16 ++++++++++++----
 src/gallium/state_trackers/vega/vg_tracker.c    | 24 +++++++++++++++++-------
 src/gallium/state_trackers/xorg/xorg_exa.c      | 12 +++++++++---
 src/gallium/state_trackers/xorg/xorg_renderer.c | 19 ++++++++++++++-----
 src/mesa/state_tracker/st_atom_framebuffer.c    | 16 ++++++++++++----
 src/mesa/state_tracker/st_cb_drawpixels.c       | 19 ++++++++++++++-----
 src/mesa/state_tracker/st_cb_fbo.c              | 16 ++++++++++++----
 src/mesa/state_tracker/st_cb_texture.c          |  3 ++-
 14 files changed, 156 insertions(+), 54 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index fb00c3abe8..5038642599 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -46,6 +46,7 @@
 #include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_surface.h"
+#include "util/u_rect.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -301,7 +302,8 @@ util_blit_pixels_writemask(struct blit_state *ctx,
     * no overlapping.
     * Filter mode should not matter since there's no stretching.
     */
-   if (dst->format == src->format &&
+   if (pipe->surface_copy &&
+       dst->format == src->format &&
        srcX0 < srcX1 &&
        dstX0 < dstX1 &&
        srcY0 < srcY1 &&
@@ -365,10 +367,17 @@ util_blit_pixels_writemask(struct blit_state *ctx,
                                         PIPE_BUFFER_USAGE_GPU_WRITE);
 
       /* load temp texture */
-      pipe->surface_copy(pipe,
-                         texSurf, 0, 0,   /* dest */
-                         src, srcLeft, srcTop, /* src */
-                         srcW, srcH);     /* size */
+      if (pipe->surface_copy) {
+         pipe->surface_copy(pipe,
+                            texSurf, 0, 0,   /* dest */
+                            src, srcLeft, srcTop, /* src */
+                            srcW, srcH);     /* size */
+      } else {
+         util_surface_copy(pipe, FALSE,
+                           texSurf, 0, 0,   /* dest */
+                           src, srcLeft, srcTop, /* src */
+                           srcW, srcH);     /* size */
+      }
 
       /* free the surface, update the texture if necessary.
        */
diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h
index 7c16b32cf9..1e65a035ae 100644
--- a/src/gallium/auxiliary/util/u_clear.h
+++ b/src/gallium/auxiliary/util/u_clear.h
@@ -32,6 +32,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_pack_color.h"
+#include "util/u_rect.h"
 
 
 /**
@@ -48,13 +49,22 @@ util_clear(struct pipe_context *pipe,
       unsigned color;
 
       util_pack_color(rgba, ps->format, &color);
-      pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+      if (pipe->surface_fill) {
+         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+      } else {
+         util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+      }
    }
 
    if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
       struct pipe_surface *ps = framebuffer->zsbuf;
 
-      pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
-                         util_pack_z_stencil(ps->format, depth, stencil));
+      if (pipe->surface_fill) {
+         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
+                            util_pack_z_stencil(ps->format, depth, stencil));
+      } else {
+         util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height,
+                           util_pack_z_stencil(ps->format, depth, stencil));
+      }
    }
 }
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 0518685200..7b370b3e95 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -206,7 +206,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     r300_init_query_functions(r300);
 
-    r300_init_surface_functions(r300);
+    /* r300_init_surface_functions(r300); */
 
     r300_init_state_functions(r300);
 
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 39620a7198..5569001e60 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -189,6 +189,9 @@ struct pipe_context {
 
    /**
     * Surface functions
+    *
+    * The pipe driver is allowed to set these functions to NULL, and in that
+    * case, they will not be available.
     */
    /*@{*/
 
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index c67cc8dacb..5625ff53cf 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -45,6 +45,7 @@
 #include "state_tracker/st_cb_fbo.h"
 
 #include "util/u_memory.h"
+#include "util/u_rect.h"
 
 static struct pipe_surface *
 dri_surface_from_handle(struct drm_api *api,
@@ -541,12 +542,21 @@ dri1_swap_copy(struct dri_context *ctx,
    cur = dPriv->pClipRects;
 
    for (i = 0; i < dPriv->numClipRects; ++i) {
-      if (dri1_intersect_src_bbox(&clip, dPriv->x, dPriv->y, cur++, bbox))
-	 pipe->surface_copy(pipe, dst, clip.x1, clip.y1,
-			    src,
-			    (int)clip.x1 - dPriv->x,
-			    (int)clip.y1 - dPriv->y,
-			    clip.x2 - clip.x1, clip.y2 - clip.y1);
+      if (dri1_intersect_src_bbox(&clip, dPriv->x, dPriv->y, cur++, bbox)) {
+         if (pipe->surface_copy) {
+            pipe->surface_copy(pipe, dst, clip.x1, clip.y1,
+                               src,
+                               (int)clip.x1 - dPriv->x,
+                               (int)clip.y1 - dPriv->y,
+                               clip.x2 - clip.x1, clip.y2 - clip.y1);
+         } else {
+            util_surface_copy(pipe, FALSE, dst, clip.x1, clip.y1,
+                              src,
+                              (int)clip.x1 - dPriv->x,
+                              (int)clip.y1 - dPriv->y,
+                              clip.x2 - clip.x1, clip.y2 - clip.y1);
+         }
+      }
    }
 }
 
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index 7911a8834e..71c013756d 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -12,6 +12,8 @@
 
 #include "state_tracker/drm_api.h"
 
+#include "util/u_rect.h"
+
 /*
  * Util functions
  */
@@ -360,12 +362,21 @@ drm_swap_buffers(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *draw)
 		st_notify_swapbuffers(surf->stfb);
 
 		if (ctx && surf->screen) {
-			ctx->pipe->surface_copy(ctx->pipe,
-				surf->screen->surface,
-				0, 0,
-				back_surf,
-				0, 0,
-				surf->w, surf->h);
+            if (ctx->pipe->surface_copy) {
+                ctx->pipe->surface_copy(ctx->pipe,
+                    surf->screen->surface,
+                    0, 0,
+                    back_surf,
+                    0, 0,
+                    surf->w, surf->h);
+            } else {
+                util_surface_copy(ctx->pipe, FALSE,
+                    surf->screen->surface,
+                    0, 0,
+                    back_surf,
+                    0, 0,
+                    surf->w, surf->h);
+            }
 			ctx->pipe->flush(ctx->pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE, NULL);
 
 #ifdef DRM_MODE_FEATURE_DIRTYFB
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index f7c5f2f0cd..396c88aa3d 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -37,6 +37,7 @@
 #include "util/u_draw_quad.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_memory.h"
+#include "util/u_rect.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -457,10 +458,17 @@ void renderer_copy_surface(struct renderer *ctx,
                                      PIPE_BUFFER_USAGE_GPU_WRITE);
 
    /* load temp texture */
-   pipe->surface_copy(pipe,
-                      texSurf, 0, 0,   /* dest */
-                      src, srcLeft, srcTop, /* src */
-                      srcW, srcH);     /* size */
+   if (pipe->surface_copy) {
+      pipe->surface_copy(pipe,
+                         texSurf, 0, 0,   /* dest */
+                         src, srcLeft, srcTop, /* src */
+                         srcW, srcH);     /* size */
+   } else {
+      util_surface_copy(pipe, FALSE,
+                        texSurf, 0, 0,   /* dest */
+                        src, srcLeft, srcTop, /* src */
+                        srcW, srcH);     /* size */
+   }
 
    /* free the surface, update the texture if necessary.*/
    screen->tex_surface_destroy(texSurf);
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index 56cc60aebe..c4da01e52c 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -235,13 +235,23 @@ static void setup_new_alpha_mask(struct vg_context *ctx,
          old_texture,
          0, 0, 0,
          PIPE_BUFFER_USAGE_GPU_READ);
-      pipe->surface_copy(pipe,
-                         surface,
-                         0, 0,
-                         old_surface,
-                         0, 0,
-                         MIN2(old_surface->width, width),
-                         MIN2(old_surface->height, height));
+      if (pipe->surface_copy) {
+         pipe->surface_copy(pipe,
+                            surface,
+                            0, 0,
+                            old_surface,
+                            0, 0,
+                            MIN2(old_surface->width, width),
+                            MIN2(old_surface->height, height));
+      } else {
+         util_surface_copy(pipe, FALSE,
+                           surface,
+                           0, 0,
+                           old_surface,
+                           0, 0,
+                           MIN2(old_surface->width, width),
+                           MIN2(old_surface->height, height));
+      }
       if (surface)
          pipe_surface_reference(&surface, NULL);
       if (old_surface)
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index af76d6690f..4988af4864 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -693,9 +693,15 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	    dst_surf = exa->scrn->get_tex_surface(
 		exa->scrn, texture, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE);
 	    src_surf = xorg_gpu_surface(exa->pipe->screen, priv);
-	    exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf,
-				    0, 0, min(width, texture->width[0]),
-				    min(height, texture->height[0]));
+        if (exa->pipe->surface_copy) {
+            exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf,
+                        0, 0, min(width, texture->width[0]),
+                        min(height, texture->height[0]));
+        } else {
+            util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf,
+                        0, 0, min(width, texture->width[0]),
+                        min(height, texture->height[0]));
+        }
 	    exa->scrn->tex_surface_destroy(dst_surf);
 	    exa->scrn->tex_surface_destroy(src_surf);
 	}
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 81b209cb59..ca69e1e0e9 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -7,6 +7,7 @@
 #include "util/u_draw_quad.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_rect.h"
 
 #include "pipe/p_inlines.h"
 
@@ -586,11 +587,19 @@ create_sampler_texture(struct xorg_renderer *r,
          screen, src, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ);
       struct pipe_surface *ps_tex = screen->get_tex_surface(
          screen, pt, 0, 0, 0, PIPE_BUFFER_USAGE_GPU_WRITE );
-      pipe->surface_copy(pipe,
-			 ps_tex, /* dest */
-			 0, 0, /* destx/y */
-			 ps_read,
-			 0, 0, src->width[0], src->height[0]);
+      if (pipe->surface_copy) {
+         pipe->surface_copy(pipe,
+                ps_tex, /* dest */
+                0, 0, /* destx/y */
+                ps_read,
+                0, 0, src->width[0], src->height[0]);
+      } else {
+          util_surface_copy(pipe, FALSE,
+                ps_tex, /* dest */
+                0, 0, /* destx/y */
+                ps_read,
+                0, 0, src->width[0], src->height[0]);
+      }
       pipe_surface_reference(&ps_read, NULL);
       pipe_surface_reference(&ps_tex, NULL);
    }
diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c
index 5209a6a0c9..e18c0f6e0a 100644
--- a/src/mesa/state_tracker/st_atom_framebuffer.c
+++ b/src/mesa/state_tracker/st_atom_framebuffer.c
@@ -39,6 +39,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
 #include "cso_cache/cso_context.h"
+#include "util/u_rect.h"
 
 
@@ -162,10 +163,17 @@ update_framebuffer_state( struct st_context *st )
          (void) st_get_framebuffer_surface(stfb, ST_SURFACE_FRONT_LEFT, &surf_front);
          (void) st_get_framebuffer_surface(stfb, ST_SURFACE_BACK_LEFT, &surf_back);
 
-         st->pipe->surface_copy(st->pipe,
-                                surf_front, 0, 0,  /* dest */
-                                surf_back, 0, 0,   /* src */
-                                fb->Width, fb->Height);
+         if (st->pipe->surface_copy) {
+            st->pipe->surface_copy(st->pipe,
+                                   surf_front, 0, 0,  /* dest */
+                                   surf_back, 0, 0,   /* src */
+                                   fb->Width, fb->Height);
+         } else {
+            util_surface_copy(st->pipe, FALSE,
+                              surf_front, 0, 0,
+                              surf_back, 0, 0,
+                              fb->Width, fb->Height);
+         }
       }
       /* we're assuming we'll really draw to the front buffer */
       st->frontbuffer_status = FRONT_STATUS_DIRTY;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 5c3413f905..be44577117 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -62,6 +62,7 @@
 #include "util/u_tile.h"
 #include "util/u_draw_quad.h"
 #include "util/u_math.h"
+#include "util/u_rect.h"
 #include "shader/prog_instruction.h"
 #include "cso_cache/cso_context.h"
 
@@ -1075,11 +1076,19 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
                                        PIPE_BUFFER_USAGE_GPU_READ);
       struct pipe_surface *psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, 
                                       PIPE_BUFFER_USAGE_GPU_WRITE );
-      pipe->surface_copy(pipe,
-			 psTex, /* dest */
-			 0, 0, /* destx/y */
-			 psRead,
-			 srcx, srcy, width, height);
+      if (pipe->surface_copy) {
+         pipe->surface_copy(pipe,
+                            psTex, /* dest */
+                            0, 0, /* destx/y */
+                            psRead,
+                            srcx, srcy, width, height);
+      } else {
+         util_surface_copy(pipe, FALSE,
+                           psTex,
+                           0, 0,
+                           psRead,
+                           srcx, srcy, width, height);
+      }
       pipe_surface_reference(&psRead, NULL); 
       pipe_surface_reference(&psTex, NULL);
    }
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 864f5d3ca3..73aa65955b 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -49,6 +49,7 @@
 #include "st_public.h"
 #include "st_texture.h"
 
+#include "util/u_rect.h"
 
 
 /**
@@ -538,10 +539,17 @@ copy_back_to_front(struct st_context *st,
    (void) st_get_framebuffer_surface(stfb, backIndex, &surf_back);
 
    if (surf_front && surf_back) {
-      st->pipe->surface_copy(st->pipe,
-                             surf_front, 0, 0,  /* dest */
-                             surf_back, 0, 0,   /* src */
-                             fb->Width, fb->Height);
+      if (st->pipe->surface_copy) {
+         st->pipe->surface_copy(st->pipe,
+                                surf_front, 0, 0,  /* dest */
+                                surf_back, 0, 0,   /* src */
+                                fb->Width, fb->Height);
+      } else {
+         util_surface_copy(st->pipe, FALSE,
+                           surf_front, 0, 0,
+                           surf_back, 0, 0,
+                           fb->Width, fb->Height);
+      }
    }
 }
 
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index b943787106..a1953342b4 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1546,7 +1546,8 @@ st_copy_texsubimage(GLcontext *ctx,
 
    if (ctx->_ImageTransferState == 0x0) {
 
-      if (matching_base_formats && 
+      if (pipe->surface_copy &&
+          matching_base_formats &&
           src_format == dest_format &&
           !do_flip) 
       {
-- 
cgit v1.2.3


From 838da1d4ae11aa8b5eab4f35713709714e337cbe Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 18 Oct 2009 14:31:58 +0100
Subject: llvmpipe: Allocate texture storage for whole quads.

---
 src/gallium/drivers/llvmpipe/lp_texture.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 08f0950d47..a00f2495df 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -66,16 +66,24 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
    pf_get_block(lpt->base.format, &lpt->base.block);
 
    for (level = 0; level <= pt->last_level; level++) {
+      unsigned nblocksx, nblocksy;
+
       pt->width[level] = width;
       pt->height[level] = height;
       pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
-      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);  
-      lpt->stride[level] = align(pt->nblocksx[level]*pt->block.size, 16);
+      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
+
+      /* Allocate storage for whole quads. This is particularly important
+       * for depth surfaces, which are currently stored in a swizzled format. */
+      nblocksx = pf_get_nblocksx(&pt->block, align(width, 2));
+      nblocksy = pf_get_nblocksy(&pt->block, align(height, 2));
+
+      lpt->stride[level] = align(nblocksx*pt->block.size, 16);
 
       lpt->level_offset[level] = buffer_size;
 
-      buffer_size += (pt->nblocksy[level] *
+      buffer_size += (nblocksy *
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       lpt->stride[level]);
 
-- 
cgit v1.2.3


From d2e29b502e5f777551ff057f08e54d82542863cf Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 18 Oct 2009 10:30:18 -0700
Subject: r300g: Add another ZTOP condition.

I don't even know if texkill works right now.
---
 src/gallium/drivers/r300/r300_state_derived.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index c59d446e93..2c624766bb 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -517,6 +517,8 @@ static void r300_update_ztop(struct r300_context* r300)
      */
     if (r300->dsa_state->alpha_function) {
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300->fs->info.uses_kill) {
+        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     } else if (r300_fragment_shader_writes_depth(r300->fs)) {
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     } else if (r300->query_current) {
-- 
cgit v1.2.3


From 16a06fea73b1e6e8857f7568762bfc56dcfe2940 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 18 Oct 2009 15:54:39 -0700
Subject: r300g: Fix up a bunch of warnings.

---
 src/gallium/drivers/r300/r300_emit.c          | 24 +++++++++++-------------
 src/gallium/drivers/r300/r300_emit.h          |  1 +
 src/gallium/drivers/r300/r300_flush.c         |  2 ++
 src/gallium/drivers/r300/r300_screen.c        |  2 +-
 src/gallium/drivers/r300/r300_state.c         |  4 ++--
 src/gallium/drivers/r300/r300_state_derived.c | 10 ++++------
 6 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index e6092cda9b..df2046bd0c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -320,8 +320,7 @@ void r300_emit_fb_state(struct r300_context* r300,
     END_CS;
 }
 
-void r300_emit_query_start(struct r300_context *r300)
-
+static void r300_emit_query_start(struct r300_context *r300)
 {
     struct r300_capabilities *caps = r300_screen(r300->context.screen)->caps;
     struct r300_query *query = r300->query_current;
@@ -334,9 +333,9 @@ void r300_emit_query_start(struct r300_context *r300)
      * for overlapping queries. */
     BEGIN_CS(4);
     if (caps->family == CHIP_FAMILY_RV530) {
-	OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+        OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
     } else {
-	OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+        OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
     }
     OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
     END_CS;
@@ -345,7 +344,7 @@ void r300_emit_query_start(struct r300_context *r300)
 
 
 static void r300_emit_query_finish(struct r300_context *r300,
-				   struct r300_query *query)
+                                   struct r300_query *query)
 {
     struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
@@ -388,7 +387,7 @@ static void r300_emit_query_finish(struct r300_context *r300,
             OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
             OUT_CS_RELOC(r300->oqbo, query->offset + (sizeof(uint32_t) * 0),
                     0, RADEON_GEM_DOMAIN_GTT, 0);
-	    break;
+            break;
         default:
             debug_printf("r300: Implementation error: Chipset reports %d"
                     " pixel pipes!\n", caps->num_frag_pipes);
@@ -398,11 +397,10 @@ static void r300_emit_query_finish(struct r300_context *r300,
     /* And, finally, reset it to normal... */
     OUT_CS_REG(R300_SU_REG_DEST, 0xF);
     END_CS;
-
 }
 
 static void rv530_emit_query_single(struct r300_context *r300,
-				    struct r300_query *query)
+                                    struct r300_query *query)
 {
     CS_LOCALS(r300);
 
@@ -415,7 +413,7 @@ static void rv530_emit_query_single(struct r300_context *r300,
 }
 
 static void rv530_emit_query_double(struct r300_context *r300,
-				    struct r300_query *query)
+                                    struct r300_query *query)
 {
     CS_LOCALS(r300);
 
@@ -442,10 +440,10 @@ void r300_emit_query_end(struct r300_context* r300)
         return;
 
     if (caps->family == CHIP_FAMILY_RV530) {
-	if (caps->num_z_pipes == 2)
-	    rv530_emit_query_double(r300, query);
-	else
-	    rv530_emit_query_single(r300, query);
+        if (caps->num_z_pipes == 2)
+            rv530_emit_query_double(r300, query);
+        else
+            rv530_emit_query_single(r300, query);
     } else 
         r300_emit_query_finish(r300, query);
 }
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index b62aa9fec5..7e469ea0c7 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -58,6 +58,7 @@ void r300_emit_fb_state(struct r300_context* r300,
 
 void r300_emit_query_begin(struct r300_context* r300,
                            struct r300_query* query);
+
 void r300_emit_query_end(struct r300_context* r300);
 
 void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index 241ea71d6b..d60652a021 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -20,7 +20,9 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "r300_emit.h"
 #include "r300_flush.h"
+#include "r300_state_invariant.h"
 
 static void r300_flush(struct pipe_context* pipe,
                        unsigned flags,
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 44770d1aca..cc499d400a 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -311,7 +311,7 @@ r300_get_tex_transfer(struct pipe_screen *screen,
 {
     struct r300_texture *tex = (struct r300_texture *)texture;
     struct r300_transfer *trans;
-    unsigned offset;  /* in bytes */
+    unsigned offset = 0;  /* in bytes */
 
     /* XXX Add support for these things */
     if (texture->target == PIPE_TEXTURE_CUBE) {
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 8359850966..0a982a9d5d 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -329,7 +329,7 @@ static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
 {
     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
     rc_constants_destroy(&fs->code.constants);
-    FREE(fs->state.tokens);
+    FREE((void*)fs->state.tokens);
     FREE(shader);
 }
 
@@ -697,7 +697,7 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
 
         rc_constants_destroy(&vs->code.constants);
         draw_delete_vertex_shader(r300->draw, vs->draw);
-        FREE(vs->state.tokens);
+        FREE((void*)vs->state.tokens);
         FREE(shader);
     } else {
         draw_delete_vertex_shader(r300->draw,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 2c624766bb..1468b9d36e 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -265,7 +265,9 @@ static void r300_vertex_psc(struct r300_context* r300,
     }
 
     /* Set the last vector in the PSC. */
-    i--;
+    if (i) {
+        i -= 1;
+    }
     vformat->vap_prog_stream_cntl[i >> 1] |=
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
 }
@@ -336,17 +338,14 @@ static void r300_update_fs_tab(struct r300_context* r300,
  * the rasterization of vertices into fragments. This is also the part of the
  * chipset that locks up if any part of it is even slightly wrong. */
 static void r300_update_rs_block(struct r300_context* r300,
-                                 struct r300_vertex_format* vformat,
                                  struct r300_rs_block* rs)
 {
     struct tgsi_shader_info* info = &r300->fs->info;
-    int* tab = vformat->fs_tab;
     int col_count = 0, fp_offset = 0, i, tex_count = 0;
     int rs_tex_comp = 0;
 
     if (r300_screen(r300->context.screen)->caps->is_r500) {
         for (i = 0; i < info->num_inputs; i++) {
-            assert(tab[i] != -1);
             switch (info->input_semantic_name[i]) {
                 case TGSI_SEMANTIC_COLOR:
                     rs->ip[col_count] |=
@@ -387,7 +386,6 @@ static void r300_update_rs_block(struct r300_context* r300,
         }
     } else {
         for (i = 0; i < info->num_inputs; i++) {
-            assert(tab[i] != -1);
             switch (info->input_semantic_name[i]) {
                 case TGSI_SEMANTIC_COLOR:
                     rs->ip[col_count] |=
@@ -480,7 +478,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
         r300_vertex_psc(r300, vformat);
         r300_update_fs_tab(r300, vformat);
 
-        r300_update_rs_block(r300, vformat, rs_block);
+        r300_update_rs_block(r300, rs_block);
 
         value->vformat = vformat;
         value->rs_block = rs_block;
-- 
cgit v1.2.3


From 869d3eea37ee060d62cd5b7f6031ef5a93e328a1 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Wed, 7 Oct 2009 16:07:34 +1000
Subject: drm/nv50: write tic/tsc setup to correct slots when skipping units

---
 src/gallium/drivers/nv50/nv50_state_validate.c | 7 ++++---
 src/gallium/drivers/nv50/nv50_tex.c            | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index fd27620371..9079de918d 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -356,13 +356,14 @@ viewport_uptodate:
 	if (nv50->dirty & NV50_NEW_SAMPLER) {
 		int i;
 
-		so = so_new(nv50->sampler_nr * 9 + 2, 0);
-		so_method(so, tesla, NV50TCL_CB_ADDR, 1);
-		so_data  (so, NV50_CB_TSC);
+		so = so_new(nv50->sampler_nr * 11, 0);
 		for (i = 0; i < nv50->sampler_nr; i++) {
 			if (!nv50->sampler[i])
 				continue;
 
+			so_method(so, tesla, NV50TCL_CB_ADDR, 1);
+			so_data  (so, ((i * 8) << NV50TCL_CB_ADDR_ID_SHIFT) |
+				      NV50_CB_TSC);
 			so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8);
 			so_datap (so, nv50->sampler[i]->tsc, 8);
 		}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 72d33150af..ca2b883e9b 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -148,18 +148,19 @@ nv50_tex_validate(struct nv50_context *nv50)
 	struct nouveau_stateobj *so;
 	int unit, push;
 
-	push  = nv50->miptree_nr * 9 + 2;
+	push  = nv50->miptree_nr * 11;
 	push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2;
 
 	so = so_new(push, nv50->miptree_nr * 2);
-	so_method(so, tesla, NV50TCL_CB_ADDR, 1);
-	so_data  (so, NV50_CB_TIC);
 	for (unit = 0; unit < nv50->miptree_nr; unit++) {
 		struct nv50_miptree *mt = nv50->miptree[unit];
 
 		if (!mt)
 			continue;
 
+		so_method(so, tesla, NV50TCL_CB_ADDR, 1);
+		so_data  (so, ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT) |
+			      NV50_CB_TIC);
 		so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
 		if (nv50_tex_construct(nv50, so, mt, unit)) {
 			NOUVEAU_ERR("failed tex validate\n");
-- 
cgit v1.2.3


From 35b98e2884bd7c76c43fa08d5bb0a8f1396d3298 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Mon, 19 Oct 2009 09:28:59 +1000
Subject: nouveau: implement is_{texture,buffer}_referenced properly

---
 src/gallium/drivers/nouveau/Makefile          |  3 +-
 src/gallium/drivers/nouveau/nouveau_context.c | 41 +++++++++++++++++++++++++++
 src/gallium/drivers/nouveau/nouveau_context.h | 11 +++++++
 src/gallium/drivers/nv04/nv04_context.c       | 28 ++----------------
 src/gallium/drivers/nv04/nv04_context.h       |  1 +
 src/gallium/drivers/nv10/nv10_context.c       | 27 ++----------------
 src/gallium/drivers/nv10/nv10_context.h       |  1 +
 src/gallium/drivers/nv20/nv20_context.c       | 28 ++----------------
 src/gallium/drivers/nv20/nv20_context.h       |  1 +
 src/gallium/drivers/nv30/nv30_context.c       | 35 ++---------------------
 src/gallium/drivers/nv30/nv30_context.h       |  1 +
 src/gallium/drivers/nv40/nv40_context.c       | 35 ++---------------------
 src/gallium/drivers/nv40/nv40_context.h       |  1 +
 src/gallium/drivers/nv50/nv50_context.c       | 27 ++----------------
 src/gallium/drivers/nv50/nv50_context.h       |  1 +
 15 files changed, 72 insertions(+), 169 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/nouveau_context.c
 create mode 100644 src/gallium/drivers/nouveau/nouveau_context.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/Makefile b/src/gallium/drivers/nouveau/Makefile
index dbe8a6e7bf..0cb66041d5 100644
--- a/src/gallium/drivers/nouveau/Makefile
+++ b/src/gallium/drivers/nouveau/Makefile
@@ -3,6 +3,7 @@ include $(TOP)/configs/current
 
 LIBNAME = nouveau
 
-C_SOURCES = nouveau_screen.c
+C_SOURCES = nouveau_screen.c \
+	    nouveau_context.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/nouveau/nouveau_context.c b/src/gallium/drivers/nouveau/nouveau_context.c
new file mode 100644
index 0000000000..23443869e6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_context.c
@@ -0,0 +1,41 @@
+#include <pipe/p_defines.h>
+#include <pipe/p_context.h>
+
+#include "nouveau/nouveau_screen.h"
+#include "nouveau/nouveau_context.h"
+
+#include "nouveau/nouveau_bo.h"
+
+static unsigned int
+nouveau_reference_flags(struct nouveau_bo *bo)
+{
+	uint32_t bo_flags;
+	int flags = 0;
+
+	bo_flags = nouveau_bo_pending(bo);
+	if (bo_flags & NOUVEAU_BO_RD)
+		flags |= PIPE_REFERENCED_FOR_READ;
+	if (bo_flags & NOUVEAU_BO_WR)
+		flags |= PIPE_REFERENCED_FOR_WRITE;
+
+	return flags;
+}
+
+unsigned int
+nouveau_is_texture_referenced(struct pipe_context *pipe,
+			      struct pipe_texture *pt,
+			      unsigned face, unsigned level)
+{
+	struct nouveau_miptree *mt = nouveau_miptree(pt);
+
+	return nouveau_reference_flags(mt->bo);
+}
+
+unsigned int
+nouveau_is_buffer_referenced(struct pipe_context *pipe, struct pipe_buffer *pb)
+{
+	struct nouveau_bo *bo = nouveau_bo(pb);
+
+	return nouveau_reference_flags(bo);
+}
+
diff --git a/src/gallium/drivers/nouveau/nouveau_context.h b/src/gallium/drivers/nouveau/nouveau_context.h
new file mode 100644
index 0000000000..6a28d40da7
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_context.h
@@ -0,0 +1,11 @@
+#ifndef __NOUVEAU_CONTEXT_H__
+#define __NOUVEAU_CONTEXT_H__
+
+unsigned int
+nouveau_is_texture_referenced(struct pipe_context *, struct pipe_texture *,
+			      unsigned face, unsigned level);
+
+unsigned int
+nouveau_is_buffer_referenced(struct pipe_context *, struct pipe_buffer *);
+
+#endif
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 17166c9f51..10d984ace9 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -64,30 +64,6 @@ nv04_init_hwctx(struct nv04_context *nv04)
 	return TRUE;
 }
 
-static unsigned int
-nv04_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv04_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-
 struct pipe_context *
 nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -113,8 +89,8 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv04->pipe.clear = nv04_clear;
 	nv04->pipe.flush = nv04_flush;
 
-	nv04->pipe.is_texture_referenced = nv04_is_texture_referenced;
-	nv04->pipe.is_buffer_referenced = nv04_is_buffer_referenced;
+	nv04->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv04->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	nv04_init_surface_functions(nv04);
 	nv04_init_state_functions(nv04);
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 2842b2c90d..55326c787a 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -13,6 +13,7 @@
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_context.h"
 
 #define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
 	struct nv04_screen *ctx = nv04->screen
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index a127b134ec..933176fc32 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -257,29 +257,6 @@ nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
 {
 }
 
-static unsigned int
-nv10_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv10_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
 struct pipe_context *
 nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -305,8 +282,8 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv10->pipe.clear = nv10_clear;
 	nv10->pipe.flush = nv10_flush;
 
-	nv10->pipe.is_texture_referenced = nv10_is_texture_referenced;
-	nv10->pipe.is_buffer_referenced = nv10_is_buffer_referenced;
+	nv10->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv10->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	nv10_init_surface_functions(nv10);
 	nv10_init_state_functions(nv10);
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index f1e003c953..36a6aa7a74 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -13,6 +13,7 @@
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_context.h"
 
 #define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
 	struct nv10_screen *ctx = nv10->screen
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index b32d0d83ba..9a48739661 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -380,30 +380,6 @@ nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
 {
 }
 
-
-static unsigned int
-nv20_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv20_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
 struct pipe_context *
 nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -429,8 +405,8 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv20->pipe.clear = nv20_clear;
 	nv20->pipe.flush = nv20_flush;
 
-	nv20->pipe.is_texture_referenced = nv20_is_texture_referenced;
-	nv20->pipe.is_buffer_referenced = nv20_is_buffer_referenced;
+	nv20->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv20->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	nv20_init_surface_functions(nv20);
 	nv20_init_state_functions(nv20);
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index fc932f1f90..a4eaa95660 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -13,6 +13,7 @@
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_context.h"
 
 #define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
 	struct nv20_screen *ctx = nv20->screen
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index a3e65b96f7..d8300fd69f 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -31,37 +31,6 @@ nv30_destroy(struct pipe_context *pipe)
 	FREE(nv30);
 }
 
-static unsigned int
-nv30_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Return the corrent result. We can't alays return referenced
-    *        since it causes a double flush within the vbo module.
-    */
-#if 0
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
-   return 0;
-#endif
-}
-
-static unsigned int
-nv30_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Return the corrent result. We can't alays return referenced
-    *        since it causes a double flush within the vbo module.
-    */
-#if 0
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
-   return 0;
-#endif
-}
-
 struct pipe_context *
 nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -86,8 +55,8 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv30->pipe.clear = nv30_clear;
 	nv30->pipe.flush = nv30_flush;
 
-	nv30->pipe.is_texture_referenced = nv30_is_texture_referenced;
-	nv30->pipe.is_buffer_referenced = nv30_is_buffer_referenced;
+	nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	nv30_init_query_functions(nv30);
 	nv30_init_surface_functions(nv30);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 4229c0a0e1..8d49366dfc 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -13,6 +13,7 @@
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_context.h"
 
 #define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
 	struct nv30_screen *ctx = nv30->screen
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 4e23671202..7f008274a4 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -31,37 +31,6 @@ nv40_destroy(struct pipe_context *pipe)
 	FREE(nv40);
 }
 
-static unsigned int
-nv40_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Return the correct result. We can't always return referenced
-    *        since it causes a double flush within the vbo module.
-    */
-#if 0
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
-   return 0;
-#endif
-}
-
-static unsigned int
-nv40_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Return the correct result. We can't always return referenced
-    *        since it causes a double flush within the vbo module.
-    */
-#if 0
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
-   return 0;
-#endif
-}
-
 struct pipe_context *
 nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -86,8 +55,8 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv40->pipe.clear = nv40_clear;
 	nv40->pipe.flush = nv40_flush;
 
-	nv40->pipe.is_texture_referenced = nv40_is_texture_referenced;
-	nv40->pipe.is_buffer_referenced = nv40_is_buffer_referenced;
+	nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	nv40_init_query_functions(nv40);
 	nv40_init_surface_functions(nv40);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 97bc83292d..a3d594167a 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -13,6 +13,7 @@
 
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
+#include "nouveau/nouveau_context.h"
 
 #define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
 	struct nv40_screen *ctx = nv40->screen
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index fca078b174..7ef27bb671 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -60,29 +60,6 @@ nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
 {
 }
 
-static unsigned int
-nv50_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
-static unsigned int
-nv50_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
-{
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-}
-
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -108,8 +85,8 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
 
 	nv50->pipe.flush = nv50_flush;
 
-	nv50->pipe.is_texture_referenced = nv50_is_texture_referenced;
-	nv50->pipe.is_buffer_referenced = nv50_is_buffer_referenced;
+	nv50->pipe.is_texture_referenced = nouveau_is_texture_referenced;
+	nv50->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
 	screen->base.channel->user_private = nv50;
 	screen->base.channel->flush_notify = nv50_state_flush_notify;
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 4608854d71..fd2dab856d 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -14,6 +14,7 @@
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_stateobj.h"
+#include "nouveau/nouveau_context.h"
 
 #include "nv50_screen.h"
 #include "nv50_program.h"
-- 
cgit v1.2.3


From 6ab2fcca9d40ed65ab8d88c0253969c5311b7320 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 10 Oct 2009 13:18:07 +0200
Subject: nv50: nicer texture format switch

Similar to nv40.
---
 src/gallium/drivers/nv50/nv50_tex.c | 144 ++++++++++++------------------------
 1 file changed, 49 insertions(+), 95 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index ca2b883e9b..81e04327e8 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -25,106 +25,60 @@
 
 #include "nouveau/nouveau_stateobj.h"
 
+#define _(pf, tt, r, g, b, a, tf)                       	\
+{                                                       	\
+	PIPE_FORMAT_##pf,					\
+	NV50TIC_0_0_MAPR_##r | NV50TIC_0_0_TYPER_##tt |		\
+	NV50TIC_0_0_MAPG_##g | NV50TIC_0_0_TYPEG_##tt |		\
+	NV50TIC_0_0_MAPB_##b | NV50TIC_0_0_TYPEB_##tt |		\
+	NV50TIC_0_0_MAPA_##a | NV50TIC_0_0_TYPEA_##tt |		\
+	NV50TIC_0_0_FMT_##tf					\
+}
+/* _() above builds one { pf, hw } row by pasting its arguments onto prefixes. */
+struct nv50_texture_format {
+	enum pipe_format pf; /* format compared against the miptree's format */
+	uint32_t hw; /* word nv50_tex_construct() emits with so_data() */
+};
+/* Row count of nv50_tex_format_list; only expand it after the array below. */
+#define NV50_TEX_FORMAT_LIST_SIZE \
+	(sizeof(nv50_tex_format_list) / sizeof(struct nv50_texture_format))
+/* Formats nv50_tex_construct() accepts; it scans the rows linearly. */
+static const struct nv50_texture_format nv50_tex_format_list[] =
+{
+	_(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3,  8_8_8_8),
+	_(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
+	_(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3,  1_5_5_5),
+	_(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3,  4_4_4_4),
+
+	_(R5G6B5_UNORM, UNORM, C2, C1, C0, ONE, 5_6_5),
+
+	_(L8_UNORM, UNORM, C0, C0, C0, ONE, 8),
+	_(A8_UNORM, UNORM, ZERO, ZERO, ZERO, C0, 8),
+	_(I8_UNORM, UNORM, C0, C0, C0, C0, 8),
+
+	_(A8L8_UNORM, UNORM, C0, C0, C0, C1, 8_8),
+
+	_(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
+	_(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
+	_(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
+	_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5)
+};
+
+#undef _
+
 static int
 nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 		   struct nv50_miptree *mt, int unit)
 {
-	switch (mt->base.base.format) {
-	case PIPE_FORMAT_A8R8G8B8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8_8_8_8);
-		break;
-	case PIPE_FORMAT_X8R8G8B8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8_8_8_8);
-		break;
-	case PIPE_FORMAT_A1R5G5B5_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_1_5_5_5);
-		break;
-	case PIPE_FORMAT_A4R4G4B4_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_4_4_4_4);
-		break;
-	case PIPE_FORMAT_R5G6B5_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_5_6_5);
-		break;
-	case PIPE_FORMAT_L8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8);
-		break;
-	case PIPE_FORMAT_A8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8);
-		break;
-	case PIPE_FORMAT_I8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8);
-		break;
-	case PIPE_FORMAT_A8L8_UNORM:
-		so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_8_8);
-		break;
-	case PIPE_FORMAT_DXT1_RGB:
-		so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_DXT1);
-		break;
-	case PIPE_FORMAT_DXT1_RGBA:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_DXT1);
-		break;
-	case PIPE_FORMAT_DXT3_RGBA:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_DXT3);
-		break;
-	case PIPE_FORMAT_DXT5_RGBA:
-		so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
-			    NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
-			    NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM |
-			    NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM |
-			    NV50TIC_0_0_FMT_DXT5);
-		break;
-	default:
-		return 1;
-	}
+	unsigned i;
+
+	for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++)
+		if (nv50_tex_format_list[i].pf == mt->base.base.format)
+			break;
+	if (i == NV50_TEX_FORMAT_LIST_SIZE)
+                return 1;
 
+	so_data (so, nv50_tex_format_list[i].hw);
 	so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
 		     NOUVEAU_BO_RD, 0, 0);
 	if (nv50->sampler[unit]->normalized)
-- 
cgit v1.2.3


From fba2eabe13b8a3f8c1396c5949db3daab0192156 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 10 Oct 2009 13:13:16 +0200
Subject: nv50: use SIFC for TIC, TSC upload

Add proper flushes for TIC and TSC and remove
the costly 2D.0110 flush in nv50_flush.

Correct TIC and TSC bo sizes.
---
 src/gallium/drivers/nv50/nv50_context.c        |  7 ----
 src/gallium/drivers/nv50/nv50_context.h        |  5 +++
 src/gallium/drivers/nv50/nv50_screen.c         | 25 ++----------
 src/gallium/drivers/nv50/nv50_state_validate.c | 54 ++++++++++++++++++++++----
 src/gallium/drivers/nv50/nv50_tex.c            | 34 +++++++++-------
 src/gallium/drivers/nv50/nv50_vbo.c            |  4 --
 6 files changed, 77 insertions(+), 52 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 7ef27bb671..219e7a7862 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -33,13 +33,6 @@ nv50_flush(struct pipe_context *pipe, unsigned flags,
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
-
-	/* We need this in the ddx for reliable composite, not sure what we're
-	 * actually flushing. We generate all our own flushes with flags = 0. */
-	WAIT_RING(chan, 2);
-	BEGIN_RING(chan, eng2d, 0x0110, 1);
-	OUT_RING  (chan, 0);
 
 	if (flags & PIPE_FLUSH_FRAME)
 		FIRE_RING(chan);
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index fd2dab856d..75cb65d9a2 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -199,6 +199,11 @@ extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program
 extern boolean nv50_state_validate(struct nv50_context *nv50);
 extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 
+extern void nv50_so_init_sifc(struct nv50_context *nv50,
+			      struct nouveau_stateobj *so,
+			      struct nouveau_bo *bo, unsigned reloc,
+			      unsigned size);
+
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
 
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index dd7baecba7..66361dc3ba 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -364,48 +364,31 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x00000131 | (NV50_CB_PFP << 12));
 
-	/* Texture sampler/image unit setup - we abuse the constant buffer
-	 * upload mechanism for the moment to upload data to the tex config
-	 * blocks.  At some point we *may* want to go the NVIDIA way of doing
-	 * things?
-	 */
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tic);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
 	}
 
-	so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
-		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
-		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_TIC << 16) | 0x0800);
 	so_method(so, screen->tesla, NV50TCL_TIC_ADDRESS_HIGH, 3);
 	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x00000800);
+	so_data  (so, 0x000007ff);
 
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 32*8*4, &screen->tsc);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
 	}
 
-	so_method(so, screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
-		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
-		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_TSC << 16) | 0x0800);
 	so_method(so, screen->tesla, NV50TCL_TSC_ADDRESS_HIGH, 3);
 	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x00000800);
+	so_data  (so, 0x00000000);
 
 
 	/* Vertex array limits - max them out */
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 9079de918d..012911f41b 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -222,6 +222,9 @@ nv50_state_flush_notify(struct nouveau_channel *chan)
 {
 	struct nv50_context *nv50 = chan->user_private;
 
+	if (nv50->state.tic_upload && !(nv50->dirty & NV50_NEW_TEXTURE))
+		so_emit(chan, nv50->state.tic_upload);
+
 	so_emit_reloc_markers(chan, nv50->state.fb);
 	so_emit_reloc_markers(chan, nv50->state.vertprog);
 	so_emit_reloc_markers(chan, nv50->state.fragprog);
@@ -233,6 +236,7 @@ boolean
 nv50_state_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_stateobj *so;
 	unsigned i;
 
@@ -354,19 +358,25 @@ scissor_uptodate:
 viewport_uptodate:
 
 	if (nv50->dirty & NV50_NEW_SAMPLER) {
-		int i;
+		unsigned i;
+
+		so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+
+		nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+				  nv50->sampler_nr * 8 * 4);
 
-		so = so_new(nv50->sampler_nr * 11, 0);
 		for (i = 0; i < nv50->sampler_nr; i++) {
 			if (!nv50->sampler[i])
 				continue;
-
-			so_method(so, tesla, NV50TCL_CB_ADDR, 1);
-			so_data  (so, ((i * 8) << NV50TCL_CB_ADDR_ID_SHIFT) |
-				      NV50_CB_TSC);
-			so_method(so, tesla, NV50TCL_CB_DATA(0) | (2<<29), 8);
+			so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
 			so_datap (so, nv50->sampler[i]->tsc, 8);
 		}
+
+		so_method(so, tesla, 0x1440, 1); /* sync SIFC */
+		so_data  (so, 0);
+		so_method(so, tesla, 0x1334, 1); /* flush TSC */
+		so_data  (so, 0);
+
 		so_ref(so, &nv50->state.tsc_upload);
 		so_ref(NULL, &so);
 	}
@@ -384,3 +394,33 @@ viewport_uptodate:
 	return TRUE;
 }
 
+void nv50_so_init_sifc(struct nv50_context *nv50,
+		       struct nouveau_stateobj *so,
+		       struct nouveau_bo *bo, unsigned reloc, unsigned size)
+{
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+	/* Queue 2D SIFC setup that writes into bo; callers append SIFC_DATA. */
+	so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
+	so_data  (so, NV50_2D_DST_FORMAT_R8_UNORM);
+	so_data  (so, 1);
+	so_method(so, eng2d, NV50_2D_DST_PITCH, 5);
+	so_data  (so, 262144); /* NOTE(review): presumably the pitch - confirm */
+	so_data  (so, 65536); /* NOTE(review): meaning not shown here - confirm */
+	so_data  (so, 1);
+	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0); /* `reloc` flags OR'ed in */
+	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0); /* same bo, LOW half */
+	so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
+	so_data  (so, 0);
+	so_data  (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
+	so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
+	so_data  (so, size); /* callers pass <entry count> * 8 * 4 */
+	so_data  (so, 1);
+	so_data  (so, 0);
+	so_data  (so, 1);
+	so_data  (so, 0);
+	so_data  (so, 1);
+	so_data  (so, 0);
+	so_data  (so, 0);
+	so_data  (so, 0);
+	so_data  (so, 0); /* 4 methods + 17 data + 2 relocs; callers reserve 23 */
+}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 81e04327e8..e12a6ad648 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -98,24 +98,24 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 void
 nv50_tex_validate(struct nv50_context *nv50)
 {
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_stateobj *so;
-	int unit, push;
+	unsigned i, unit, push;
 
-	push  = nv50->miptree_nr * 11;
-	push += MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2;
+	push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
+	so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2);
 
-	so = so_new(push, nv50->miptree_nr * 2);
-	for (unit = 0; unit < nv50->miptree_nr; unit++) {
+	nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
+			  nv50->miptree_nr * 8 * 4);
+
+	for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
 		struct nv50_miptree *mt = nv50->miptree[unit];
 
 		if (!mt)
 			continue;
 
-		so_method(so, tesla, NV50TCL_CB_ADDR, 1);
-		so_data  (so, ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT) |
-			      NV50_CB_TIC);
-		so_method(so, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
+		so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
 		if (nv50_tex_construct(nv50, so, mt, unit)) {
 			NOUVEAU_ERR("failed tex validate\n");
 			so_ref(NULL, &so);
@@ -123,17 +123,25 @@ nv50_tex_validate(struct nv50_context *nv50)
 		}
 
 		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-		so_data  (so, (unit << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
-			(unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
-			NV50TCL_SET_SAMPLER_TEX_VALID);
+		so_data  (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
+			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
+			  NV50TCL_SET_SAMPLER_TEX_VALID);
 	}
 
 	for (; unit < nv50->state.miptree_nr; unit++) {
 		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
 		so_data  (so,
-			(unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
 	}
 
+	/* not sure if the following really do what I think: */
+	so_method(so, tesla, 0x1440, 1); /* sync SIFC */
+	so_data  (so, 0);
+	so_method(so, tesla, 0x1330, 1); /* flush TIC */
+	so_data  (so, 0);
+	so_method(so, tesla, 0x1338, 1); /* flush texture caches */
+	so_data  (so, 0x20);
+
 	so_ref(so, &nv50->state.tic_upload);
 	so_ref(NULL, &so);
 	nv50->state.miptree_nr = nv50->miptree_nr;
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index eeed148c7b..8b0fbf0e76 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -139,10 +139,6 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	OUT_RING  (chan, 0);
 	BEGIN_RING(chan, tesla, 0x142c, 1);
 	OUT_RING  (chan, 0);
-	BEGIN_RING(chan, tesla, 0x1440, 1);
-	OUT_RING  (chan, 0);
-	BEGIN_RING(chan, tesla, 0x1334, 1);
-	OUT_RING  (chan, 0);
 
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
-- 
cgit v1.2.3


From c0e80cf0e97cec526bb2ff0f94d9142e33374c20 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 14 Oct 2009 21:23:29 +0200
Subject: nv50: submit user vbo data through the fifo

Requesting a new real buffer from the kernel and
copying all the data is wasteful e.g. if only a
few (but widely spread) vertices are accessed.
---
 src/gallium/drivers/nv50/nv50_context.h |   3 +
 src/gallium/drivers/nv50/nv50_vbo.c     | 409 ++++++++++++++++++++++++++++++--
 2 files changed, 394 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 75cb65d9a2..33667e8765 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -121,6 +121,7 @@ struct nv50_state {
 	struct nouveau_stateobj *vtxfmt;
 	struct nouveau_stateobj *vtxbuf;
 	struct nouveau_stateobj *vtxattr;
+	unsigned vtxelt_nr;
 };
 
 struct nv50_context {
@@ -153,6 +154,8 @@ struct nv50_context {
 	unsigned sampler_nr;
 	struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
 	unsigned miptree_nr;
+
+	uint16_t vbo_fifo;
 };
 
 static INLINE struct nv50_context *
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 8b0fbf0e76..db54380241 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -26,6 +26,18 @@
 
 #include "nv50_context.h"
 
+static boolean
+nv50_push_elements_u08(struct nv50_context *, uint8_t *, unsigned);
+
+static boolean
+nv50_push_elements_u16(struct nv50_context *, uint16_t *, unsigned);
+
+static boolean
+nv50_push_elements_u32(struct nv50_context *, uint32_t *, unsigned);
+
+static boolean
+nv50_push_arrays(struct nv50_context *, unsigned, unsigned);
+
 static INLINE unsigned
 nv50_prim(unsigned mode)
 {
@@ -132,6 +144,7 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	struct nv50_context *nv50 = nv50_context(pipe);
 	struct nouveau_channel *chan = nv50->screen->tesla->channel;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	boolean ret;
 
 	nv50_state_validate(nv50);
 
@@ -142,17 +155,22 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
 	OUT_RING  (chan, nv50_prim(mode));
-	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
-	OUT_RING  (chan, start);
-	OUT_RING  (chan, count);
+
+	if (nv50->vbo_fifo)
+		ret = nv50_push_arrays(nv50, start, count);
+	else {
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2);
+		OUT_RING  (chan, start);
+		OUT_RING  (chan, count);
+		ret = TRUE;
+	}
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
-	pipe->flush(pipe, 0, NULL);
-	return TRUE;
+	return ret;
 }
 
-static INLINE void
+static INLINE boolean
 nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 			      unsigned start, unsigned count)
 {
@@ -161,6 +179,9 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 
 	map += start;
 
+	if (nv50->vbo_fifo)
+		return nv50_push_elements_u08(nv50, map, count);
+
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, 0x15e8, 1);
 		OUT_RING  (chan, map[0]);
@@ -179,9 +200,10 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 		count -= nr;
 		map += nr;
 	}
+	return TRUE;
 }
 
-static INLINE void
+static INLINE boolean
 nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 			      unsigned start, unsigned count)
 {
@@ -190,6 +212,9 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 
 	map += start;
 
+	if (nv50->vbo_fifo)
+		return nv50_push_elements_u16(nv50, map, count);
+
 	if (count & 1) {
 		BEGIN_RING(chan, tesla, 0x15e8, 1);
 		OUT_RING  (chan, map[0]);
@@ -208,9 +233,10 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 		count -= nr;
 		map += nr;
 	}
+	return TRUE;
 }
 
-static INLINE void
+static INLINE boolean
 nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 			      unsigned start, unsigned count)
 {
@@ -219,6 +245,9 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 
 	map += start;
 
+	if (nv50->vbo_fifo)
+		return nv50_push_elements_u32(nv50, map, count);
+
 	while (count) {
 		unsigned nr = count > 2047 ? 2047 : count;
 
@@ -228,6 +257,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 		count -= nr;
 		map += nr;
 	}
+	return TRUE;
 }
 
 boolean
@@ -240,6 +270,7 @@ nv50_draw_elements(struct pipe_context *pipe,
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct pipe_screen *pscreen = pipe->screen;
 	void *map;
+	boolean ret;
 	
 	map = pipe_buffer_map(pscreen, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ);
 
@@ -254,23 +285,25 @@ nv50_draw_elements(struct pipe_context *pipe,
 	OUT_RING  (chan, nv50_prim(mode));
 	switch (indexSize) {
 	case 1:
-		nv50_draw_elements_inline_u08(nv50, map, start, count);
+		ret = nv50_draw_elements_inline_u08(nv50, map, start, count);
 		break;
 	case 2:
-		nv50_draw_elements_inline_u16(nv50, map, start, count);
+		ret = nv50_draw_elements_inline_u16(nv50, map, start, count);
 		break;
 	case 4:
-		nv50_draw_elements_inline_u32(nv50, map, start, count);
+		ret = nv50_draw_elements_inline_u32(nv50, map, start, count);
 		break;
 	default:
 		assert(0);
+		ret = FALSE;
+		break;
 	}
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
 	pipe_buffer_unmap(pscreen, indexBuffer);
-	pipe->flush(pipe, 0, NULL);
-	return TRUE;
+
+	return ret;
 }
 
 static INLINE boolean
@@ -337,17 +370,24 @@ nv50_vbo_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nouveau_stateobj *vtxbuf, *vtxfmt, *vtxattr;
-	unsigned i;
+	unsigned i, n_ve;
 
 	/* don't validate if Gallium took away our buffers */
 	if (nv50->vtxbuf_nr == 0)
 		return;
+	nv50->vbo_fifo = 0;
+
+	for (i = 0; i < nv50->vtxbuf_nr; ++i)
+		if (nv50->vtxbuf[i].stride &&
+		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
+			nv50->vbo_fifo = 0xffff;
+
+	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
 
 	vtxattr = NULL;
-	vtxbuf = so_new(nv50->vtxelt_nr * 7, nv50->vtxelt_nr * 4);
-	vtxfmt = so_new(nv50->vtxelt_nr + 1, 0);
-	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0),
-		nv50->vtxelt_nr);
+	vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4);
+	vtxfmt = so_new(n_ve + 1, 0);
+	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
 
 	for (i = 0; i < nv50->vtxelt_nr; i++) {
 		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
@@ -363,10 +403,19 @@ nv50_vbo_validate(struct nv50_context *nv50)
 			so_method(vtxbuf, tesla,
 				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
 			so_data  (vtxbuf, 0);
+
+			nv50->vbo_fifo &= ~(1 << i);
 			continue;
 		}
 		so_data(vtxfmt, hw | i);
 
+		if (nv50->vbo_fifo) {
+			so_method(vtxbuf, tesla,
+				  NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+			so_data  (vtxbuf, 0);
+			continue;
+		}
+
 		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 3);
 		so_data  (vtxbuf, 0x20000000 | vb->stride);
 		so_reloc (vtxbuf, bo, vb->buffer_offset +
@@ -385,6 +434,13 @@ nv50_vbo_validate(struct nv50_context *nv50)
 			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
 			  NOUVEAU_BO_LOW, 0, 0);
 	}
+	for (; i < n_ve; ++i) {
+		so_data  (vtxfmt, 0x7e080010);
+
+		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_FORMAT(i), 1);
+		so_data  (vtxbuf, 0);
+	}
+	nv50->state.vtxelt_nr = nv50->vtxelt_nr;
 
 	so_ref (vtxfmt, &nv50->state.vtxfmt);
 	so_ref (vtxbuf, &nv50->state.vtxbuf);
@@ -394,3 +450,320 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	so_ref (NULL, &vtxattr);
 }
 
+typedef void (*pfn_push)(struct nouveau_channel *, void *); /* pushes one attribute read from the pointer */
+
+struct nv50_vbo_emitctx
+{
+	pfn_push push[16];	/* per-entry push routine, chosen in emit_prepare() */
+	void *map[16];		/* per-entry read pointer into the mapped vertex buffer */
+	unsigned stride[16];	/* per-entry vb->stride, added to map[] per vertex */
+	unsigned nr_ve;		/* number of entries in use in the arrays above */
+	unsigned vtx_dwords;	/* dwords pushed per vertex, summed over all entries */
+	unsigned vtx_max;	/* vertices per packet, set to 512 / vtx_dwords */
+};
+
+static INLINE void
+emit_vtx_next(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit)
+{ /* Push every attribute of the current vertex, then step to the next one. */
+	unsigned i;
+
+	for (i = 0; i < emit->nr_ve; ++i) {
+		emit->push[i](chan, emit->map[i]);
+		emit->map[i] += emit->stride[i]; /* void * arithmetic (GNU C): byte step */
+	}
+}
+
+static INLINE void
+emit_vtx(struct nouveau_channel *chan, struct nv50_vbo_emitctx *emit,
+	 uint32_t vi)
+{ /* Push every attribute of vertex number vi; emit->map[] is not advanced. */
+	unsigned i;
+
+	for (i = 0; i < emit->nr_ve; ++i)
+		emit->push[i](chan, emit->map[i] + emit->stride[i] * vi);
+}
+
+static INLINE boolean
+nv50_map_vbufs(struct nv50_context *nv50)
+{ /* Map all vertex buffers for reading; TRUE on success, FALSE otherwise. */
+	int i;
+
+	for (i = 0; i < nv50->vtxbuf_nr; ++i) {
+		struct pipe_vertex_buffer *vb = &nv50->vtxbuf[i];
+		unsigned size, delta;
+
+		if (nouveau_bo(vb->buffer)->map) /* already mapped */
+			continue;
+
+		size = vb->stride * (vb->max_index + 1);
+		delta = vb->buffer_offset;
+
+		if (!size) /* fall back to everything past the offset */
+			size = vb->buffer->size - vb->buffer_offset;
+
+		if (nouveau_bo_map_range(nouveau_bo(vb->buffer),
+					 delta, size, NOUVEAU_BO_RD))
+			break; /* a non-zero return is treated as failure */
+	}
+
+	if (i == nv50->vtxbuf_nr)
+		return TRUE;
+	while (--i >= 0) /* buffer i failed to map, so start undoing below it */
+		nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+	return FALSE;
+}
+
+static INLINE void
+nv50_unmap_vbufs(struct nv50_context *nv50)
+{ /* Unmap every vertex buffer whose bo has a non-NULL map pointer. */
+        unsigned i;
+
+        for (i = 0; i < nv50->vtxbuf_nr; ++i)
+                if (nouveau_bo(nv50->vtxbuf[i].buffer)->map)
+                        nouveau_bo_unmap(nouveau_bo(nv50->vtxbuf[i].buffer));
+}
+
+static void
+emit_b32_1(struct nouveau_channel *chan, void *data)
+{ /* Push one dword read from data (emit_prepare also uses it for 2x16, 4x8). */
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b32_2(struct nouveau_channel *chan, void *data)
+{ /* Push two dwords read from data (emit_prepare also uses it for 4x16). */
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+}
+
+static void
+emit_b32_3(struct nouveau_channel *chan, void *data)
+{ /* Push three consecutive dwords read from data. */
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+	OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b32_4(struct nouveau_channel *chan, void *data)
+{ /* Push four consecutive dwords read from data. */
+	uint32_t *v = data;
+
+	OUT_RING(chan, v[0]);
+	OUT_RING(chan, v[1]);
+	OUT_RING(chan, v[2]);
+	OUT_RING(chan, v[3]);
+}
+
+static void
+emit_b16_1(struct nouveau_channel *chan, void *data)
+{ /* Push one dword holding the 16-bit value at data (also used for 2x8). */
+	uint16_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b16_3(struct nouveau_channel *chan, void *data)
+{ /* Push three 16-bit components as two dwords: (v1 << 16 | v0), then v2. */
+	uint16_t *v = data;
+
+	OUT_RING(chan, ((uint32_t)v[1] << 16) | v[0]); /* cast: int << 16 may overflow */
+	OUT_RING(chan, v[2]);
+}
+
+static void
+emit_b08_1(struct nouveau_channel *chan, void *data)
+{ /* Push one dword holding the single byte at data. */
+	uint8_t *v = data;
+
+	OUT_RING(chan, v[0]);
+}
+
+static void
+emit_b08_3(struct nouveau_channel *chan, void *data)
+{ /* Push three bytes packed into one dword, v[0] in the lowest byte. */
+	uint8_t *v = data;
+
+	OUT_RING(chan, (v[2] << 16) | (v[1] << 8) | v[0]);
+}
+
+/* Map the vertex buffers and fill *emit with a read pointer, stride and push
+ * routine per FIFO vertex element.  On failure, unmaps and returns FALSE. */
+static boolean
+emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
+	     unsigned start)
+{
+	unsigned i;
+
+	if (nv50_map_vbufs(nv50) == FALSE)
+		return FALSE;
+
+	emit->nr_ve = 0;
+	emit->vtx_dwords = 0;
+
+	for (i = 0; i < nv50->vtxelt_nr; ++i) {
+		struct pipe_vertex_element *ve = &nv50->vtxelt[i];
+		struct pipe_vertex_buffer *vb;
+		unsigned n, size;
+
+		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
+		if (!(nv50->vbo_fifo & (1 << i))) /* bit i clear: skip this element */
+			continue;
+		n = emit->nr_ve++;
+
+		emit->stride[n] = vb->stride;
+		emit->map[n] = nouveau_bo(vb->buffer)->map +
+			(start * vb->stride + ve->src_offset); /* attribute of vertex 'start' */
+
+		size = pf_size_x(ve->src_format) << pf_exp2(ve->src_format);
+		assert(ve->nr_components > 0 && ve->nr_components <= 4);
+		/* It shouldn't be necessary to push the implicit 1s
+		 * for case 3 and size 8 cases 1, 2, 3. */
+		switch (size) {
+		default:
+			NOUVEAU_ERR("unsupported vtxelt size: %u\n", size);
+			goto fail;
+		case 32:
+			switch (ve->nr_components) {
+			case 1: emit->push[n] = emit_b32_1; break;
+			case 2: emit->push[n] = emit_b32_2; break;
+			case 3: emit->push[n] = emit_b32_3; break;
+			case 4: emit->push[n] = emit_b32_4; break;
+			}
+			emit->vtx_dwords += ve->nr_components;
+			break;
+		case 16:
+			switch (ve->nr_components) {
+			case 1: emit->push[n] = emit_b16_1; break;
+			case 2: emit->push[n] = emit_b32_1; break;
+			case 3: emit->push[n] = emit_b16_3; break;
+			case 4: emit->push[n] = emit_b32_2; break;
+			}
+			emit->vtx_dwords += (ve->nr_components + 1) >> 1;
+			break;
+		case 8:
+			switch (ve->nr_components) {
+			case 1: emit->push[n] = emit_b08_1; break;
+			case 2: emit->push[n] = emit_b16_1; break;
+			case 3: emit->push[n] = emit_b08_3; break;
+			case 4: emit->push[n] = emit_b32_1; break;
+			}
+			emit->vtx_dwords += 1;
+			break;
+		}
+	}
+	if (emit->vtx_dwords) { /* else nothing to push, and 512 / 0 is undefined */
+		emit->vtx_max = 512 / emit->vtx_dwords;
+		return TRUE;
+	}
+fail:	/* callers return at once on FALSE, so drop the mappings here */
+	nv50_unmap_vbufs(nv50);
+	return FALSE;
+}
+
+static boolean
+nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
+{ /* Push count consecutive vertices, beginning at vertex start, via the ring. */
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nv50_vbo_emitctx emit;
+
+	if (emit_prepare(nv50, &emit, start) == FALSE)
+		return FALSE;
+
+	while (count) {
+		unsigned i, dw, nr = MIN2(count, emit.vtx_max); /* vertices in this packet */
+	        dw = nr * emit.vtx_dwords;
+
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); /* dw data words follow */
+		for (i = 0; i < nr; ++i)
+			emit_vtx_next(chan, &emit);
+
+		count -= nr;
+	}
+	nv50_unmap_vbufs(nv50); /* mapped by emit_prepare() */
+
+	return TRUE;
+}
+
+static boolean
+nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
+{ /* Push count vertices selected by the 32-bit indices read from map. */
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nv50_vbo_emitctx emit;
+
+	if (emit_prepare(nv50, &emit, 0) == FALSE) /* start 0: indices are absolute */
+		return FALSE;
+
+	while (count) {
+		unsigned i, dw, nr = MIN2(count, emit.vtx_max); /* vertices in this packet */
+	        dw = nr * emit.vtx_dwords;
+
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); /* dw data words follow */
+		for (i = 0; i < nr; ++i)
+			emit_vtx(chan, &emit, *map++);
+
+		count -= nr;
+	}
+	nv50_unmap_vbufs(nv50); /* mapped by emit_prepare() */
+
+	return TRUE;
+}
+
+static boolean
+nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
+{ /* Push count vertices selected by the 16-bit indices read from map. */
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nv50_vbo_emitctx emit;
+
+	if (emit_prepare(nv50, &emit, 0) == FALSE) /* start 0: indices are absolute */
+		return FALSE;
+
+	while (count) {
+		unsigned i, dw, nr = MIN2(count, emit.vtx_max); /* vertices in this packet */
+	        dw = nr * emit.vtx_dwords;
+
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); /* dw data words follow */
+		for (i = 0; i < nr; ++i)
+			emit_vtx(chan, &emit, *map++);
+
+		count -= nr;
+	}
+	nv50_unmap_vbufs(nv50); /* mapped by emit_prepare() */
+
+	return TRUE;
+}
+
+static boolean
+nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
+{ /* Push count vertices selected by the 8-bit indices read from map. */
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	struct nv50_vbo_emitctx emit;
+
+	if (emit_prepare(nv50, &emit, 0) == FALSE) /* start 0: indices are absolute */
+		return FALSE;
+
+	while (count) {
+		unsigned i, dw, nr = MIN2(count, emit.vtx_max); /* vertices in this packet */
+	        dw = nr * emit.vtx_dwords;
+
+		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw); /* dw data words follow */
+		for (i = 0; i < nr; ++i)
+			emit_vtx(chan, &emit, *map++);
+
+		count -= nr;
+	}
+	nv50_unmap_vbufs(nv50); /* mapped by emit_prepare() */
+
+	return TRUE;
+}
-- 
cgit v1.2.3


From 1635e8d6f4b96e691746e8c8c5a273089bae6843 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 14 Oct 2009 21:27:35 +0200
Subject: nv50: add support for DDX and DDY opcodes

---
 src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 576d075318..89e0ac8db9 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -837,7 +837,7 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 #define CVTOP_SAT	0x08
 #define CVTOP_ABS	0x10
 
-/* 0x04 == 32 bit */
+/* 0x04 == 32 bit dst */
 /* 0x40 == dst is float */
 /* 0x80 == src is float */
 #define CVT_F32_F32 0xc4
@@ -858,7 +858,7 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
 	set_long(pc, e);
 
 	e->inst[0] |= 0xa0000000;
-	e->inst[1] |= 0x00004000;
+	e->inst[1] |= 0x00004000; /* 32 bit src */
 	e->inst[1] |= (cvn << 16);
 	e->inst[1] |= (fmt << 24);
 	set_src_0(pc, src, e);
@@ -1037,20 +1037,10 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	FREE(one);
 }
 
-static void
+static INLINE void
 emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	struct nv50_program_exec *e = exec(pc);
-
-	set_long(pc, e);
-	e->inst[0] |= 0xa0000000; /* delta */
-	e->inst[1] |= (7 << 29); /* delta */
-	e->inst[1] |= 0x04000000; /* negate arg0? probably not */
-	e->inst[1] |= (1 << 14); /* src .f32 */
-	set_dst(pc, dst, e);
-	set_src_0(pc, src, e);
-
-	emit(pc, e);
+	emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG);
 }
 
 static void
@@ -1218,6 +1208,43 @@ emit_nop(struct nv50_pc *pc)
 	emit(pc, e);
 }
 
+static void
+emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{ /* Emit the instruction used for TGSI DDX, writing dst from src. */
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(src->type == P_TEMP); /* only temporaries are accepted as source */
+
+	e->inst[0] = 0xc0140000; /* raw opcode words; bit meanings not documented here */
+	e->inst[1] = 0x89800000;
+	set_long(pc, e);
+	set_dst(pc, dst, e);
+	set_src_0(pc, src, e); /* the same register goes into source slots 0 and 2 */
+	set_src_2(pc, src, e);
+
+	emit(pc, e);
+}
+
+static void
+emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{ /* Emit the instruction used for TGSI DDY, writing dst from src. */
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(src->type == P_TEMP); /* only temporaries are accepted as source */
+
+	if (!src->neg) /* ! double negation */
+		emit_neg(pc, src, src); /* NOTE(review): writes -src back into src itself - confirm later readers expect that */
+
+	e->inst[0] = 0xc0150000; /* raw opcode words; bit meanings not documented here */
+	e->inst[1] = 0x8a400000;
+	set_long(pc, e);
+	set_dst(pc, dst, e);
+	set_src_0(pc, src, e); /* the same register goes into source slots 0 and 2 */
+	set_src_2(pc, src, e);
+
+	emit(pc, e);
+}
+
 static void
 convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 {
@@ -1270,6 +1297,7 @@ static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
 	switch (insn->Instruction.Opcode) {
+	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
 	case TGSI_OPCODE_DP4:
 	case TGSI_OPCODE_MUL:
@@ -1660,6 +1688,20 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_precossin(pc, temp, src[0][0]);
 		emit_flop(pc, 5, brdc, temp);
 		break;
+	case TGSI_OPCODE_DDX:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_ddx(pc, dst[c], src[0][c]);
+		}
+		break;
+	case TGSI_OPCODE_DDY:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_ddy(pc, dst[c], src[0][c]);
+		}
+		break;
 	case TGSI_OPCODE_DP3:
 		emit_mul(pc, temp, src[0][0], src[1][0]);
 		emit_mad(pc, temp, src[0][1], src[1][1], temp);
-- 
cgit v1.2.3


From f204eb184237b387432413212a3a20d83c87594b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 13 Oct 2009 15:09:13 +0200
Subject: nv50: quick fix for insn src negation

We only have a per nv50_reg negation flag, if an
nv50_reg is used more than once in a TGSI op with
different sign modes, we'd generate wrong code.

We probably can't do much better without more
invasive changes.
---
 src/gallium/drivers/nv50/nv50_program.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 89e0ac8db9..1bd6f717d1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1293,9 +1293,12 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 	e->inst[1] |= q;
 }
 
+/* Some operations support an optional negation flag. */
 static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
+	int s;
+
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
@@ -1305,12 +1308,29 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_ADD:
 	case TGSI_OPCODE_SUB:
 	case TGSI_OPCODE_MAD:
-		return TRUE;
+		break;
 	case TGSI_OPCODE_POW:
-		return (i == 1) ? TRUE : FALSE;
+		if (i == 1)
+			break;
+		return FALSE;
 	default:
 		return FALSE;
 	}
+
+	/* Watch out for possible multiple uses of an nv50_reg, we
+	 * can't use nv50_reg::neg in these cases.
+	 */
+	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
+		if (s == i)
+			continue;
+		if ((insn->FullSrcRegisters[s].SrcRegister.Index ==
+		     insn->FullSrcRegisters[i].SrcRegister.Index) &&
+		    (insn->FullSrcRegisters[s].SrcRegister.File ==
+		     insn->FullSrcRegisters[i].SrcRegister.File))
+			return FALSE;
+	}
+
+	return TRUE;
 }
 
 /* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1956,6 +1976,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!src[i][c])
 				continue;
+			src[i][c]->neg = 0;
 			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
 				FREE(src[i][c]);
 		}
-- 
cgit v1.2.3


From 2eef2017acbbb617c559555648c7745141f3aedb Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 19 Oct 2009 17:47:29 +0200
Subject: nv50: implement TGSI_OPCODE_CMP

---
 src/gallium/drivers/nv50/nv50_program.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 1bd6f717d1..3b7033b518 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -506,11 +506,13 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
 	struct nv50_program_exec *e = exec(pc);
 
-	e->inst[0] |= 0x10000000;
+	e->inst[0] = 0x10000000;
+	if (!pc->allow32)
+		set_long(pc, e);
 
 	set_dst(pc, dst, e);
 
-	if (pc->allow32 && dst->type != P_RESULT && src->type == P_IMMD) {
+	if (!is_long(e) && src->type == P_IMMD) {
 		set_immd(pc, src, e);
 		/*XXX: 32-bit, but steals part of "half" reg space - need to
 		 *     catch and handle this case if/when we do half-regs
@@ -1696,6 +1698,18 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 				 CVTOP_CEIL, CVT_F32_F32 | CVT_RI);
 		}
 		break;
+	case TGSI_OPCODE_CMP:
+		pc->allow32 = FALSE;
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32);
+			emit_mov(pc, dst[c], src[1][c]);
+			set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */
+			emit_mov(pc, dst[c], src[2][c]);
+			set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
+		}
+		break;
 	case TGSI_OPCODE_COS:
 		if (mask & 8) {
 			emit_precossin(pc, temp, src[0][3]);
-- 
cgit v1.2.3


From eb7ea97e7fff1ee39921ad81294c4963b5b3ded8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 19 Oct 2009 17:53:31 +0200
Subject: nv50: cleanup emit_kil

---
 src/gallium/drivers/nv50/nv50_program.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 3b7033b518..bfd979ce0f 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1050,22 +1050,18 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 {
 	struct nv50_program_exec *e;
 	const int r_pred = 1;
+	unsigned cvn = CVT_F32_F32;
 
-	/* Sets predicate reg ? */
-	e = exec(pc);
-	e->inst[0] = 0xa00001fd;
-	e->inst[1] = 0xc4014788;
-	set_src_0(pc, src, e);
-	set_pred_wr(pc, 1, r_pred, e);
 	if (src->neg)
-		e->inst[1] |= 0x20000000;
-	emit(pc, e);
+		cvn |= CVT_NEG;
+	/* write predicate reg */
+	emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
 
-	/* This is probably KILP */
+	/* conditional discard */
 	e = exec(pc);
-	e->inst[0] = 0x000001fe;
+	e->inst[0] = 0x00000002;
 	set_long(pc, e);
-	set_pred(pc, 1 /* LT? */, r_pred, e);
+	set_pred(pc, 0x1 /* LT */, r_pred, e);
 	emit(pc, e);
 }
 
-- 
cgit v1.2.3


From ec5c23551cdb4c369d8f8f392208f4d4bf29911b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 19 Oct 2009 18:17:45 +0200
Subject: nv50: add support for address regs

Allow indirect uniform access and increase the
limit on parameters from 128 to 512.
---
 src/gallium/drivers/nv50/nv50_program.c | 178 ++++++++++++++++++++++++++++++--
 src/gallium/drivers/nv50/nv50_screen.c  |  10 +-
 2 files changed, 175 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bfd979ce0f..c7145bb9be 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -32,6 +32,7 @@
 #include "nv50_context.h"
 
 #define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_ADDR 7
 //#define NV50_PROGRAM_DUMP
 
 /* ARL - gallium craps itself on progs/vp/arl.txt
@@ -79,7 +80,8 @@ struct nv50_reg {
 		P_ATTR,
 		P_RESULT,
 		P_CONST,
-		P_IMMD
+		P_IMMD,
+		P_ADDR
 	} type;
 	int index;
 
@@ -99,6 +101,7 @@ struct nv50_pc {
 
 	/* hw resources */
 	struct nv50_reg *r_temp[NV50_SU_MAX_TEMP];
+	struct nv50_reg r_addr[NV50_SU_MAX_ADDR];
 
 	/* tgsi resources */
 	struct nv50_reg *temp;
@@ -112,6 +115,8 @@ struct nv50_pc {
 	struct nv50_reg *immd;
 	float *immd_buf;
 	int immd_nr;
+	struct nv50_reg **addr;
+	int addr_nr;
 
 	struct nv50_reg *temp_temp[16];
 	unsigned temp_temp_nr;
@@ -158,6 +163,17 @@ popcnt4(uint32_t val)
 	return cnt[val & 0xf];
 }
 
+static void
+terminate_mbb(struct nv50_pc *pc)
+{ /* Called from nv50_program_tx_insn() at control flow (e.g. BGNLOOP, ENDIF). */
+	int i;
+
+	/* remove records of temporary address register values */
+	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+		if (pc->r_addr[i].index < 0) /* not reserved via alloc_addr(pc, NULL) */
+			pc->r_addr[i].rhw = -1;
+}
+
 static void
 alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 {
@@ -454,9 +470,68 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 	e->inst[1] |= (val >> 6) << 2;
 }
 
+static void
+emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val)
+{ /* Emit an instruction that, presumably, loads val into address reg dst. */
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(val <= 0xffff); /* only 16 bits of val are encoded below */
+	e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9);
+	e->inst[1] = 0x20000000;
+	e->inst[0] |= dst->hw << 2; /* destination register number */
+	set_long(pc, e);
+
+	emit(pc, e);
+}
+
+static struct nv50_reg *
+alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
+{ /* ref == NULL: reserve a register for TGSI; else: get one set up to reach ref. */
+	int i;
+	struct nv50_reg *a = NULL;
+
+	if (!ref) {
+		for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
+			if (pc->r_addr[i].index >= 0) /* already reserved */
+				continue;
+			if (pc->r_addr[i].rhw >= 0 &&
+			    pc->r_addr[i].acc == pc->insn_cur) /* used by the current insn */
+				continue;
+
+			pc->r_addr[i].rhw = -1;
+			pc->r_addr[i].index = i; /* index >= 0 marks it as reserved */
+			return &pc->r_addr[i];
+		}
+		assert(0); /* every address register is taken */
+		return NULL;
+	}
+
+	for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
+		if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
+			continue;
+		if (pc->r_addr[i].rhw < 0) { /* unused */
+			a = &pc->r_addr[i];
+			continue;
+		}
+		if (!a && pc->r_addr[i].acc != pc->insn_cur) /* fallback: overwrite this one */
+			a = &pc->r_addr[i];
+
+		if (ref->hw - pc->r_addr[i].rhw < 128) { /* NOTE(review): also true for a negative signed difference - confirm */
+		/* alloc'd & suitable */
+			pc->r_addr[i].acc = pc->insn_cur;
+			return &pc->r_addr[i];
+		}
+	}
+	assert(a);
+	emit_set_addr(pc, a, ref->hw * 4);
+
+	a->rhw = ref->hw % 128; /* NOTE(review): register was set from ref->hw * 4 but only hw % 128 is recorded for the reuse test - confirm */
+	a->acc = pc->insn_cur;
+	return a;
+}
 
 #define INTERP_LINEAR		0
-#define INTERP_FLAT			1
+#define INTERP_FLAT		1
 #define INTERP_PERSPECTIVE	2
 #define INTERP_CENTROID		4
 
@@ -488,6 +563,16 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
 	emit(pc, e);
 }
 
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
+{ /* Encode address register a into e; its field bits must still be clear. */
+	assert(!(e->inst[0] & 0x0c000000));
+	assert(!(e->inst[1] & 0x00000004));
+
+	e->inst[0] |= (a->hw & 3) << 26; /* low two bits of a->hw */
+	e->inst[1] |= (a->hw >> 2) << 2; /* remaining high bits of a->hw */
+}
+
 static void
 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	 struct nv50_program_exec *e)
@@ -498,6 +583,14 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	e->param.shift = s;
 	e->param.mask = m << (s % 32);
 
+	if (src->hw > 127)
+		set_addr(e, alloc_addr(pc, src));
+	else
+	if (src->acc < 0) {
+		assert(src->type == P_CONST);
+		set_addr(e, pc->addr[src->index]);
+	}
+
 	e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
 }
 
@@ -632,7 +725,7 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
-	e->inst[0] |= (src->hw << 16);
+	e->inst[0] |= ((src->hw & 127) << 16);
 }
 
 static void
@@ -660,7 +753,7 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
-	e->inst[1] |= (src->hw << 14);
+	e->inst[1] |= ((src->hw & 127) << 14);
 }
 
 static void
@@ -722,6 +815,22 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 	emit(pc, e);
 }
 
+static void
+emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
+	 uint8_t s)
+{ /* Used for TGSI ARL, which passes the floored source and s = 4. */
+	struct nv50_program_exec *e = exec(pc);
+
+	set_long(pc, e);
+	e->inst[1] |= 0xc0000000; /* raw opcode bits; meaning not documented here */
+
+	e->inst[0] |= dst->hw << 2; /* destination register number */
+	e->inst[0] |= s << 16; /* shift left */
+	set_src_0_restricted(pc, src, e);
+
+	emit(pc, e);
+}
+
 static void
 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 	    struct nv50_reg *src0, struct nv50_reg *src1)
@@ -1403,6 +1512,16 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 		return &pc->temp[dst->DstRegister.Index * 4 + c];
 	case TGSI_FILE_OUTPUT:
 		return &pc->result[dst->DstRegister.Index * 4 + c];
+	case TGSI_FILE_ADDRESS:
+	{
+		struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
+		if (!r) {
+			r = alloc_addr(pc, NULL);
+			pc->addr[dst->DstRegister.Index * 4 + c] = r;
+		}
+		assert(r);
+		return r;
+	}
 	case TGSI_FILE_NULL:
 		return NULL;
 	default:
@@ -1418,7 +1537,10 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 {
 	struct nv50_reg *r = NULL;
 	struct nv50_reg *temp;
-	unsigned sgn, c;
+	unsigned sgn, c, swz;
+
+	if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
+		assert(!src->SrcRegister.Indirect);
 
 	sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
 
@@ -1436,13 +1558,29 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			r = &pc->temp[src->SrcRegister.Index * 4 + c];
 			break;
 		case TGSI_FILE_CONSTANT:
-			r = &pc->param[src->SrcRegister.Index * 4 + c];
+			if (!src->SrcRegister.Indirect) {
+				r = &pc->param[src->SrcRegister.Index * 4 + c];
+				break;
+			}
+			/* Indicate indirection by setting r->acc < 0 and
+			 * use the index field to select the address reg.
+			 */
+			r = MALLOC_STRUCT(nv50_reg);
+			swz = tgsi_util_get_src_register_swizzle(
+						 &src->SrcRegisterInd, 0);
+			ctor_reg(r, P_CONST,
+				 src->SrcRegisterInd.Index * 4 + swz, c);
+			r->acc = -1;
 			break;
 		case TGSI_FILE_IMMEDIATE:
 			r = &pc->immd[src->SrcRegister.Index * 4 + c];
 			break;
 		case TGSI_FILE_SAMPLER:
 			break;
+		case TGSI_FILE_ADDRESS:
+			r = pc->addr[src->SrcRegister.Index * 4 + c];
+			assert(r);
+			break;
 		default:
 			assert(0);
 			break;
@@ -1678,8 +1816,15 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_add(pc, dst[c], src[0][c], src[1][c]);
 		}
 		break;
+	case TGSI_OPCODE_ARL:
+		assert(src[0][0]);
+		temp = temp_temp(pc);
+		emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+		emit_arl(pc, dst[0], temp, 4);
+		break;
 	case TGSI_OPCODE_BGNLOOP:
 		pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_BRK:
 		emit_branch(pc, -1, 0, NULL);
@@ -1763,6 +1908,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_branch(pc, -1, 0, NULL);
 		pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_ENDIF:
 		pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
@@ -1775,6 +1921,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
 			pc->br_join[pc->if_lvl] = NULL;
 		}
+		terminate_mbb(pc);
 		/* emit a NOP as join point, we could set it on the next
 		 * one, but would have to make sure it is long and !immd
 		 */
@@ -1785,6 +1932,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_branch(pc, -1, 0, NULL);
 		pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
 		pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_EX2:
 		emit_preex2(pc, temp, src[0][0]);
@@ -1812,6 +1960,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		set_pred_wr(pc, 1, 0, pc->if_cond);
 		emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_KIL:
 		emit_kil(pc, src[0][0]);
@@ -1989,6 +2138,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			src[i][c]->neg = 0;
 			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
 				FREE(src[i][c]);
+			else
+			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
+				FREE(src[i][c]); /* indirect constant */
 		}
 	}
 
@@ -2332,8 +2484,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 					pc->interp_mode[i] = mode;
 			}
 				break;
+			case TGSI_FILE_ADDRESS:
 			case TGSI_FILE_CONSTANT:
-				break;
 			case TGSI_FILE_SAMPLER:
 				break;
 			default:
@@ -2527,6 +2679,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 	pc->attr_nr = p->info.file_max[TGSI_FILE_INPUT] + 1;
 	pc->result_nr = p->info.file_max[TGSI_FILE_OUTPUT] + 1;
 	pc->param_nr = p->info.file_max[TGSI_FILE_CONSTANT] + 1;
+	pc->addr_nr = p->info.file_max[TGSI_FILE_ADDRESS] + 1;
+	assert(pc->addr_nr <= 2);
 
 	p->cfg.high_temp = 4;
 
@@ -2595,6 +2749,14 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 				ctor_reg(&pc->param[rid], P_CONST, i, rid);
 	}
 
+	if (pc->addr_nr) {
+		pc->addr = CALLOC(pc->addr_nr * 4, sizeof(struct nv50_reg *));
+		if (!pc->addr)
+			return FALSE;
+	}
+	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
+		ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+
 	return TRUE;
 }
 
@@ -2774,7 +2936,7 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 					 p->immd_nr, NV50_CB_PMISC);
 	}
 
-	assert(p->param_nr <= 128);
+	assert(p->param_nr <= 512);
 
 	if (p->param_nr) {
 		unsigned cb;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 66361dc3ba..0bd5487695 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -301,7 +301,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_data  (so, 8);
 
 	/* constant buffers for immediates and VP/FP parameters */
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
 			     &screen->constbuf_misc[0]);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
@@ -309,7 +309,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	for (i = 0; i < 2; i++) {
-		ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 128*4*4,
+		ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (128 * 4) * 4,
 				     &screen->constbuf_parm[i]);
 		if (ret) {
 			nv50_screen_destroy(pscreen);
@@ -318,8 +318,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	if (nouveau_resource_init(&screen->immd_heap[0], 0, 128) ||
-		nouveau_resource_init(&screen->parm_heap[0], 0, 128) ||
-		nouveau_resource_init(&screen->parm_heap[1], 0, 128))
+	    nouveau_resource_init(&screen->parm_heap[0], 0, 512) ||
+	    nouveau_resource_init(&screen->parm_heap[1], 0, 512))
 	{
 		NOUVEAU_ERR("Error initialising constant buffers.\n");
 		nv50_screen_destroy(pscreen);
@@ -340,7 +340,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->constbuf_misc[0], 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_PMISC << 16) | 0x00000800);
+	so_data  (so, (NV50_CB_PMISC << 16) | 0x00000200);
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x00000001 | (NV50_CB_PMISC << 12));
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
-- 
cgit v1.2.3


From e5f1f6a0bece3d035bf5ac1685b5335af4862cea Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Mon, 19 Oct 2009 13:51:28 -0700
Subject: r300g: Demonstratory kludge to unbreak glxgears.

We *must* recalculate something in vformat every rebind; let's see
if we can't narrow it down a bit.
---
 src/gallium/drivers/r300/r300_state_derived.c | 28 +++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 1468b9d36e..5df1a0cd63 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -460,32 +460,36 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     value = (struct r300_shader_derived_value*)
         util_hash_table_get(r300->shader_hash_table, (void*)key);
     if (value) {
-        vformat = value->vformat;
+        //vformat = value->vformat;
         rs_block = value->rs_block;
 
         FREE(key);
     } else {
-        vformat = CALLOC_STRUCT(r300_vertex_format);
         rs_block = CALLOC_STRUCT(r300_rs_block);
         value = CALLOC_STRUCT(r300_shader_derived_value);
 
-        for (i = 0; i < 16; i++) {
-            vformat->vs_tab[i] = -1;
-            vformat->fs_tab[i] = -1;
-        }
-
-        r300_vs_tab_routes(r300, vformat);
-        r300_vertex_psc(r300, vformat);
-        r300_update_fs_tab(r300, vformat);
-
         r300_update_rs_block(r300, rs_block);
 
-        value->vformat = vformat;
+        //value->vformat = vformat;
         value->rs_block = rs_block;
         util_hash_table_set(r300->shader_hash_table,
             (void*)key, (void*)value);
     }
 
+    /* XXX This will be refactored ASAP. */
+    vformat = CALLOC_STRUCT(r300_vertex_format);
+
+    for (i = 0; i < 16; i++) {
+        vformat->vs_tab[i] = -1;
+        vformat->fs_tab[i] = -1;
+    }
+
+    r300_vs_tab_routes(r300, vformat);
+    r300_vertex_psc(r300, vformat);
+    r300_update_fs_tab(r300, vformat);
+
+    FREE(r300->vertex_info);
+
     r300->vertex_info = vformat;
     r300->rs_block = rs_block;
     r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK);
-- 
cgit v1.2.3


From fc07ca004aaa338217c49e95f51b072b32c4f8c6 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 20 Oct 2009 17:17:41 +0200
Subject: trace: Check for destroy before calling it

---
 src/gallium/drivers/trace/tr_drm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_drm.c b/src/gallium/drivers/trace/tr_drm.c
index 781ca5d3bc..48d1c4051c 100644
--- a/src/gallium/drivers/trace/tr_drm.c
+++ b/src/gallium/drivers/trace/tr_drm.c
@@ -150,7 +150,9 @@ trace_drm_destroy(struct drm_api *_api)
 {
    struct trace_drm_api *tr_api = trace_drm_api(_api);
    struct drm_api *api = tr_api->api;
-   api->destroy(api);
+
+   if (api->destroy)
+      api->destroy(api);
 
    free(tr_api);
 }
-- 
cgit v1.2.3


From 4b2cf92ad9caa384869371534c1f2154625a755a Mon Sep 17 00:00:00 2001
From: Marc Dietrich <marvin24@gmx.de>
Date: Sun, 18 Oct 2009 08:28:34 -0700
Subject: cell: fix compilation on cell

s/LERP/LRP/
---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 312621fd53..b6b2f885af 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -674,7 +674,7 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
  * Emit linear interpolate.  See emit_ADD for comments.
  */
 static boolean
-emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
+emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
 
@@ -1766,7 +1766,7 @@ emit_instruction(struct codegen *gen,
       return emit_binop(gen, inst);
    case TGSI_OPCODE_MAD:
       return emit_MAD(gen, inst);
-   case TGSI_OPCODE_LERP:
+   case TGSI_OPCODE_LRP:
       return emit_LRP(gen, inst);
    case TGSI_OPCODE_DP3:
       return emit_DP3(gen, inst);
-- 
cgit v1.2.3


From 58abfebaad80b72c4a4bedad2d96a3959651eea3 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 06:44:16 -0700
Subject: r300g: Kill r300_surface with fire.

If you really want to see it again, check the history.
---
 src/gallium/drivers/r300/Makefile       |   1 -
 src/gallium/drivers/r300/r300_surface.c | 381 --------------------------------
 src/gallium/drivers/r300/r300_surface.h | 123 -----------
 3 files changed, 505 deletions(-)
 delete mode 100644 src/gallium/drivers/r300/r300_surface.c
 delete mode 100644 src/gallium/drivers/r300/r300_surface.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 69e4724790..c4f2c021c4 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -20,7 +20,6 @@ C_SOURCES = \
 	r300_state_derived.c \
 	r300_state_invariant.c \
 	r300_vs.c \
-	r300_surface.c \
 	r300_texture.c \
 	r300_tgsi_to_rc.c
 
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
deleted file mode 100644
index 5cf49d20aa..0000000000
--- a/src/gallium/drivers/r300/r300_surface.c
+++ /dev/null
@@ -1,381 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_surface.h"
-
-static void r300_surface_setup(struct r300_context* r300,
-                               struct r300_texture* dest,
-                               unsigned x, unsigned y,
-                               unsigned w, unsigned h)
-{
-    struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
-    unsigned pixpitch = r300_texture_get_stride(dest, 0) / dest->tex.block.size;
-    CS_LOCALS(r300);
-
-    r300_emit_blend_state(r300, &blend_clear_state);
-    r300_emit_blend_color_state(r300, &blend_color_clear_state);
-    r300_emit_dsa_state(r300, &dsa_clear_state);
-    r300_emit_rs_state(r300, &rs_clear_state);
-
-    BEGIN_CS(26);
-
-    /* Viewport setup */
-    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
-    OUT_CS_32F((float)w);
-    OUT_CS_32F((float)x);
-    OUT_CS_32F((float)h);
-    OUT_CS_32F((float)y);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(0.0);
-
-    OUT_CS_REG(R300_VAP_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
-            R300_VPORT_X_OFFSET_ENA |
-            R300_VPORT_Y_SCALE_ENA |
-            R300_VPORT_Y_OFFSET_ENA |
-            R300_VTX_XY_FMT | R300_VTX_Z_FMT);
-
-    /* Pixel scissors. */
-    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
-    if (caps->is_r500) {
-        OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT));
-        OUT_CS(((w - 1) << R300_SCISSORS_X_SHIFT) | ((h - 1) << R300_SCISSORS_Y_SHIFT));
-    } else {
-        /* Non-R500 chipsets have an offset of 1440 in their scissors. */
-        OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) |
-                ((y + 1440) << R300_SCISSORS_Y_SHIFT));
-        OUT_CS((((w - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
-                (((h - 1) + 1440) << R300_SCISSORS_Y_SHIFT));
-    }
-
-    /* Flush colorbuffer and blend caches. */
-    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
-        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D |
-        R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL);
-    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
-        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
-
-    /* Setup colorbuffer. */
-    OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
-    OUT_CS_RELOC(dest->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
-    OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0, 1);
-    OUT_CS_RELOC(dest->buffer, pixpitch |
-                 r300_translate_colorformat(dest->tex.format), 0,
-                 RADEON_GEM_DOMAIN_VRAM, 0);
-    OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf);
-
-    END_CS;
-}
-
-/* Provides pipe_context's "surface_fill". Commonly used for clearing
- * buffers. */
-static void r300_surface_fill(struct pipe_context* pipe,
-                              struct pipe_surface* dest,
-                              unsigned x, unsigned y,
-                              unsigned w, unsigned h,
-                              unsigned color)
-{
-    float r, g, b, a;
-    struct r300_context* r300 = r300_context(pipe);
-    struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
-    struct r300_texture* tex = (struct r300_texture*)dest->texture;
-    unsigned pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
-    boolean invalid = FALSE;
-    CS_LOCALS(r300);
-
-    a = (float)((color >> 24) & 0xff) / 255.0f;
-    r = (float)((color >> 16) & 0xff) / 255.0f;
-    g = (float)((color >>  8) & 0xff) / 255.0f;
-    b = (float)((color >>  0) & 0xff) / 255.0f;
-    DBG(r300, DBG_SURF, "r300: Filling surface %p at (%d,%d),"
-        " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
-        dest, x, y, w, h, pixpitch, color);
-
-    /* Fallback? */
-    if (!pipe->screen->is_format_supported(pipe->screen, dest->format,
-        PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
-fallback:
-        DBG(r300, DBG_SURF | DBG_FALL,
-            "r300: Falling back on surface clear...\n");
-        util_surface_fill(pipe, dest, x, y, w, h, color);
-        return;
-    }
-
-    /* Make sure our target BO is okay. */
-validate:
-    if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
-                0, RADEON_GEM_DOMAIN_VRAM)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        goto validate;
-    }
-    if (!r300->winsys->validate(r300->winsys)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        if (invalid) {
-            DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop.");
-            goto fallback;
-        }
-        invalid = TRUE;
-        goto validate;
-    }
-
-    r300_surface_setup(r300, tex, x, y, w, h);
-
-    /* Vertex shader setup */
-    if (caps->has_tcl) {
-        r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
-    } else {
-        BEGIN_CS(4);
-        OUT_CS_REG(R300_VAP_CNTL_STATUS,
-#ifdef PIPE_ARCH_BIG_ENDIAN
-                   R300_VC_32BIT_SWAP |
-#endif
-                   R300_VAP_TCL_BYPASS);
-        OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
-                R300_PVS_NUM_CNTLRS(5) |
-                R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
-                R300_PVS_VF_MAX_VTX_NUM(12));
-        END_CS;
-    }
-
-    /* Fragment shader setup */
-    if (caps->is_r500) {
-        r500_emit_fragment_program_code(r300, &r5xx_passthrough_fragment_shader, 0);
-        r300_emit_rs_block_state(r300, &r5xx_rs_block_clear_state);
-    } else {
-        r300_emit_fragment_program_code(r300, &r3xx_passthrough_fragment_shader, 0);
-        r300_emit_rs_block_state(r300, &r3xx_rs_block_clear_state);
-    }
-
-    BEGIN_CS(26);
-
-    /* VAP stream control, mapping from input memory to PVS/RS memory */
-    if (caps->has_tcl) {
-        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
-            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
-            ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
-                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
-    } else {
-        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
-            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
-            ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) |
-                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
-    }
-    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
-            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
-
-    /* VAP format controls */
-    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
-            R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
-            R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
-    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x0);
-
-    /* Disable textures */
-    OUT_CS_REG(R300_TX_ENABLE, 0x0);
-
-    /* The size of the point we're about to draw, in sixths of pixels */
-    OUT_CS_REG(R300_GA_POINT_SIZE,
-        ((h * 6)  & R300_POINTSIZE_Y_MASK) |
-        ((w * 6) << R300_POINTSIZE_X_SHIFT));
-
-    /* Vertex size. */
-    OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
-
-    /* Packet3 with our point vertex */
-    OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8);
-    OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
-            (1 << R300_PRIM_NUM_VERTICES_SHIFT));
-    /* Position */
-    OUT_CS_32F(0.5);
-    OUT_CS_32F(0.5);
-    OUT_CS_32F(1.0);
-    OUT_CS_32F(1.0);
-    /* Color */
-    OUT_CS_32F(r);
-    OUT_CS_32F(g);
-    OUT_CS_32F(b);
-    OUT_CS_32F(a);
-
-    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-
-    END_CS;
-
-    r300->dirty_hw++;
-}
-
-static void r300_surface_copy(struct pipe_context* pipe,
-                              struct pipe_surface* dest,
-                              unsigned destx, unsigned desty,
-                              struct pipe_surface* src,
-                              unsigned srcx, unsigned srcy,
-                              unsigned w, unsigned h)
-{
-    struct r300_context* r300 = r300_context(pipe);
-    struct r300_capabilities* caps = r300_screen(pipe->screen)->caps;
-    struct r300_texture* srctex = (struct r300_texture*)src->texture;
-    struct r300_texture* desttex = (struct r300_texture*)dest->texture;
-    unsigned pixpitch = r300_texture_get_stride(srctex, 0) / srctex->tex.block.size;
-    boolean invalid = FALSE;
-    float fsrcx = srcx, fsrcy = srcy, fdestx = destx, fdesty = desty;
-    CS_LOCALS(r300);
-
-    DBG(r300, DBG_SURF, "r300: Copying surface %p at (%d,%d) to %p at (%d, %d),"
-        " dimensions %dx%d (pixel pitch %d)\n",
-        src, srcx, srcy, dest, destx, desty, w, h, pixpitch);
-
-    if ((srctex->buffer == desttex->buffer) &&
-            ((destx < srcx + w) || (srcx < destx + w)) &&
-            ((desty < srcy + h) || (srcy < desty + h))) {
-        goto fallback;
-    }
-
-    if (!pipe->screen->is_format_supported(pipe->screen, src->format,
-            PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0) ||
-            !pipe->screen->is_format_supported(pipe->screen, dest->format,
-            PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
-fallback:
-        DBG(r300, DBG_SURF | DBG_FALL, "r300: Falling back on surface_copy\n");
-        util_surface_copy(pipe, FALSE, dest, destx, desty, src,
-                srcx, srcy, w, h);
-        return;
-    }
-
-    /* Add our target BOs to the list. */
-validate:
-    if (!r300->winsys->add_buffer(r300->winsys, srctex->buffer,
-                RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        goto validate;
-    }
-    if (!r300->winsys->add_buffer(r300->winsys, desttex->buffer,
-                0, RADEON_GEM_DOMAIN_VRAM)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        goto validate;
-    }
-    if (!r300->winsys->validate(r300->winsys)) {
-        r300->context.flush(&r300->context, 0, NULL);
-        if (invalid) {
-            DBG(r300, DBG_SURF | DBG_FALL, "r300: Stuck in validation loop.");
-            goto fallback;
-        }
-        invalid = TRUE;
-        goto validate;
-    }
-
-    r300_surface_setup(r300, desttex, destx, desty, w, h);
-
-    /* Setup the texture. */
-    r300_emit_texture(r300, &r300_sampler_copy_state, srctex, 0);
-
-    /* Flush and enable. */
-    r300_flush_textures(r300);
-
-    /* Vertex shader setup */
-    if (caps->has_tcl) {
-        r300_emit_vertex_program_code(r300, &r300_passthrough_vertex_shader, 0);
-    } else {
-        BEGIN_CS(4);
-        OUT_CS_REG(R300_VAP_CNTL_STATUS,
-#ifdef PIPE_ARCH_BIG_ENDIAN
-                   R300_VC_32BIT_SWAP |
-#endif
-                   R300_VAP_TCL_BYPASS);
-        OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(5) |
-                R300_PVS_NUM_CNTLRS(5) |
-                R300_PVS_NUM_FPUS(caps->num_vert_fpus) |
-                R300_PVS_VF_MAX_VTX_NUM(12));
-        END_CS;
-    }
-
-    /* Fragment shader setup */
-    if (caps->is_r500) {
-        r500_emit_fragment_program_code(r300, &r5xx_texture_fragment_shader, 0);
-        r300_emit_rs_block_state(r300, &r5xx_rs_block_copy_state);
-    } else {
-        r300_emit_fragment_program_code(r300, &r3xx_texture_fragment_shader, 0);
-        r300_emit_rs_block_state(r300, &r3xx_rs_block_copy_state);
-    }
-
-    BEGIN_CS(30);
-    /* VAP stream control, mapping from input memory to PVS/RS memory */
-    if (caps->has_tcl) {
-        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
-            (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
-            ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
-                R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
-    } else {
-        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
-            (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
-            ((R300_LAST_VEC | (6 << R300_DST_VEC_LOC_SHIFT) |
-                R300_DATA_TYPE_FLOAT_2) << R300_DATA_TYPE_1_SHIFT));
-    }
-    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE0_SHIFT) |
-            (R300_VAP_SWIZZLE_XYZW << R300_SWIZZLE1_SHIFT));
-
-    /* VAP format controls */
-    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0,
-            R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
-    /* Two components of texture 0 */
-    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x2);
-
-    /* Vertex size. */
-    OUT_CS_REG(R300_VAP_VTX_SIZE, 0x4);
-
-    /* Packet3 with our texcoords */
-    OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 16);
-    OUT_CS(R300_PRIM_TYPE_QUADS | R300_PRIM_WALK_RING |
-            (4 << R300_PRIM_NUM_VERTICES_SHIFT));
-    /* (x    , y    ) */
-    OUT_CS_32F(fdestx / dest->width);
-    OUT_CS_32F(fdesty / dest->height);
-    OUT_CS_32F(fsrcx  / src->width);
-    OUT_CS_32F(fsrcy  / src->height);
-    /* (x    , y + h) */
-    OUT_CS_32F(fdestx / dest->width);
-    OUT_CS_32F((fdesty + h) / dest->height);
-    OUT_CS_32F(fsrcx  / src->width);
-    OUT_CS_32F((fsrcy  + h) / src->height);
-    /* (x + w, y + h) */
-    OUT_CS_32F((fdestx + w) / dest->width);
-    OUT_CS_32F((fdesty + h) / dest->height);
-    OUT_CS_32F((fsrcx  + w) / src->width);
-    OUT_CS_32F((fsrcy  + h) / src->height);
-    /* (x + w, y    ) */
-    OUT_CS_32F((fdestx + w) / dest->width);
-    OUT_CS_32F(fdesty / dest->height);
-    OUT_CS_32F((fsrcx  + w) / src->width);
-    OUT_CS_32F(fsrcy  / src->height);
-
-    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
-
-    END_CS;
-
-    r300->dirty_hw++;
-}
-
-void r300_init_surface_functions(struct r300_context* r300)
-{
-    r300->context.surface_fill = r300_surface_fill;
-    r300->context.surface_copy = r300_surface_copy;
-}
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
deleted file mode 100644
index d5998e6e6d..0000000000
--- a/src/gallium/drivers/r300/r300_surface.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_SURFACE_H
-#define R300_SURFACE_H
-
-#include "pipe/p_context.h"
-#include "pipe/p_screen.h"
-
-#include "util/u_rect.h"
-
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_emit.h"
-#include "r300_fs.h"
-#include "r300_vs.h"
-#include "r300_state_inlines.h"
-
-static struct r300_blend_state blend_clear_state = {
-    .blend_control = 0x0,
-    .alpha_blend_control = 0x0,
-    .rop = 0x0,
-    .dither = 0x0,
-};
-
-static struct r300_blend_color_state blend_color_clear_state = {
-    .blend_color = 0x0,
-    .blend_color_red_alpha = 0x0,
-    .blend_color_green_blue = 0x0,
-};
-
-static struct r300_dsa_state dsa_clear_state = {
-    .alpha_function = 0x0,
-    .alpha_reference = 0x0,
-    .z_buffer_control = 0x0,
-    .z_stencil_control = 0x0,
-    .stencil_ref_mask = R300_STENCILWRITEMASK_MASK,
-    .stencil_ref_bf = 0x0,
-};
-
-static struct r300_rs_state rs_clear_state = {
-    .point_minmax = 0x36000006,
-    .line_control = 0x00030006,
-    .depth_scale_front = 0x0,
-    .depth_offset_front = 0x0,
-    .depth_scale_back = 0x0,
-    .depth_offset_back = 0x0,
-    .polygon_offset_enable = 0x0,
-    .cull_mode = 0x0,
-    .line_stipple_config = 0x3BAAAAAB,
-    .line_stipple_value = 0x0,
-    .color_control = R300_SHADE_MODEL_FLAT,
-};
-
-static struct r300_rs_block r3xx_rs_block_clear_state = {
-    .ip[0] = R500_RS_SEL_S(R300_RS_SEL_C0) |
-        R500_RS_SEL_T(R300_RS_SEL_C0) |
-        R500_RS_SEL_R(R300_RS_SEL_C0) |
-        R500_RS_SEL_Q(R300_RS_SEL_K1),
-    .inst[0] = R300_RS_INST_COL_CN_WRITE,
-    .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN,
-    .inst_count = 0,
-};
-
-static struct r300_rs_block r5xx_rs_block_clear_state = {
-    .ip[0] = R500_RS_SEL_S(R500_RS_IP_PTR_K0) |
-        R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
-        R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
-        R500_RS_SEL_Q(R500_RS_IP_PTR_K1),
-    .inst[0] = R500_RS_INST_COL_CN_WRITE,
-    .count = R300_IT_COUNT(0) | R300_IC_COUNT(1) | R300_HIRES_EN,
-    .inst_count = 0,
-};
-
-/* The following state is used for surface_copy only. */
-
-static struct r300_rs_block r3xx_rs_block_copy_state = {
-    .ip[0] = R500_RS_SEL_S(R300_RS_SEL_K0) |
-        R500_RS_SEL_T(R300_RS_SEL_K0) |
-        R500_RS_SEL_R(R300_RS_SEL_K0) |
-        R500_RS_SEL_Q(R300_RS_SEL_K1),
-    .inst[0] = R300_RS_INST_COL_CN_WRITE,
-    .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
-    .inst_count = R300_RS_TX_OFFSET(0),
-};
-
-static struct r300_rs_block r5xx_rs_block_copy_state = {
-    .ip[0] = R500_RS_SEL_S(0) |
-        R500_RS_SEL_T(1) |
-        R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
-        R500_RS_SEL_Q(R500_RS_IP_PTR_K1),
-    .inst[0] = R500_RS_INST_TEX_CN_WRITE,
-    .count = R300_IT_COUNT(2) | R300_IC_COUNT(0) | R300_HIRES_EN,
-    .inst_count = R300_RS_TX_OFFSET(0),
-};
-
-static struct r300_sampler_state r300_sampler_copy_state = {
-    .filter0 = R300_TX_WRAP_S(R300_TX_CLAMP) |
-        R300_TX_WRAP_T(R300_TX_CLAMP) |
-        R300_TX_MAG_FILTER_NEAREST |
-        R300_TX_MIN_FILTER_NEAREST,
-};
-
-#endif /* R300_SURFACE_H */
-- 
cgit v1.2.3


From b21df2620ef970daa306e06c6ca69a9b66280cd6 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 06:47:05 -0700
Subject: r300g: Also kill r300_shader_inlines with fire.

---
 src/gallium/drivers/r300/r300_shader_inlines.h | 47 --------------------------
 1 file changed, 47 deletions(-)
 delete mode 100644 src/gallium/drivers/r300/r300_shader_inlines.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_shader_inlines.h b/src/gallium/drivers/r300/r300_shader_inlines.h
deleted file mode 100644
index a04f45b03e..0000000000
--- a/src/gallium/drivers/r300/r300_shader_inlines.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_SHADER_INLINES_H
-#define R300_SHADER_INLINES_H
-
-/* TGSI constants. TGSI is like XML: If it can't solve your problems, you're
- * not using enough of it. */
-static const struct tgsi_full_src_register r300_constant_zero = {
-    .SrcRegister.Extended = TRUE,
-    .SrcRegister.File = TGSI_FILE_NULL,
-    .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ZERO,
-    .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ZERO,
-    .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ZERO,
-    .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ZERO,
-};
-
-static const struct tgsi_full_src_register r300_constant_one = {
-    .SrcRegister.Extended = TRUE,
-    .SrcRegister.File = TGSI_FILE_NULL,
-    .SrcRegisterExtSwz.ExtSwizzleX = TGSI_EXTSWIZZLE_ONE,
-    .SrcRegisterExtSwz.ExtSwizzleY = TGSI_EXTSWIZZLE_ONE,
-    .SrcRegisterExtSwz.ExtSwizzleZ = TGSI_EXTSWIZZLE_ONE,
-    .SrcRegisterExtSwz.ExtSwizzleW = TGSI_EXTSWIZZLE_ONE,
-};
-
-#endif /* R300_SHADER_INLINES_H */
-- 
cgit v1.2.3


From b589e39809fa9d0b24a708d792b70ae5b120ffb8 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 05:45:05 -0700
Subject: r300g: Examine vertex attribute type on HW TCL too.

---
 src/gallium/drivers/r300/r300_state_derived.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 5df1a0cd63..7297f9c653 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -244,10 +244,8 @@ static void r300_vertex_psc(struct r300_context* r300,
         assert(tab[i] != -1);
 
         /* Add the attribute to the PSC table. */
-        temp = r300screen->caps->has_tcl ?
-            R300_DATA_TYPE_FLOAT_4 :
-            translate_vertex_data_type(vinfo->attrib[i].emit);
-        temp |= tab[i] << R300_DST_VEC_LOC_SHIFT;
+        temp = translate_vertex_data_type(vinfo->attrib[i].emit) |
+            tab[i] << R300_DST_VEC_LOC_SHIFT;
 
         if (i & 1) {
             vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
-- 
cgit v1.2.3


From 5a0598f23569314a4ad72eda59e250ab9c43b46d Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 05:48:45 -0700
Subject: r300g: Don't use the hashtable internally.

As osiris pointed out, glxgears slowly gets slower for some reason
when it's enabled, and it's not helping at the moment, so just turn
it off.
---
 src/gallium/drivers/r300/r300_state_derived.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 7297f9c653..6e7679c39f 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -451,6 +451,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     struct r300_shader_derived_value* value;
     int i;
 
+    /*
     key = CALLOC_STRUCT(r300_shader_key);
     key->vs = r300->vs;
     key->fs = r300->fs;
@@ -472,10 +473,11 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
         value->rs_block = rs_block;
         util_hash_table_set(r300->shader_hash_table,
             (void*)key, (void*)value);
-    }
+    } */
 
     /* XXX This will be refactored ASAP. */
     vformat = CALLOC_STRUCT(r300_vertex_format);
+    rs_block = CALLOC_STRUCT(r300_rs_block);
 
     for (i = 0; i < 16; i++) {
         vformat->vs_tab[i] = -1;
@@ -486,7 +488,10 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     r300_vertex_psc(r300, vformat);
     r300_update_fs_tab(r300, vformat);
 
+    r300_update_rs_block(r300, rs_block);
+
     FREE(r300->vertex_info);
+    FREE(r300->rs_block);
 
     r300->vertex_info = vformat;
     r300->rs_block = rs_block;
-- 
cgit v1.2.3


From babadb8bb9d68f3687a9c9cb80f98c732b1120c7 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 05:54:49 -0700
Subject: r300g: Don't use getenv; use debug_get_*_option instead.

---
 src/gallium/drivers/r300/r300_chipset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index d138866d33..9b763e2cb5 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -31,7 +31,7 @@ void r300_parse_chipset(struct r300_capabilities* caps)
 {
     /* Reasonable defaults */
     caps->num_vert_fpus = 4;
-    caps->has_tcl = getenv("RADEON_NO_TCL") ? FALSE : TRUE;
+    caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
     caps->is_r500 = FALSE;
     caps->high_second_pipe = FALSE;
 
-- 
cgit v1.2.3


From 6a448a525baf81173f92ee8c3074b98baa54397b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 06:31:36 -0700
Subject: r300g: Cleanup header includes.

---
 src/gallium/drivers/r300/r300_chipset.c         |  1 +
 src/gallium/drivers/r300/r300_chipset.h         |  6 +++---
 src/gallium/drivers/r300/r300_clear.c           |  3 +++
 src/gallium/drivers/r300/r300_clear.h           |  4 +---
 src/gallium/drivers/r300/r300_context.c         | 16 +++++++++++++++-
 src/gallium/drivers/r300/r300_context.h         | 13 -------------
 src/gallium/drivers/r300/r300_emit.c            |  7 ++++++-
 src/gallium/drivers/r300/r300_emit.h            |  7 -------
 src/gallium/drivers/r300/r300_flush.c           |  7 +++++++
 src/gallium/drivers/r300/r300_flush.h           |  7 -------
 src/gallium/drivers/r300/r300_fs.c              |  6 +++++-
 src/gallium/drivers/r300/r300_fs.h              |  7 +++----
 src/gallium/drivers/r300/r300_query.c           |  8 +++++++-
 src/gallium/drivers/r300/r300_query.h           |  4 ----
 src/gallium/drivers/r300/r300_render.c          |  5 ++++-
 src/gallium/drivers/r300/r300_screen.c          |  7 +++++++
 src/gallium/drivers/r300/r300_screen.h          |  5 -----
 src/gallium/drivers/r300/r300_state.c           |  4 ++++
 src/gallium/drivers/r300/r300_state_derived.c   |  8 +++++++-
 src/gallium/drivers/r300/r300_state_inlines.h   |  2 ++
 src/gallium/drivers/r300/r300_state_invariant.c |  5 ++++-
 src/gallium/drivers/r300/r300_state_invariant.h |  6 +-----
 src/gallium/drivers/r300/r300_texture.c         |  6 ++++++
 src/gallium/drivers/r300/r300_texture.h         |  3 ---
 24 files changed, 86 insertions(+), 61 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
index 9b763e2cb5..51fdb82ff3 100644
--- a/src/gallium/drivers/r300/r300_chipset.c
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -21,6 +21,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "r300_chipset.h"
+
 #include "util/u_debug.h"
 
 /* r300_chipset: A file all to itself for deducing the various properties of
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
index f015a4243d..0633a8b8a7 100644
--- a/src/gallium/drivers/r300/r300_chipset.h
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -33,11 +33,11 @@ struct r300_capabilities {
     /* Chipset family */
     int family;
     /* The number of vertex floating-point units */
-    int num_vert_fpus;
+    unsigned num_vert_fpus;
     /* The number of fragment pipes */
-    int num_frag_pipes;
+    unsigned num_frag_pipes;
     /* The number of z pipes */
-    int num_z_pipes;
+    unsigned num_z_pipes;
     /* Whether or not TCL is physically present */
     boolean has_tcl;
     /* Whether or not this is an RV515 or newer; R500s have many differences
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
index 8b9cb819ae..02d6d504fc 100644
--- a/src/gallium/drivers/r300/r300_clear.c
+++ b/src/gallium/drivers/r300/r300_clear.c
@@ -21,6 +21,9 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "r300_clear.h"
+#include "r300_context.h"
+
+#include "util/u_clear.h"
 
 /* Clears currently bound buffers. */
 void r300_clear(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h
index cd5900565e..b8fcdf273c 100644
--- a/src/gallium/drivers/r300/r300_clear.h
+++ b/src/gallium/drivers/r300/r300_clear.h
@@ -23,9 +23,7 @@
 #ifndef R300_CLEAR_H
 #define R300_CLEAR_H
 
-#include "util/u_clear.h"
-
-#include "r300_context.h"
+struct pipe_context;
 
 void r300_clear(struct pipe_context* pipe,
                 unsigned buffers,
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 7b370b3e95..4ba11a026e 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -20,10 +20,24 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-#include "r300_context.h"
+#include "draw/draw_context.h"
+
+#include "pipe/p_inlines.h"
+
+#include "tgsi/tgsi_scan.h"
 
+#include "util/u_hash_table.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include "r300_clear.h"
+#include "r300_context.h"
 #include "r300_flush.h"
+#include "r300_query.h"
+#include "r300_screen.h"
+#include "r300_state_derived.h"
 #include "r300_state_invariant.h"
+#include "r300_winsys.h"
 
 static boolean r300_draw_range_elements(struct pipe_context* pipe,
                                         struct pipe_buffer* indexBuffer,
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 2d608d6afc..61a57df35e 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -23,23 +23,10 @@
 #ifndef R300_CONTEXT_H
 #define R300_CONTEXT_H
 
-#include "draw/draw_context.h"
 #include "draw/draw_vertex.h"
 
 #include "pipe/p_context.h"
 
-#include "tgsi/tgsi_scan.h"
-
-#include "util/u_hash_table.h"
-#include "util/u_memory.h"
-#include "util/u_simple_list.h"
-
-#include "r300_clear.h"
-#include "r300_query.h"
-#include "r300_screen.h"
-#include "r300_state_derived.h"
-#include "r300_winsys.h"
-
 struct r300_fragment_shader;
 struct r300_vertex_shader;
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index df2046bd0c..258c38fefd 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -22,10 +22,15 @@
 
 /* r300_emit: Functions for emitting state. */
 
-#include "r300_emit.h"
+#include "util/u_math.h"
 
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
 #include "r300_fs.h"
+#include "r300_screen.h"
 #include "r300_state_derived.h"
+#include "r300_state_inlines.h"
 #include "r300_vs.h"
 
 void r300_emit_blend_state(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 7e469ea0c7..02ac5bebbd 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -23,13 +23,6 @@
 #ifndef R300_EMIT_H
 #define R300_EMIT_H
 
-#include "util/u_math.h"
-
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_screen.h"
-#include "r300_state_inlines.h"
-
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
 
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index d60652a021..c222ea09b1 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -20,6 +20,13 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+#include "util/u_simple_list.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
 #include "r300_emit.h"
 #include "r300_flush.h"
 #include "r300_state_invariant.h"
diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h
index 9a83d89daa..0e9e6106bb 100644
--- a/src/gallium/drivers/r300/r300_flush.h
+++ b/src/gallium/drivers/r300/r300_flush.h
@@ -23,13 +23,6 @@
 #ifndef R300_FLUSH_H
 #define R300_FLUSH_H
 
-#include "draw/draw_private.h"
-
-#include "pipe/p_context.h"
-
-#include "r300_context.h"
-#include "r300_cs.h"
-
 void r300_init_flush_functions(struct r300_context* r300);
 
 #endif /* R300_FLUSH_H */
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 546ad545a5..2db185fd80 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -21,10 +21,14 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-#include "r300_fs.h"
+#include "tgsi/tgsi_dump.h"
 
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_fs.h"
 #include "r300_tgsi_to_rc.h"
 
+#include "radeon_code.h"
 #include "radeon_compiler.h"
 
 static void find_output_registers(struct r300_fragment_program_compiler * compiler,
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 04453274aa..8dfb139738 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -24,14 +24,13 @@
 #ifndef R300_FS_H
 #define R300_FS_H
 
-#include "tgsi/tgsi_dump.h"
+#include "pipe/p_state.h"
+
+#include "tgsi/tgsi_scan.h"
 
-#include "r300_context.h"
 #include "r3xx_fs.h"
 #include "r5xx_fs.h"
 
-#include "radeon_code.h"
-
 struct r300_fragment_shader {
     /* Parent class */
     struct pipe_shader_state state;
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 2b0fbfb7d2..007f11efae 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -20,9 +20,15 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-#include "r300_query.h"
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
 
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_cs.h"
 #include "r300_emit.h"
+#include "r300_query.h"
+#include "r300_reg.h"
 
 static struct pipe_query *r300_create_query(struct pipe_context *pipe,
                                             unsigned query_type)
diff --git a/src/gallium/drivers/r300/r300_query.h b/src/gallium/drivers/r300/r300_query.h
index 4f50e8f844..48876da312 100644
--- a/src/gallium/drivers/r300/r300_query.h
+++ b/src/gallium/drivers/r300/r300_query.h
@@ -23,10 +23,6 @@
 #ifndef R300_QUERY_H
 #define R300_QUERY_H
 
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-
 struct r300_context;
 
 static INLINE struct r300_query* r300_query(struct pipe_query* q)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4e778e1e57..79a33b53cb 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -20,8 +20,11 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-#include "draw/draw_pipe.h"
+#include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
+
+#include "pipe/p_inlines.h"
+
 #include "util/u_memory.h"
 
 #include "r300_cs.h"
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index cc499d400a..a9058a25e5 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -20,7 +20,14 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_simple_screen.h"
+
+#include "r300_context.h"
 #include "r300_screen.h"
+#include "r300_texture.h"
+#include "r300_winsys.h"
 
 /* Return the identifier behind whom the brave coders responsible for this
  * amalgamation of code, sweat, and duct tape, routinely obscure their names.
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 2a0e41fbc3..41df31f670 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -23,14 +23,9 @@
 #ifndef R300_SCREEN_H
 #define R300_SCREEN_H
 
-#include "pipe/p_inlines.h"
 #include "pipe/p_screen.h"
-#include "util/u_memory.h"
-#include "util/u_simple_screen.h"
 
 #include "r300_chipset.h"
-#include "r300_texture.h"
-#include "r300_winsys.h"
 
 struct r300_screen {
     /* Parent class */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 0a982a9d5d..c4c9e5d7c2 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -20,8 +20,11 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "draw/draw_context.h"
+
 #include "util/u_debug.h"
 #include "util/u_math.h"
+#include "util/u_memory.h"
 #include "util/u_pack_color.h"
 
 #include "tgsi/tgsi_parse.h"
@@ -31,6 +34,7 @@
 
 #include "r300_context.h"
 #include "r300_reg.h"
+#include "r300_screen.h"
 #include "r300_state_inlines.h"
 #include "r300_fs.h"
 #include "r300_vs.h"
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 6e7679c39f..e749bd9cb9 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -20,9 +20,15 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
-#include "r300_state_derived.h"
+#include "draw/draw_context.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
+#include "r300_context.h"
 #include "r300_fs.h"
+#include "r300_screen.h"
+#include "r300_state_derived.h"
 #include "r300_state_inlines.h"
 #include "r300_vs.h"
 
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index d7b57e1b22..9135c55ec4 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -24,6 +24,8 @@
 #ifndef R300_STATE_INLINES_H
 #define R300_STATE_INLINES_H
 
+#include "draw/draw_vertex.h"
+
 #include "pipe/p_format.h"
 
 #include "r300_reg.h"
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 3865730d63..4865f16058 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -21,9 +21,12 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
 #include "r300_state_invariant.h"
 
-
 struct pipe_viewport_state r300_viewport_identity = {
     .scale = {1.0, 1.0, 1.0, 1.0},
     .translate = {0.0, 0.0, 0.0, 0.0},
diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h
index 5bea6779fe..05cff0d6df 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.h
+++ b/src/gallium/drivers/r300/r300_state_invariant.h
@@ -23,11 +23,7 @@
 #ifndef R300_STATE_INVARIANT_H
 #define R300_STATE_INVARIANT_H
 
-#include "r300_chipset.h"
-#include "r300_context.h"
-#include "r300_cs.h"
-#include "r300_reg.h"
-#include "r300_state_inlines.h"
+struct r300_context;
 
 void r300_emit_invariant_state(struct r300_context* r300);
 
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 7ea4c33fa9..339fbb6242 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -20,6 +20,12 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "pipe/p_screen.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "r300_context.h"
 #include "r300_texture.h"
 
 static void r300_setup_texture_state(struct r300_texture* tex)
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 992dad77ab..2e58bda716 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -23,11 +23,8 @@
 #ifndef R300_TEXTURE_H
 #define R300_TEXTURE_H
 
-#include "pipe/p_screen.h"
 #include "pipe/p_video_state.h"
-#include "util/u_math.h"
 
-#include "r300_context.h"
 #include "r300_reg.h"
 
 struct r300_texture;
-- 
cgit v1.2.3


From 3b8dad47f816667aa4166d6e27361d274fc2cf4d Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 06:49:16 -0700
Subject: r300g: No debug in r300_state.

---
 src/gallium/drivers/r300/r300_state.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index c4c9e5d7c2..a3e1bc621a 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -22,7 +22,6 @@
 
 #include "draw/draw_context.h"
 
-#include "util/u_debug.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
-- 
cgit v1.2.3


From ce98860012b10cc6cc124fd1ed6fa3a5e28712bb Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 20 Oct 2009 10:54:21 +0100
Subject: llvmpipe: Remove extraneous name in lp_type pre-declaration.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.h    | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_const.h   | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_conv.h    | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_logic.h   | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_swizzle.h | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 095a8e1cab..4e568c055e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -40,7 +40,7 @@
 #include <llvm-c/Core.h>  
 
 
-struct lp_type type;
+struct lp_type;
 struct lp_build_context;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_const.h b/src/gallium/drivers/llvmpipe/lp_bld_const.h
index ffb302f736..cb8e1c7b00 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_const.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_const.h
@@ -42,7 +42,7 @@
 #include <pipe/p_compiler.h>
 
 
-struct lp_type type;
+struct lp_type;
 
 
 unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.h b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
index ca378804d2..948e68fae4 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.h
@@ -40,7 +40,7 @@
 #include <llvm-c/Core.h>  
 
 
-struct lp_type type;
+struct lp_type;
 
 
 LLVMValueRef
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_logic.h b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
index a4ee7723b5..d67500ef70 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_logic.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_logic.h
@@ -42,7 +42,7 @@
 #include "pipe/p_defines.h" /* For PIPE_FUNC_xxx */
 
 
-struct lp_type type;
+struct lp_type;
 struct lp_build_context;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
index 1f6da80448..b9472127a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
@@ -40,7 +40,7 @@
 #include <llvm-c/Core.h>  
 
 
-struct lp_type type;
+struct lp_type;
 struct lp_build_context;
 
 
-- 
cgit v1.2.3


From cb351bdd6e09b40fe719c548c48ea40c6c4c3d11 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Wed, 21 Oct 2009 21:56:09 +0200
Subject: nouveau: nv30: check number of colour buffers to bind

---
 src/gallium/drivers/nv30/nv30_state_fb.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 9b0266fba5..197de82886 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -17,6 +17,10 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	struct nv30_miptree *nv30mt;
 	int colour_bits = 32, zeta_bits = 32;
 
+	if (fb->nr_cbufs == 0) {
+		return FALSE;
+	}
+
 	rt_enable = 0;
 	for (i = 0; i < fb->nr_cbufs; i++) {
 		if (colour_format) {
-- 
cgit v1.2.3


From d364f662c685ba0f28aa865fbd7e1f0acc3c469e Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Wed, 21 Oct 2009 22:01:03 +0200
Subject: nouveau: nv30: Do not use assert to return NULL

---
 src/gallium/drivers/nv30/nv30_fragtex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index 822e1d8def..a2ce947a72 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -69,7 +69,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 
 	tf = nv30_fragtex_format(pt->format);
 	if (!tf)
-		assert(0);
+		return NULL;
 
 	txf  = tf->format;
 	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-- 
cgit v1.2.3


From 40247d87d215d0f1b6370b2888548544eedf0d89 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 22:53:37 -0700
Subject: r300g: Cleanup old static shader state.

---
 src/gallium/drivers/r300/Makefile  |   2 -
 src/gallium/drivers/r300/r300_fs.c |  11 +---
 src/gallium/drivers/r300/r300_fs.h |   3 +-
 src/gallium/drivers/r300/r300_vs.c |  86 -------------------------
 src/gallium/drivers/r300/r3xx_fs.c |  74 ----------------------
 src/gallium/drivers/r300/r3xx_fs.h |  32 ----------
 src/gallium/drivers/r300/r5xx_fs.c | 125 -------------------------------------
 src/gallium/drivers/r300/r5xx_fs.h |  32 ----------
 8 files changed, 4 insertions(+), 361 deletions(-)
 delete mode 100644 src/gallium/drivers/r300/r3xx_fs.c
 delete mode 100644 src/gallium/drivers/r300/r3xx_fs.h
 delete mode 100644 src/gallium/drivers/r300/r5xx_fs.c
 delete mode 100644 src/gallium/drivers/r300/r5xx_fs.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index c4f2c021c4..f73d80de88 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -4,8 +4,6 @@ include $(TOP)/configs/current
 LIBNAME = r300
 
 C_SOURCES = \
-	r3xx_fs.c \
-	r5xx_fs.c \
 	r300_chipset.c \
 	r300_clear.c \
 	r300_context.c \
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 2db185fd80..29ddc84c41 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -130,14 +130,9 @@ void r300_translate_fragment_shader(struct r300_context* r300,
     /* Invoke the compiler */
     r3xx_compile_fragment_program(&compiler);
     if (compiler.Base.Error) {
-        /* Todo: Fallback to software rendering gracefully? */
-        fprintf(stderr, "r300 FP: Compiler error: %s\n", compiler.Base.ErrorMsg);
-
-        if (compiler.is_r500) {
-            memcpy(compiler.code, &r5xx_passthrough_fragment_shader, sizeof(r5xx_passthrough_fragment_shader));
-        } else {
-            memcpy(compiler.code, &r3xx_passthrough_fragment_shader, sizeof(r3xx_passthrough_fragment_shader));
-        }
+        /* XXX failover maybe? */
+        DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n",
+            compiler.Base.ErrorMsg);
     }
 
     /* And, finally... */
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 8dfb139738..e831c30301 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -28,8 +28,7 @@
 
 #include "tgsi/tgsi_scan.h"
 
-#include "r3xx_fs.h"
-#include "r5xx_fs.h"
+#include "radeon_code.h"
 
 struct r300_fragment_shader {
     /* Parent class */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 8460cfaf51..eca85879a7 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -146,89 +146,3 @@ void r300_translate_vertex_shader(struct r300_context* r300,
     rc_destroy(&compiler.Base);
     vs->translated = TRUE;
 }
-
-
-/* XXX get these to r300_reg */
-#define R300_PVS_DST_OPCODE(x)   ((x) << 0)
-#   define R300_VE_DOT_PRODUCT            1
-#   define R300_VE_MULTIPLY               2
-#   define R300_VE_ADD                    3
-#   define R300_VE_MAXIMUM                7
-#   define R300_VE_SET_LESS_THAN          10
-#define R300_PVS_DST_MATH_INST     (1 << 6)
-#   define R300_ME_RECIP_DX               6
-#define R300_PVS_DST_MACRO_INST    (1 << 7)
-#   define R300_PVS_MACRO_OP_2CLK_MADD    0
-#define R300_PVS_DST_REG_TYPE(x) ((x) << 8)
-#   define R300_PVS_DST_REG_TEMPORARY     0
-#   define R300_PVS_DST_REG_A0            1
-#   define R300_PVS_DST_REG_OUT           2
-#   define R300_PVS_DST_REG_OUT_REPL_X    3
-#   define R300_PVS_DST_REG_ALT_TEMPORARY 4
-#   define R300_PVS_DST_REG_INPUT         5
-#define R300_PVS_DST_OFFSET(x)   ((x) << 13)
-#define R300_PVS_DST_WE(x)       ((x) << 20)
-#define R300_PVS_DST_WE_XYZW     (0xf << 20)
-
-#define R300_PVS_SRC_REG_TYPE(x) ((x) << 0)
-#   define R300_PVS_SRC_REG_TEMPORARY     0
-#   define R300_PVS_SRC_REG_INPUT         1
-#   define R300_PVS_SRC_REG_CONSTANT      2
-#   define R300_PVS_SRC_REG_ALT_TEMPORARY 3
-#define R300_PVS_SRC_OFFSET(x)   ((x) << 5)
-#define R300_PVS_SRC_SWIZZLE(x)  ((x) << 13)
-#   define R300_PVS_SRC_SELECT_X          0
-#   define R300_PVS_SRC_SELECT_Y          1
-#   define R300_PVS_SRC_SELECT_Z          2
-#   define R300_PVS_SRC_SELECT_W          3
-#   define R300_PVS_SRC_SELECT_FORCE_0    4
-#   define R300_PVS_SRC_SELECT_FORCE_1    5
-#   define R300_PVS_SRC_SWIZZLE_XYZW \
-    ((R300_PVS_SRC_SELECT_X | (R300_PVS_SRC_SELECT_Y << 3) | \
-     (R300_PVS_SRC_SELECT_Z << 6) | (R300_PVS_SRC_SELECT_W << 9)) << 13)
-#   define R300_PVS_SRC_SWIZZLE_ZERO \
-    ((R300_PVS_SRC_SELECT_FORCE_0 | (R300_PVS_SRC_SELECT_FORCE_0 << 3) | \
-     (R300_PVS_SRC_SELECT_FORCE_0 << 6) | \
-      (R300_PVS_SRC_SELECT_FORCE_0 << 9)) << 13)
-#   define R300_PVS_SRC_SWIZZLE_ONE \
-    ((R300_PVS_SRC_SELECT_FORCE_1 | (R300_PVS_SRC_SELECT_FORCE_1 << 3) | \
-     (R300_PVS_SRC_SELECT_FORCE_1 << 6) | \
-      (R300_PVS_SRC_SELECT_FORCE_1 << 9)) << 13)
-#define R300_PVS_MODIFIER_X        (1 << 25)
-#define R300_PVS_MODIFIER_Y        (1 << 26)
-#define R300_PVS_MODIFIER_Z        (1 << 27)
-#define R300_PVS_MODIFIER_W        (1 << 28)
-#define R300_PVS_NEGATE_XYZW \
-    (R300_PVS_MODIFIER_X | R300_PVS_MODIFIER_Y | \
-     R300_PVS_MODIFIER_Z | R300_PVS_MODIFIER_W)
-
-struct r300_vertex_program_code r300_passthrough_vertex_shader = {
-    .length = 8, /* two instructions */
-
-    /* MOV out[0], in[0] */
-    .body.d[0] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
-        R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-        R300_PVS_DST_OFFSET(0) | R300_PVS_DST_WE_XYZW,
-    .body.d[1] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-        R300_PVS_SRC_OFFSET(0) | R300_PVS_SRC_SWIZZLE_XYZW,
-    .body.d[2] = R300_PVS_SRC_SWIZZLE_ZERO,
-    .body.d[3] = 0x0,
-
-    /* MOV out[1], in[1] */
-    .body.d[4] = R300_PVS_DST_OPCODE(R300_VE_ADD) |
-        R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-        R300_PVS_DST_OFFSET(1) | R300_PVS_DST_WE_XYZW,
-    .body.d[5] = R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-        R300_PVS_SRC_OFFSET(1) | R300_PVS_SRC_SWIZZLE_XYZW,
-    .body.d[6] = R300_PVS_SRC_SWIZZLE_ZERO,
-    .body.d[7] = 0x0,
-
-    .inputs[0] = 0,
-    .inputs[1] = 1,
-    .outputs[0] = 0,
-    .outputs[1] = 1,
-
-    .InputsRead = 3,
-    .OutputsWritten = 3
-};
-
diff --git a/src/gallium/drivers/r300/r3xx_fs.c b/src/gallium/drivers/r300/r3xx_fs.c
deleted file mode 100644
index c1c1194d58..0000000000
--- a/src/gallium/drivers/r300/r3xx_fs.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r3xx_fs.h"
-
-#include "r300_reg.h"
-
-struct rX00_fragment_program_code r3xx_passthrough_fragment_shader = {
-    .code.r300.alu.length = 1,
-    .code.r300.tex.length = 0,
-
-    .code.r300.config = 0,
-    .code.r300.pixsize = 0,
-    .code.r300.code_offset = 0,
-    .code.r300.code_addr[3] = R300_RGBA_OUT,
-
-    .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
-        R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
-        R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
-        R300_ALU_OUTC_CMP,
-    .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
-        R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
-    .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
-        R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
-        R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
-        R300_ALU_OUTA_CMP,
-    .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
-        R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
-
-struct rX00_fragment_program_code r3xx_texture_fragment_shader = {
-    .code.r300.alu.length = 1,
-    .code.r300.tex.length = 1,
-
-    .code.r300.config = R300_PFS_CNTL_FIRST_NODE_HAS_TEX,
-    .code.r300.pixsize = 0,
-    .code.r300.code_offset = 0,
-    .code.r300.code_addr[3] = R300_RGBA_OUT,
-
-    .code.r300.tex.inst[0] = R300_TEX_OP_LD << R300_TEX_INST_SHIFT,
-
-    .code.r300.alu.inst[0].rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
-        R300_RGB_SWIZB(R300_ALU_ARGC_SRC0C_XYZ) |
-        R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
-        R300_ALU_OUTC_CMP,
-    .code.r300.alu.inst[0].rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
-        R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
-    .code.r300.alu.inst[0].alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
-        R300_ALPHA_SWIZB(R300_ALU_ARGA_SRC0A) |
-        R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
-        R300_ALU_OUTA_CMP,
-    .code.r300.alu.inst[0].alpha_addr = R300_ALPHA_ADDR0(0) |
-        R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
-};
diff --git a/src/gallium/drivers/r300/r3xx_fs.h b/src/gallium/drivers/r300/r3xx_fs.h
deleted file mode 100644
index 51cd245724..0000000000
--- a/src/gallium/drivers/r300/r3xx_fs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R3XX_FS_H
-#define R3XX_FS_H
-
-#include "radeon_code.h"
-
-struct rX00_fragment_program_code r3xx_passthrough_fragment_shader;
-struct rX00_fragment_program_code r3xx_texture_fragment_shader;
-
-#endif /* R3XX_FS_H */
diff --git a/src/gallium/drivers/r300/r5xx_fs.c b/src/gallium/drivers/r300/r5xx_fs.c
deleted file mode 100644
index f072deab0d..0000000000
--- a/src/gallium/drivers/r300/r5xx_fs.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r5xx_fs.h"
-
-#include "r300_reg.h"
-
-/* XXX this all should find its way back to r300_reg */
-/* Swizzle tools */
-#define R500_SWIZZLE_ZERO 4
-#define R500_SWIZZLE_HALF 5
-#define R500_SWIZZLE_ONE 6
-#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
-#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
-#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
-#define R500_SWIZ_MOD_NEG 1
-#define R500_SWIZ_MOD_ABS 2
-#define R500_SWIZ_MOD_NEG_ABS 3
-/* Swizzles for inst2 */
-#define R500_SWIZ_TEX_STRQ(x) ((x) << 8)
-#define R500_SWIZ_TEX_RGBA(x) ((x) << 24)
-/* Swizzles for inst3 */
-#define R500_SWIZ_RGB_A(x) ((x) << 2)
-#define R500_SWIZ_RGB_B(x) ((x) << 15)
-/* Swizzles for inst4 */
-#define R500_SWIZ_ALPHA_A(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_B(x) ((x) << 21)
-/* Swizzle for inst5 */
-#define R500_SWIZ_RGBA_C(x) ((x) << 14)
-#define R500_SWIZ_ALPHA_C(x) ((x) << 27)
-/* Writemasks */
-#define R500_TEX_WMASK(x) ((x) << 11)
-#define R500_ALU_WMASK(x) ((x) << 11)
-#define R500_ALU_OMASK(x) ((x) << 15)
-#define R500_W_OMASK (1 << 31)
-
-struct rX00_fragment_program_code r5xx_passthrough_fragment_shader = {
-    .code.r500.max_temp_idx = 0,
-    .code.r500.inst_end = 0,
-
-    .code.r500.inst[0].inst0 = R500_INST_TYPE_OUT |
-        R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
-        R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
-        R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
-    .code.r500.inst[0].inst1 =
-        R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
-        R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
-    .code.r500.inst[0].inst2 =
-        R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
-        R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
-    .code.r500.inst[0].inst3 =
-        R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
-        R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
-        R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
-        R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
-    .code.r500.inst[0].inst4 =
-        R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
-    .code.r500.inst[0].inst5 =
-        R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
-        R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
-        R500_ALU_RGBA_A_SWIZ_0,
-};
-
-struct rX00_fragment_program_code r5xx_texture_fragment_shader = {
-    .code.r500.max_temp_idx = 0,
-    .code.r500.inst_end = 1,
-
-    .code.r500.inst[0].inst0 = R500_INST_TYPE_TEX |
-        R500_INST_TEX_SEM_WAIT |
-        R500_INST_RGB_WMASK_RGB | R500_INST_ALPHA_WMASK |
-        R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
-    .code.r500.inst[0].inst1 = R500_TEX_ID(0) | R500_TEX_INST_LD |
-        R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED,
-    .code.r500.inst[0].inst2 = R500_TEX_SRC_ADDR(0) |
-        R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G |
-        R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A |
-        R500_TEX_DST_ADDR(0) |
-        R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G |
-        R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A,
-    .code.r500.inst[0].inst3 = 0x0,
-    .code.r500.inst[0].inst4 = 0x0,
-    .code.r500.inst[0].inst5 = 0x0,
-
-    .code.r500.inst[1].inst0 = R500_INST_TYPE_OUT |
-        R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
-        R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
-        R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
-    .code.r500.inst[1].inst1 =
-        R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
-        R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
-    .code.r500.inst[1].inst2 =
-        R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
-        R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
-    .code.r500.inst[1].inst3 =
-        R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
-        R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
-        R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
-        R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B,
-    .code.r500.inst[1].inst4 =
-        R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
-    .code.r500.inst[1].inst5 =
-        R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
-        R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
-        R500_ALU_RGBA_A_SWIZ_0,
-};
diff --git a/src/gallium/drivers/r300/r5xx_fs.h b/src/gallium/drivers/r300/r5xx_fs.h
deleted file mode 100644
index a4addde32b..0000000000
--- a/src/gallium/drivers/r300/r5xx_fs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *                Joakim Sindholt <opensource@zhasha.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R5XX_FS_H
-#define R5XX_FS_H
-
-#include "radeon_code.h"
-
-struct rX00_fragment_program_code r5xx_passthrough_fragment_shader;
-struct rX00_fragment_program_code r5xx_texture_fragment_shader;
-
-#endif /* R5XX_FS_H */
-- 
cgit v1.2.3


From 5a653ada4143c24b00b0ca12b4898064afd59c29 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 22:54:52 -0700
Subject: r300g: Remove unused debug flag.

---
 src/gallium/drivers/r300/r300_context.h | 5 ++---
 src/gallium/drivers/r300/r300_debug.c   | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 61a57df35e..4e2c0ec34e 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -317,9 +317,8 @@ void r300_init_surface_functions(struct r300_context* r300);
 #define DBG_VP      0x0000004
 #define DBG_CS      0x0000008
 #define DBG_DRAW    0x0000010
-#define DBG_SURF    0x0000020
-#define DBG_TEX     0x0000040
-#define DBG_FALL    0x0000080
+#define DBG_TEX     0x0000020
+#define DBG_FALL    0x0000040
 /*@}*/
 
 static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index bfd4ab018a..421253ca72 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -37,7 +37,6 @@ static struct debug_option debug_options[] = {
     { "vp", DBG_VP, "Vertex program handling" },
     { "cs", DBG_CS, "Command submissions" },
     { "draw", DBG_DRAW, "Draw and emit" },
-    { "surf", DBG_SURF, "Surface drawing" },
     { "tex", DBG_TEX, "Textures" },
     { "fall", DBG_FALL, "Fallbacks" },
 
-- 
cgit v1.2.3


From 034db65f08b943ee9940947db69e4e190f751061 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 23:23:37 -0700
Subject: r300g: Update comments, asserts, indents in r300_texture.

I wish I knew enough about textures to really really REALLY fix that file.
---
 src/gallium/drivers/r300/r300_texture.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 339fbb6242..3c8ff24e17 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -42,16 +42,16 @@ static void r300_setup_texture_state(struct r300_texture* tex)
     /* XXX */
     state->format1 = r300_translate_texformat(pt->format);
     if (pt->target == PIPE_TEXTURE_CUBE) {
-	state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+        state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
     }
     if (pt->target == PIPE_TEXTURE_3D) {
-	state->format1 |= R300_TX_FORMAT_3D;
+        state->format1 |= R300_TX_FORMAT_3D;
     }
 
     state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1;
 
-    /* Assume (somewhat foolishly) that oversized textures will
-     * not be permitted by the state tracker. */
+    /* Don't worry about accidentally setting this bit on non-r500;
+     * the kernel should catch it. */
     if (pt->width[0] > 2048) {
         state->format2 |= R500_TXWIDTH_BIT11;
     }
@@ -73,7 +73,8 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
         return tex->stride_override;
 
     if (level > tex->tex.last_level) {
-        debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__, level, tex->tex.last_level);
+        debug_printf("%s: level (%u) > last_level (%u)\n", __FUNCTION__,
+            level, tex->tex.last_level);
         return 0;
     }
 
@@ -96,11 +97,6 @@ static void r300_setup_miptree(struct r300_texture* tex)
         base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
         base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
 
-        /* Radeons enjoy things in multiples of 64.
-         *
-         * XXX
-         * POT, uncompressed, unmippmapped textures can be aligned to 32,
-         * instead of 64. */
         stride = r300_texture_get_stride(tex, i);
         size = stride * base->nblocksy[i] * base->depth[i];
 
@@ -195,9 +191,7 @@ static struct pipe_texture*
 {
     struct r300_texture* tex;
 
-    /* XXX we should start doing mips now... */
     if (base->target != PIPE_TEXTURE_2D ||
-        base->last_level != 0 ||
         base->depth[0] != 1) {
         return NULL;
     }
@@ -213,7 +207,6 @@ static struct pipe_texture*
 
     tex->stride_override = *stride;
 
-    /* XXX */
     r300_setup_texture_state(tex);
 
     pipe_buffer_reference(&tex->buffer, buffer);
-- 
cgit v1.2.3


From 0a8cd4862c4f04308ab818077bab94417ffbf50b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 21 Oct 2009 23:26:02 -0700
Subject: r300g: Update comments, function names in r300_state_inlines.

---
 src/gallium/drivers/r300/r300_state_derived.c |  2 +-
 src/gallium/drivers/r300/r300_state_inlines.h | 24 +++++++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index e749bd9cb9..42aee7231e 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -250,7 +250,7 @@ static void r300_vertex_psc(struct r300_context* r300,
         assert(tab[i] != -1);
 
         /* Add the attribute to the PSC table. */
-        temp = translate_vertex_data_type(vinfo->attrib[i].emit) |
+        temp = translate_draw_vertex_data_type(vinfo->attrib[i].emit) |
             tab[i] << R300_DST_VEC_LOC_SHIFT;
 
         if (i & 1) {
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 9135c55ec4..c82d8e5f08 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -53,6 +53,7 @@ static INLINE uint32_t r300_translate_blend_function(int blend_func)
             return R300_COMB_FCN_MAX;
         default:
             debug_printf("r300: Unknown blend function %d\n", blend_func);
+            assert(0);
             break;
     }
     return 0;
@@ -100,6 +101,7 @@ static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
         case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */
         default:
             debug_printf("r300: Unknown blend factor %d\n", blend_fact);
+            assert(0);
             break;
     }
     return 0;
@@ -129,6 +131,7 @@ static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
         default:
             debug_printf("r300: Unknown depth/stencil function %d\n",
                 zs_func);
+            assert(0);
             break;
     }
     return 0;
@@ -155,6 +158,7 @@ static INLINE uint32_t r300_translate_stencil_op(int s_op)
             return R300_ZS_INVERT;
         default:
             debug_printf("r300: Unknown stencil op %d", s_op);
+            assert(0);
             break;
     }
     return 0;
@@ -181,6 +185,7 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
             return R300_FG_ALPHA_FUNC_ALWAYS;
         default:
             debug_printf("r300: Unknown alpha function %d", alpha_func);
+            assert(0);
             break;
     }
     return 0;
@@ -209,6 +214,7 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
             return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
         default:
             debug_printf("r300: Unknown texture wrap %d", wrap);
+            assert(0);
             return 0;
     }
 }
@@ -228,6 +234,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
             break;
         default:
             debug_printf("r300: Unknown texture filter %d\n", min);
+            assert(0);
             break;
     }
     switch (mag) {
@@ -242,6 +249,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
             break;
         default:
             debug_printf("r300: Unknown texture filter %d\n", mag);
+            assert(0);
             break;
     }
     switch (mip) {
@@ -256,6 +264,7 @@ static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
             break;
         default:
             debug_printf("r300: Unknown texture filter %d\n", mip);
+            assert(0);
             break;
     }
 
@@ -279,6 +288,8 @@ static INLINE uint32_t r300_anisotropy(float max_aniso)
 
 /* Buffer formats. */
 
+/* Colorbuffer formats. This is the unswizzled format of the RB3D block's
+ * output. For the swizzling of the targets, check the shader's format. */
 static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
 {
     switch (format) {
@@ -313,11 +324,13 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
             debug_printf("r300: Implementation error: "
                 "Got unsupported color format %s in %s\n",
                 pf_name(format), __FUNCTION__);
+            assert(0);
             break;
     }
     return 0;
 }
 
+/* Depthbuffer and stencilbuffer. Thankfully, we only support two kinds. */
 static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
 {
     switch (format) {
@@ -331,18 +344,22 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
             debug_printf("r300: Implementation error: "
                 "Got unsupported ZS format %s in %s\n",
                 pf_name(format), __FUNCTION__);
+            assert(0);
             break;
     }
     return 0;
 }
 
-/* Translate pipe_format into US_OUT_FMT.
+/* Shader output formats. This is essentially the swizzle from the shader
+ * to the RB3D block.
+ *
  * Note that formats are stored from C3 to C0. */
 static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format)
 {
     switch (format) {
         case PIPE_FORMAT_A8R8G8B8_UNORM:
         case PIPE_FORMAT_X8R8G8B8_UNORM:
+        /* XXX */
         case PIPE_FORMAT_Z24S8_UNORM:
             return R300_US_OUT_FMT_C4_8 |
                 R300_C0_SEL_B | R300_C1_SEL_G |
@@ -356,6 +373,7 @@ static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format)
             debug_printf("r300: Implementation error: "
                 "Got unsupported output format %s in %s\n",
                 pf_name(format), __FUNCTION__);
+            assert(0);
             return R300_US_OUT_FMT_UNUSED;
     }
     return 0;
@@ -382,7 +400,8 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
     return 0;
 }
 
-static INLINE uint32_t translate_vertex_data_type(int type) {
+/* Translate Draw vertex types into PSC vertex types. */
+static INLINE uint32_t translate_draw_vertex_data_type(int type) {
     switch (type) {
         case EMIT_1F:
         case EMIT_1F_PSIZE:
@@ -406,7 +425,6 @@ static INLINE uint32_t translate_vertex_data_type(int type) {
             assert(0);
             break;
     }
-
     return 0;
 }
 
-- 
cgit v1.2.3


From 511bd5f32b67f903b590f00f7ccf8132127ef2e4 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 22 Oct 2009 00:21:08 -0700
Subject: r300g: Check for NULL Draw during flush.

Split from the fastpath WIP.
---
 src/gallium/drivers/r300/r300_flush.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
index c222ea09b1..14a08241fc 100644
--- a/src/gallium/drivers/r300/r300_flush.c
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -40,8 +40,10 @@ static void r300_flush(struct pipe_context* pipe,
 
     CS_LOCALS(r300);
     /* We probably need to flush Draw, but we may have been called from
-     * within Draw. This feels kludgy, but it might be the best thing. */
-    if (!r300->draw->flushing) {
+     * within Draw. This feels kludgy, but it might be the best thing.
+     *
+     * Of course, the best thing is to kill Draw with fire. :3 */
+    if (r300->draw && !r300->draw->flushing) {
         draw_flush(r300->draw);
     }
 
-- 
cgit v1.2.3


From eebf4b5299a880f4cdf8a916b4e1ca0bd79a6f07 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 22 Oct 2009 21:55:22 +1000
Subject: nv50: support 3D class 0x8597, remove redundant unknown chipset
 detection

---
 src/gallium/drivers/nv50/nv50_screen.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 0bd5487695..63dce0f4c2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -218,7 +218,16 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		tesla_class = NV54TCL;
 		break;
 	case 0xa0:
-		tesla_class = NVA0TCL;
+		switch (chipset) {
+		case 0xa0:
+		case 0xaa:
+		case 0xac:
+			tesla_class = NVA0TCL;
+			break;
+		default:
+			tesla_class = 0x8597;
+			break;
+		}
 		break;
 	default:
 		NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", chipset);
@@ -226,12 +235,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		return NULL;
 	}
 
-	if (tesla_class == 0) {
-		NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset);
-		nv50_screen_destroy(pscreen);
-		return NULL;
-	}
-
 	ret = nouveau_grobj_alloc(chan, 0xbeef5097, tesla_class,
 		&screen->tesla);
 	if (ret) {
-- 
cgit v1.2.3


From a07437f8a6a863654487c5586cbd02bfc20f0a3b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 18:10:19 +0100
Subject: llvmpipe: Call util_cpu_detect() from the unit tests.

---
 src/gallium/drivers/llvmpipe/lp_test_format.c | 3 +++
 src/gallium/drivers/llvmpipe/lp_test_main.c   | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index ab80c0143f..5dc8297fe9 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -35,6 +35,7 @@
 #include <llvm-c/Target.h>
 #include <llvm-c/Transforms/Scalar.h>
 
+#include "util/u_cpu_detect.h"
 #include "util/u_format.h"
 
 #include "lp_bld_format.h"
@@ -263,6 +264,8 @@ int main(int argc, char **argv)
    LLVMInitializeNativeTarget();
 #endif
 
+   util_cpu_detect();
+
    for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
       if(!test_format(&test_cases[i]))
         ret = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index f07fa256f1..d4767ff52b 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -34,6 +34,8 @@
  */
 
 
+#include "util/u_cpu_detect.h"
+
 #include "lp_bld_const.h"
 #include "lp_test.h"
 
@@ -370,6 +372,8 @@ int main(int argc, char **argv)
    LLVMInitializeNativeTarget();
 #endif
 
+   util_cpu_detect();
+
    if(fp) {
       /* Warm up the caches */
       test_some(0, NULL, 100);
-- 
cgit v1.2.3


From 421507de06bd42a322c5864d887e67e385eb458c Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 18:28:17 +0100
Subject: llvmpipe: Factor vector packing/unpacking to a separate source file.

These functions will be needed to implement many of the 8-bit operations,
and they are quite complex on their own.
---
 src/gallium/drivers/llvmpipe/Makefile      |   1 +
 src/gallium/drivers/llvmpipe/SConscript    |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_conv.c | 240 +----------------
 src/gallium/drivers/llvmpipe/lp_bld_pack.c | 419 +++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_pack.h |  95 +++++++
 5 files changed, 519 insertions(+), 237 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.c
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_pack.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 21aff1967a..b96ee23a99 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -21,6 +21,7 @@ C_SOURCES = \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
+	lp_bld_pack.c \
 	lp_bld_sample_soa.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 13cd465838..403e4daa43 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -33,6 +33,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
+        'lp_bld_pack.c',
 		'lp_bld_sample_soa.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_conv.c b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
index 20c8710214..9935209437 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_conv.c
@@ -69,6 +69,7 @@
 #include "lp_bld_const.h"
 #include "lp_bld_intr.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
 #include "lp_bld_conv.h"
 
 
@@ -198,241 +199,6 @@ lp_build_unsigned_norm_to_float(LLVMBuilderRef builder,
 }
 
 
-/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef
-lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i, j;
-
-   assert(n <= LP_MAX_VECTOR_LENGTH);
-   assert(lo_hi < 2);
-
-   /* TODO: cache results in a static table */
-
-   for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
-      elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
-      elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
-   }
-
-   return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build shuffle vectors that match PACKxx instructions.
- */
-static LLVMValueRef
-lp_build_const_pack_shuffle(unsigned n)
-{
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i;
-
-   assert(n <= LP_MAX_VECTOR_LENGTH);
-
-   /* TODO: cache results in a static table */
-
-   for(i = 0; i < n; ++i)
-      elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
-
-   return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Expand the bit width.
- *
- * This will only change the number of bits the values are represented, not the
- * values themselves.
- */
-static void
-lp_build_expand(LLVMBuilderRef builder,
-               struct lp_type src_type,
-               struct lp_type dst_type,
-               LLVMValueRef src,
-               LLVMValueRef *dst, unsigned num_dsts)
-{
-   unsigned num_tmps;
-   unsigned i;
-
-   /* Register width must remain constant */
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
-   /* We must not loose or gain channels. Only precision */
-   assert(src_type.length == dst_type.length * num_dsts);
-
-   num_tmps = 1;
-   dst[0] = src;
-
-   while(src_type.width < dst_type.width) {
-      struct lp_type new_type = src_type;
-      LLVMTypeRef new_vec_type;
-
-      new_type.width *= 2;
-      new_type.length /= 2;
-      new_vec_type = lp_build_vec_type(new_type);
-
-      for(i = num_tmps; i--; ) {
-         LLVMValueRef zero;
-         LLVMValueRef shuffle_lo;
-         LLVMValueRef shuffle_hi;
-         LLVMValueRef lo;
-         LLVMValueRef hi;
-
-         zero = lp_build_zero(src_type);
-         shuffle_lo = lp_build_const_unpack_shuffle(src_type.length, 0);
-         shuffle_hi = lp_build_const_unpack_shuffle(src_type.length, 1);
-
-         /*  PUNPCKLBW, PUNPCKHBW */
-         lo = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_lo, "");
-         hi = LLVMBuildShuffleVector(builder, dst[i], zero, shuffle_hi, "");
-
-         dst[2*i + 0] = LLVMBuildBitCast(builder, lo, new_vec_type, "");
-         dst[2*i + 1] = LLVMBuildBitCast(builder, hi, new_vec_type, "");
-      }
-
-      src_type = new_type;
-
-      num_tmps *= 2;
-   }
-
-   assert(num_tmps == num_dsts);
-}
-
-
-/**
- * Non-interleaved pack.
- *
- * This will move values as
- *
- *   lo =   __ l0 __ l1 __ l2 __..  __ ln
- *   hi =   __ h0 __ h1 __ h2 __..  __ hn
- *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
- *
- * TODO: handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack2(LLVMBuilderRef builder,
-               struct lp_type src_type,
-               struct lp_type dst_type,
-               boolean clamped,
-               LLVMValueRef lo,
-               LLVMValueRef hi)
-{
-   LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
-   LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
-   LLVMValueRef shuffle;
-   LLVMValueRef res;
-
-   /* Register width must remain constant */
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
-   /* We must not loose or gain channels. Only precision */
-   assert(src_type.length * 2 == dst_type.length);
-
-   assert(!src_type.floating);
-   assert(!dst_type.floating);
-
-   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
-      /* All X86 non-interleaved pack instructions all take signed inputs and
-       * saturate them, so saturate beforehand. */
-      if(!src_type.sign && !clamped) {
-         struct lp_build_context bld;
-         unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
-         LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
-         lp_build_context_init(&bld, builder, src_type);
-         lo = lp_build_min(&bld, lo, dst_max);
-         hi = lp_build_min(&bld, hi, dst_max);
-      }
-
-      switch(src_type.width) {
-      case 32:
-         if(dst_type.sign || !util_cpu_caps.has_sse4_1)
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
-         else
-            /* PACKUSDW is the only instrinsic with a consistent signature */
-            return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
-         break;
-
-      case 16:
-         if(dst_type.sign)
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
-         else
-            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
-         break;
-
-      default:
-         assert(0);
-         return LLVMGetUndef(dst_vec_type);
-         break;
-      }
-
-      res = LLVMBuildBitCast(builder, res, dst_vec_type, "");
-      return res;
-   }
-
-   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
-   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
-
-   shuffle = lp_build_const_pack_shuffle(dst_type.length);
-
-   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
-
-   return res;
-}
-
-
-/**
- * Truncate the bit width.
- *
- * TODO: Handle saturation consistently.
- */
-static LLVMValueRef
-lp_build_pack(LLVMBuilderRef builder,
-              struct lp_type src_type,
-              struct lp_type dst_type,
-              boolean clamped,
-              const LLVMValueRef *src, unsigned num_srcs)
-{
-   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
-   unsigned i;
-
-   /* Register width must remain constant */
-   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
-
-   /* We must not loose or gain channels. Only precision */
-   assert(src_type.length * num_srcs == dst_type.length);
-
-   for(i = 0; i < num_srcs; ++i)
-      tmp[i] = src[i];
-
-   while(src_type.width > dst_type.width) {
-      struct lp_type new_type = src_type;
-
-      new_type.width /= 2;
-      new_type.length *= 2;
-
-      /* Take in consideration the sign changes only in the last step */
-      if(new_type.width == dst_type.width)
-         new_type.sign = dst_type.sign;
-
-      num_srcs /= 2;
-
-      for(i = 0; i < num_srcs; ++i)
-         tmp[i] = lp_build_pack2(builder, src_type, new_type, clamped,
-                                 tmp[2*i + 0], tmp[2*i + 1]);
-
-      src_type = new_type;
-   }
-
-   assert(num_srcs == 1);
-
-   return tmp[0];
-}
-
-
 /**
  * Generic type conversion.
  *
@@ -572,7 +338,7 @@ lp_build_conv(LLVMBuilderRef builder,
 
    if(tmp_type.width < dst_type.width) {
       assert(num_tmps == 1);
-      lp_build_expand(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
+      lp_build_unpack(builder, tmp_type, dst_type, tmp[0], tmp, num_dsts);
       tmp_type.width = dst_type.width;
       tmp_type.length = dst_type.length;
       num_tmps = num_dsts;
@@ -692,7 +458,7 @@ lp_build_conv_mask(LLVMBuilderRef builder,
    }
    else if(src_type.width < dst_type.width) {
       assert(num_srcs == 1);
-      lp_build_expand(builder, src_type, dst_type, src[0], dst, num_dsts);
+      lp_build_unpack(builder, src_type, dst_type, src[0], dst, num_dsts);
    }
    else {
       assert(num_srcs == num_dsts);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
new file mode 100644
index 0000000000..fe82fda039
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -0,0 +1,419 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * @file
+ * Helper functions for packing/unpacking.
+ *
+ * Pack/unpacking is necessary for conversion between types of different
+ * bit width.
+ *
+ * They are also commonly used when a computation needs higher
+ * precision for the intermediate values. For example, if one needs the
+ * function:
+ *
+ *   c = compute(a, b);
+ *
+ * to use more precision for intermediate results then one should implement it
+ * as:
+ *
+ *   LLVMValueRef
+ *   compute(LLVMBuilderRef builder, struct lp_type type, LLVMValueRef a, LLVMValueRef b)
+ *   {
+ *      struct lp_type wide_type = lp_wider_type(type);
+ *      LLVMValueRef al, ah, bl, bh, cl, ch, c;
+ *
+ *      lp_build_unpack2(builder, type, wide_type, a, &al, &ah);
+ *      lp_build_unpack2(builder, type, wide_type, b, &bl, &bh);
+ *
+ *      cl = compute_half(al, bl);
+ *      ch = compute_half(ah, bh);
+ *
+ *      c = lp_build_pack2(builder, wide_type, type, cl, ch);
+ *
+ *      return c;
+ *   }
+ *
+ * where compute_half() would do the computation for half the elements with
+ * twice the precision.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "util/u_cpu_detect.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_intr.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_pack.h"
+
+
+/**
+ * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_unpack_shuffle(unsigned n, unsigned lo_hi)
+{
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i, j;
+
+   assert(n <= LP_MAX_VECTOR_LENGTH);
+   assert(lo_hi < 2);
+
+   /* TODO: cache results in a static table */
+   /* Emit the index pairs (j, n + j); j starts at 0 for lo_hi == 0 and at n/2 for lo_hi == 1 */
+   for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
+      elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
+      elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
+   }
+
+   return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Build shuffle vectors that match PACKxx instructions.
+ */
+static LLVMValueRef
+lp_build_const_pack_shuffle(unsigned n)
+{
+   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   assert(n <= LP_MAX_VECTOR_LENGTH);
+
+   /* TODO: cache results in a static table */
+   /* Select the even indices 0, 2, ..., 2*(n - 1) */
+   for(i = 0; i < n; ++i)
+      elems[i] = LLVMConstInt(LLVMInt32Type(), 2*i, 0);
+
+   return LLVMConstVector(elems, n);
+}
+
+
+/**
+ * Interleave vector elements.
+ *
+ * Matches the PUNPCKLxx (lo_hi == 0) and PUNPCKHxx (lo_hi == 1) SSE instructions.
+ */
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+                     struct lp_type type,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     unsigned lo_hi)
+{
+   LLVMValueRef shuffle;
+
+   shuffle = lp_build_const_unpack_shuffle(type.length, lo_hi);
+
+   return LLVMBuildShuffleVector(builder, a, b, shuffle, "");
+}
+
+
+/**
+ * Double the bit width.
+ *
+ * This only changes the number of bits used to represent the values, not the
+ * values themselves.
+ */
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+                 struct lp_type src_type,
+                 struct lp_type dst_type,
+                 LLVMValueRef src,
+                 LLVMValueRef *dst_lo,
+                 LLVMValueRef *dst_hi)
+{
+   LLVMValueRef msb;
+   LLVMTypeRef dst_vec_type;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(dst_type.sign == src_type.sign);
+   assert(dst_type.width == src_type.width * 2);
+   assert(dst_type.length * 2 == src_type.length);
+
+   if(src_type.sign) {
+      /* Replicate the sign bit in the most significant bits */
+      msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
+   }
+   else
+      /* Most significant bits always zero */
+      msb = lp_build_zero(src_type);
+
+   /* Interleave value and extension elements; the operand order depends on endianness */
+   if(util_cpu_caps.little_endian) {
+      *dst_lo = lp_build_interleave2(builder, src_type, src, msb, 0);
+      *dst_hi = lp_build_interleave2(builder, src_type, src, msb, 1);
+   }
+   else {
+      *dst_lo = lp_build_interleave2(builder, src_type, msb, src, 0);
+      *dst_hi = lp_build_interleave2(builder, src_type, msb, src, 1);
+   }
+
+   /* Cast the result into the new type (twice as wide) */
+
+   dst_vec_type = lp_build_vec_type(dst_type);
+
+   *dst_lo = LLVMBuildBitCast(builder, *dst_lo, dst_vec_type, "");
+   *dst_hi = LLVMBuildBitCast(builder, *dst_hi, dst_vec_type, "");
+}
+
+
+/**
+ * Expand the bit width.
+ *
+ * This only changes the number of bits used to represent the values, not the
+ * values themselves. The num_dsts results are written to dst[0..num_dsts-1].
+ */
+void
+lp_build_unpack(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef src,
+                LLVMValueRef *dst, unsigned num_dsts)
+{
+   unsigned num_tmps;
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length == dst_type.length * num_dsts);
+
+   num_tmps = 1;
+   dst[0] = src;
+
+   while(src_type.width < dst_type.width) {
+      struct lp_type tmp_type = src_type;
+
+      tmp_type.width *= 2;
+      tmp_type.length /= 2;
+      /* Walk backwards: dst[i] is read before dst[2*i] and dst[2*i + 1] are written */
+      for(i = num_tmps; i--; ) {
+         lp_build_unpack2(builder, src_type, tmp_type, dst[i], &dst[2*i + 0], &dst[2*i + 1]);
+      }
+
+      src_type = tmp_type;
+
+      num_tmps *= 2;
+   }
+
+   assert(num_tmps == num_dsts);
+}
+
+
+/**
+ * Non-interleaved pack.
+ *
+ * This will move values as
+ *
+ *   lo =   __ l0 __ l1 __ l2 __..  __ ln
+ *   hi =   __ h0 __ h1 __ h2 __..  __ hn
+ *   res =  l0 l1 l2 .. ln h0 h1 h2 .. hn
+ *
+ * This only changes the number of bits used to represent the values, not the
+ * values themselves.
+ *
+ * It is assumed the values are already clamped into the destination type range.
+ * Values outside that range will produce undefined results; use
+ * lp_build_packs2 instead when they may be out of range.
+ *
+ * Falls back to a generic shuffle when no suitable SSE pack intrinsic exists.
+ */
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+               struct lp_type src_type,
+               struct lp_type dst_type,
+               LLVMValueRef lo,
+               LLVMValueRef hi)
+{
+   LLVMTypeRef src_vec_type = lp_build_vec_type(src_type);
+   LLVMTypeRef dst_vec_type = lp_build_vec_type(dst_type);
+   LLVMValueRef shuffle;
+   LLVMValueRef res;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(src_type.width == dst_type.width * 2);
+   assert(src_type.length * 2 == dst_type.length);
+
+   if(util_cpu_caps.has_sse2 && src_type.width * src_type.length == 128) {
+      switch(src_type.width) {
+      case 32:
+         if(dst_type.sign) {
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128", src_vec_type, lo, hi);
+         }
+         else {
+            if (util_cpu_caps.has_sse4_1) {
+               /* PACKUSDW is the only intrinsic with a consistent signature */
+               return lp_build_intrinsic_binary(builder, "llvm.x86.sse41.packusdw", dst_vec_type, lo, hi);
+            }
+            else {
+               /* No unsigned 32->16 pack before SSE4.1: use the generic shuffle below */
+               res = NULL;
+            }
+         }
+         break;
+
+      case 16:
+         if(dst_type.sign)
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packsswb.128", src_vec_type, lo, hi);
+         else
+            res = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128", src_vec_type, lo, hi);
+         break;
+
+      default:
+         assert(0);
+         return LLVMGetUndef(dst_vec_type);
+         break;
+      }
+
+      if(res)
+         return LLVMBuildBitCast(builder, res, dst_vec_type, "");
+   }
+
+   lo = LLVMBuildBitCast(builder, lo, dst_vec_type, "");
+   hi = LLVMBuildBitCast(builder, hi, dst_vec_type, "");
+
+   shuffle = lp_build_const_pack_shuffle(dst_type.length);
+
+   res = LLVMBuildShuffleVector(builder, lo, hi, shuffle, "");
+
+   return res;
+}
+
+
+
+/**
+ * Non-interleaved pack and saturate.
+ *
+ * Same as lp_build_pack2 but will saturate values so that they fit into the
+ * destination type.
+ */
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef lo,
+                LLVMValueRef hi)
+{
+   boolean clamp;
+
+   assert(!src_type.floating);
+   assert(!dst_type.floating);
+   assert(src_type.sign == dst_type.sign);
+   assert(src_type.width == dst_type.width * 2);
+   assert(src_type.length * 2 == dst_type.length);
+
+   clamp = TRUE;
+
+   /* All X86 SSE non-interleaved pack instructions take signed inputs and
+    * saturate them, so no need to clamp for those cases. */
+   if(util_cpu_caps.has_sse2 &&
+      src_type.width * src_type.length == 128 &&
+      src_type.sign)
+      clamp = FALSE;
+
+   if(clamp) {
+      struct lp_build_context bld;
+      unsigned dst_bits = dst_type.sign ? dst_type.width - 1 : dst_type.width;
+      LLVMValueRef dst_max = lp_build_int_const_scalar(src_type, ((unsigned long long)1 << dst_bits) - 1);
+      lp_build_context_init(&bld, builder, src_type);
+      lo = lp_build_min(&bld, lo, dst_max);
+      hi = lp_build_min(&bld, hi, dst_max);
+      /* FIXME: What about lower bound? Only the upper bound (dst_max) is clamped here */
+   }
+
+   return lp_build_pack2(builder, src_type, dst_type, lo, hi);
+}
+
+
+/**
+ * Truncate the bit width.
+ *
+ * TODO: Handle saturation consistently.
+ */
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+              struct lp_type src_type,
+              struct lp_type dst_type,
+              boolean clamped,
+              const LLVMValueRef *src, unsigned num_srcs)
+{
+   LLVMValueRef (*pack2)(LLVMBuilderRef builder,
+                         struct lp_type src_type,
+                         struct lp_type dst_type,
+                         LLVMValueRef lo,
+                         LLVMValueRef hi);
+   LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
+   unsigned i;
+
+   /* Register width must remain constant */
+   assert(src_type.width * src_type.length == dst_type.width * dst_type.length);
+
+   /* We must not lose or gain channels. Only precision */
+   assert(src_type.length * num_srcs == dst_type.length);
+
+   /* Only use the saturating pack when the caller has not already clamped the inputs */
+   if(clamped)
+      pack2 = &lp_build_pack2;
+   else
+      pack2 = &lp_build_packs2;
+
+   for(i = 0; i < num_srcs; ++i)
+      tmp[i] = src[i];
+
+   while(src_type.width > dst_type.width) {
+      struct lp_type tmp_type = src_type;
+
+      tmp_type.width /= 2;
+      tmp_type.length *= 2;
+
+      /* Take the sign change into consideration only in the last step */
+      if(tmp_type.width == dst_type.width)
+         tmp_type.sign = dst_type.sign;
+
+      num_srcs /= 2;
+
+      for(i = 0; i < num_srcs; ++i)
+         tmp[i] = pack2(builder, src_type, tmp_type, tmp[2*i + 0], tmp[2*i + 1]);
+
+      src_type = tmp_type;
+   }
+
+   assert(num_srcs == 1);
+
+   return tmp[0];
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.h b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
new file mode 100644
index 0000000000..fb2a34984a
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.h
@@ -0,0 +1,95 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for packing/unpacking conversions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_PACK_H
+#define LP_BLD_PACK_H
+
+
+#include <llvm-c/Core.h>  
+
+
+struct lp_type;
+
+
+LLVMValueRef
+lp_build_interleave2(LLVMBuilderRef builder,
+                     struct lp_type type,
+                     LLVMValueRef a,
+                     LLVMValueRef b,
+                     unsigned lo_hi);
+
+
+void
+lp_build_unpack2(LLVMBuilderRef builder,
+                 struct lp_type src_type,
+                 struct lp_type dst_type,
+                 LLVMValueRef src,
+                 LLVMValueRef *dst_lo,
+                 LLVMValueRef *dst_hi);
+
+
+void
+lp_build_unpack(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef src,
+                LLVMValueRef *dst, unsigned num_dsts);
+
+
+LLVMValueRef
+lp_build_packs2(LLVMBuilderRef builder,
+                struct lp_type src_type,
+                struct lp_type dst_type,
+                LLVMValueRef lo,
+                LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack2(LLVMBuilderRef builder,
+               struct lp_type src_type,
+               struct lp_type dst_type,
+               LLVMValueRef lo,
+               LLVMValueRef hi);
+
+
+LLVMValueRef
+lp_build_pack(LLVMBuilderRef builder,
+              struct lp_type src_type,
+              struct lp_type dst_type,
+              boolean clamped,
+              const LLVMValueRef *src, unsigned num_srcs);
+
+
+#endif /* !LP_BLD_PACK_H */
-- 
cgit v1.2.3


From 4458695bdafb13eba639d986e2f20953f0f7445c Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 18:28:37 +0100
Subject: llvmpipe: Utility function to double the bit width of a type.

---
 src/gallium/drivers/llvmpipe/lp_bld_type.c | 29 ++++++++++++++++++++++++-----
 src/gallium/drivers/llvmpipe/lp_bld_type.h |  4 ++++
 2 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.c b/src/gallium/drivers/llvmpipe/lp_bld_type.c
index 606243d6c5..1320a26721 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.c
@@ -160,12 +160,31 @@ lp_build_int_vec_type(struct lp_type type)
 struct lp_type
 lp_int_type(struct lp_type type)
 {
-   struct lp_type int_type;
+   struct lp_type res_type;
 
-   memset(&int_type, 0, sizeof int_type);
-   int_type.width = type.width;
-   int_type.length = type.length;
-   return int_type;
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = type.width;
+   res_type.length = type.length;
+
+   return res_type;
+}
+
+
+/**
+ * Return the type with twice the bit width (hence half the number of elements).
+ */
+struct lp_type
+lp_wider_type(struct lp_type type)
+{
+   struct lp_type res_type;
+
+   memcpy(&res_type, &type, sizeof res_type);
+   res_type.width *= 2;
+   res_type.length /= 2;
+
+   assert(res_type.length);
+
+   return res_type;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index ee5ca3483c..46c298fa20 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -166,6 +166,10 @@ struct lp_type
 lp_int_type(struct lp_type type);
 
 
+struct lp_type
+lp_wider_type(struct lp_type type);
+
+
 void
 lp_build_context_init(struct lp_build_context *bld,
                       LLVMBuilderRef builder,
-- 
cgit v1.2.3


From 01b85e292352d710586344348fff5a81459e5486 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 18:28:57 +0100
Subject: llvmpipe: Use the pack/unpack functions for 8bit unsigned norm
 multiplication.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 96 ++++++------------------------
 1 file changed, 17 insertions(+), 79 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index d27ef0de04..83ca06acf8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -54,6 +54,7 @@
 #include "lp_bld_const.h"
 #include "lp_bld_intr.h"
 #include "lp_bld_logic.h"
+#include "lp_bld_pack.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_arit.h"
 
@@ -279,45 +280,6 @@ lp_build_sub(struct lp_build_context *bld,
 }
 
 
-/**
- * Build shuffle vectors that match PUNPCKLxx and PUNPCKHxx instructions.
- */
-static LLVMValueRef 
-lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
-{
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i, j;
-
-   assert(n <= LP_MAX_VECTOR_LENGTH);
-   assert(lo_hi < 2);
-
-   for(i = 0, j = lo_hi*n/2; i < n; i += 2, ++j) {
-      elems[i + 0] = LLVMConstInt(LLVMInt32Type(), 0 + j, 0);
-      elems[i + 1] = LLVMConstInt(LLVMInt32Type(), n + j, 0);
-   }
-
-   return LLVMConstVector(elems, n);
-}
-
-
-/**
- * Build constant int vector of width 'n' and value 'c'.
- */
-static LLVMValueRef 
-lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
-{
-   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
-   unsigned i;
-
-   assert(n <= LP_MAX_VECTOR_LENGTH);
-
-   for(i = 0; i < n; ++i)
-      elems[i] = LLVMConstInt(type, c, 0);
-
-   return LLVMConstVector(elems, n);
-}
-
-
 /**
  * Normalized 8bit multiplication.
  *
@@ -361,33 +323,30 @@ lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
  */
 static LLVMValueRef
 lp_build_mul_u8n(LLVMBuilderRef builder,
+                 struct lp_type i16_type,
                  LLVMValueRef a, LLVMValueRef b)
 {
-   static LLVMValueRef c01 = NULL;
-   static LLVMValueRef c08 = NULL;
-   static LLVMValueRef c80 = NULL;
+   LLVMValueRef c8;
    LLVMValueRef ab;
 
-   if(!c01) c01 = lp_build_const_vec(LLVMInt16Type(), 8, 0x01);
-   if(!c08) c08 = lp_build_const_vec(LLVMInt16Type(), 8, 0x08);
-   if(!c80) c80 = lp_build_const_vec(LLVMInt16Type(), 8, 0x80);
+   c8 = lp_build_int_const_scalar(i16_type, 8);
    
 #if 0
    
    /* a*b/255 ~= (a*(b + 1)) >> 256 */
-   b = LLVMBuildAdd(builder, b, c01, "");
+   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
    ab = LLVMBuildMul(builder, a, b, "");
 
 #else
    
-   /* t/255 ~= (t + (t >> 8) + 0x80) >> 8 */
+   /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
    ab = LLVMBuildMul(builder, a, b, "");
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c08, ""), "");
-   ab = LLVMBuildAdd(builder, ab, c80, "");
+   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
+   ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
 
 #endif
    
-   ab = LLVMBuildLShr(builder, ab, c08, "");
+   ab = LLVMBuildLShr(builder, ab, c8, "");
 
    return ab;
 }
@@ -415,39 +374,18 @@ lp_build_mul(struct lp_build_context *bld,
       return bld->undef;
 
    if(!type.floating && !type.fixed && type.norm) {
-      if(util_cpu_caps.has_sse2 && type.width == 8 && type.length == 16) {
-         LLVMTypeRef i16x8 = LLVMVectorType(LLVMInt16Type(), 8);
-         LLVMTypeRef i8x16 = LLVMVectorType(LLVMInt8Type(), 16);
-         static LLVMValueRef ml = NULL;
-         static LLVMValueRef mh = NULL;
-         LLVMValueRef al, ah, bl, bh;
-         LLVMValueRef abl, abh;
-         LLVMValueRef ab;
-         
-         if(!ml) ml = lp_build_unpack_shuffle(16, 0);
-         if(!mh) mh = lp_build_unpack_shuffle(16, 1);
+      if(type.width == 8) {
+         struct lp_type i16_type = lp_wider_type(type);
+         LLVMValueRef al, ah, bl, bh, abl, abh, ab;
 
-         /*  PUNPCKLBW, PUNPCKHBW */
-         al = LLVMBuildShuffleVector(bld->builder, a, bld->zero, ml, "");
-         bl = LLVMBuildShuffleVector(bld->builder, b, bld->zero, ml, "");
-         ah = LLVMBuildShuffleVector(bld->builder, a, bld->zero, mh, "");
-         bh = LLVMBuildShuffleVector(bld->builder, b, bld->zero, mh, "");
-
-         /* NOP */
-         al = LLVMBuildBitCast(bld->builder, al, i16x8, "");
-         bl = LLVMBuildBitCast(bld->builder, bl, i16x8, "");
-         ah = LLVMBuildBitCast(bld->builder, ah, i16x8, "");
-         bh = LLVMBuildBitCast(bld->builder, bh, i16x8, "");
+         lp_build_unpack2(bld->builder, type, i16_type, a, &al, &ah);
+         lp_build_unpack2(bld->builder, type, i16_type, b, &bl, &bh);
 
          /* PMULLW, PSRLW, PADDW */
-         abl = lp_build_mul_u8n(bld->builder, al, bl);
-         abh = lp_build_mul_u8n(bld->builder, ah, bh);
-
-         /* PACKUSWB */
-         ab = lp_build_intrinsic_binary(bld->builder, "llvm.x86.sse2.packuswb.128" , i16x8, abl, abh);
+         abl = lp_build_mul_u8n(bld->builder, i16_type, al, bl);
+         abh = lp_build_mul_u8n(bld->builder, i16_type, ah, bh);
 
-         /* NOP */
-         ab = LLVMBuildBitCast(bld->builder, ab, i8x16, "");
+         ab = lp_build_pack2(bld->builder, i16_type, type, abl, abh);
          
          return ab;
       }
-- 
cgit v1.2.3


From 9aafa1fbd247cd6d1bb0ab47bc5b318bd0d67bc5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 19:02:04 +0100
Subject: llvmpipe: Avoid variable size arrays.

Not really variable size, but MSVC still doesn't like them.
---
 src/gallium/drivers/llvmpipe/lp_test.h       | 3 +++
 src/gallium/drivers/llvmpipe/lp_test_blend.c | 4 ++--
 src/gallium/drivers/llvmpipe/lp_test_conv.c  | 4 ++--
 3 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index a88e110c66..21016fefe3 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -56,6 +56,9 @@
 #include "lp_bld_type.h"
 
 
+#define LP_TEST_NUM_SAMPLES 32
+
+
 void
 write_tsv_header(FILE *fp);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 94b661dcba..e3af81cffb 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -477,8 +477,8 @@ test_one(unsigned verbose,
    char *error = NULL;
    blend_test_ptr_t blend_test_ptr;
    boolean success;
-   const unsigned n = 32;
-   int64_t cycles[n];
+   const unsigned n = LP_TEST_NUM_SAMPLES;
+   int64_t cycles[LP_TEST_NUM_SAMPLES];
    double cycles_avg = 0.0;
    unsigned i, j;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 9dcf58e5dc..ac2a6d05e3 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -156,8 +156,8 @@ test_one(unsigned verbose,
    char *error = NULL;
    conv_test_ptr_t conv_test_ptr;
    boolean success;
-   const unsigned n = 32;
-   int64_t cycles[n];
+   const unsigned n = LP_TEST_NUM_SAMPLES;
+   int64_t cycles[LP_TEST_NUM_SAMPLES];
    double cycles_avg = 0.0;
    unsigned num_srcs;
    unsigned num_dsts;
-- 
cgit v1.2.3


From ba8c11923a13bdec53128988ffc26ceb5c4f7310 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 19:02:42 +0100
Subject: llvmpipe: Define rdtsc for MSVC.

---
 src/gallium/drivers/llvmpipe/lp_test.h | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test.h b/src/gallium/drivers/llvmpipe/lp_test.h
index 21016fefe3..39d80726e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test.h
+++ b/src/gallium/drivers/llvmpipe/lp_test.h
@@ -71,17 +71,28 @@ boolean
 test_all(unsigned verbose, FILE *fp);
 
 
+#if defined(PIPE_CC_MSVC)
+
+unsigned __int64 __rdtsc();
+#pragma intrinsic(__rdtsc)
+#define rdtsc() __rdtsc()
+
+#elif defined(PIPE_CC_GCC) && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64))
+
 static INLINE uint64_t
 rdtsc(void)
 {
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
    uint32_t hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
 #else
-   return 0;
+
+#define rdtsc() 0
+
 #endif
-}
+
 
 
 float
-- 
cgit v1.2.3


From 719984afca3864cfe86ca734f3e2bd6eb5834bd1 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 19:03:04 +0100
Subject: llvmpipe: Avoid yet another variable size array.

---
 src/gallium/drivers/llvmpipe/lp_setup.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index b14c265b7f..c43b3da450 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -278,11 +278,13 @@ clip_emit_quad( struct setup_context *setup, struct quad_header *quad )
        * until we codegenerate single-quad variants of the fragment pipeline
        * we need this hack. */
       const unsigned nr_quads = TILE_VECTOR_HEIGHT*TILE_VECTOR_WIDTH/QUAD_SIZE;
-      struct quad_header quads[nr_quads];
-      struct quad_header *quad_ptrs[nr_quads];
+      struct quad_header quads[4];
+      struct quad_header *quad_ptrs[4];
       int x0 = block_x(quad->input.x0);
       unsigned i;
 
+      assert(nr_quads == 4);
+
       for(i = 0; i < nr_quads; ++i) {
          int x = x0 + 2*i;
          if(x == quad->input.x0)
-- 
cgit v1.2.3


From 1f7f9bab8139681e1dcbc6c10fb42965059d1395 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 22 Oct 2009 11:39:59 -0700
Subject: r300g: Move render functions to r300_render.

Part of the fastpath cleanup.
---
 src/gallium/drivers/r300/r300_context.c |  67 +----------
 src/gallium/drivers/r300/r300_context.h |   3 +
 src/gallium/drivers/r300/r300_render.c  | 192 ++++++++++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_render.h  |  52 +++++++++
 4 files changed, 249 insertions(+), 65 deletions(-)
 create mode 100644 src/gallium/drivers/r300/r300_render.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 4ba11a026e..c34fbb1123 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -34,75 +34,12 @@
 #include "r300_context.h"
 #include "r300_flush.h"
 #include "r300_query.h"
+#include "r300_render.h"
 #include "r300_screen.h"
 #include "r300_state_derived.h"
 #include "r300_state_invariant.h"
 #include "r300_winsys.h"
 
-static boolean r300_draw_range_elements(struct pipe_context* pipe,
-                                        struct pipe_buffer* indexBuffer,
-                                        unsigned indexSize,
-                                        unsigned minIndex,
-                                        unsigned maxIndex,
-                                        unsigned mode,
-                                        unsigned start,
-                                        unsigned count)
-{
-    struct r300_context* r300 = r300_context(pipe);
-    int i;
-
-    for (i = 0; i < r300->vertex_buffer_count; i++) {
-        void* buf = pipe_buffer_map(pipe->screen,
-                                    r300->vertex_buffers[i].buffer,
-                                    PIPE_BUFFER_USAGE_CPU_READ);
-        draw_set_mapped_vertex_buffer(r300->draw, i, buf);
-    }
-
-    if (indexBuffer) {
-        void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
-                                        PIPE_BUFFER_USAGE_CPU_READ);
-        draw_set_mapped_element_buffer_range(r300->draw, indexSize,
-                                             minIndex, maxIndex, indices);
-    } else {
-        draw_set_mapped_element_buffer(r300->draw, 0, NULL);
-    }
-
-    draw_set_mapped_constant_buffer(r300->draw,
-            r300->shader_constants[PIPE_SHADER_VERTEX].constants,
-            r300->shader_constants[PIPE_SHADER_VERTEX].count *
-                (sizeof(float) * 4));
-
-    draw_arrays(r300->draw, mode, start, count);
-
-    for (i = 0; i < r300->vertex_buffer_count; i++) {
-        pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer);
-        draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
-    }
-
-    if (indexBuffer) {
-        pipe_buffer_unmap(pipe->screen, indexBuffer);
-        draw_set_mapped_element_buffer_range(r300->draw, 0, start,
-                                             start + count - 1, NULL);
-    }
-
-    return TRUE;
-}
-
-static boolean r300_draw_elements(struct pipe_context* pipe,
-                                  struct pipe_buffer* indexBuffer,
-                                  unsigned indexSize, unsigned mode,
-                                  unsigned start, unsigned count)
-{
-    return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
-                                    mode, start, count);
-}
-
-static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
-                                unsigned start, unsigned count)
-{
-    return r300_draw_elements(pipe, NULL, 0, mode, start, count);
-}
-
 static enum pipe_error r300_clear_hash_table(void* key, void* value,
                                              void* data)
 {
@@ -189,7 +126,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     r300->context.draw_arrays = r300_draw_arrays;
     r300->context.draw_elements = r300_draw_elements;
-    r300->context.draw_range_elements = r300_draw_range_elements;
+    r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
 
     r300->context.is_texture_referenced = r300_is_texture_referenced;
     r300->context.is_buffer_referenced = r300_is_buffer_referenced;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 4e2c0ec34e..30b80fa9db 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -273,6 +273,9 @@ struct r300_context {
     /* Vertex buffers for Gallium. */
     struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
     int vertex_buffer_count;
+    /* Vertex elements for Gallium. */
+    struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
+    int vertex_element_count;
     /* Vertex shader. */
     struct r300_vertex_shader* vs;
     /* Viewport state. */
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 79a33b53cb..4916152bd6 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -31,11 +31,203 @@
 #include "r300_context.h"
 #include "r300_emit.h"
 #include "r300_reg.h"
+#include "r300_render.h"
 #include "r300_state_derived.h"
 
 /* r300_render: Vertex and index buffer primitive emission. */
 #define R300_MAX_VBO_SIZE  (1024 * 1024)
 
+static uint32_t r300_translate_primitive(unsigned prim)
+{
+    switch (prim) {
+        case PIPE_PRIM_POINTS:
+            return R300_VAP_VF_CNTL__PRIM_POINTS;
+        case PIPE_PRIM_LINES:
+            return R300_VAP_VF_CNTL__PRIM_LINES;
+        case PIPE_PRIM_LINE_LOOP:
+            return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+        case PIPE_PRIM_LINE_STRIP:
+            return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+        case PIPE_PRIM_TRIANGLES:
+            return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+        case PIPE_PRIM_TRIANGLE_STRIP:
+            return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+        case PIPE_PRIM_TRIANGLE_FAN:
+            return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+        case PIPE_PRIM_QUADS:
+            return R300_VAP_VF_CNTL__PRIM_QUADS;
+        case PIPE_PRIM_QUAD_STRIP:
+            return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+        case PIPE_PRIM_POLYGON:
+            return R300_VAP_VF_CNTL__PRIM_POLYGON;
+        default: /* Unknown primitive: 0 is returned, not an error. */
+            return 0;
+    }
+}
+
+/* Fast-path drawing & emission for HW TCL: validates the vertex buffers,
+ * emits dirty state, loads the vertex array pointers, then emits the draw
+ * packet. minIndex/maxIndex are unused. NOTE(review): indexSize is ignored
+ * and indices are always read as 16-bit values -- confirm with callers. */
+boolean r300_draw_range_elements(struct pipe_context* pipe,
+                                 struct pipe_buffer* indexBuffer,
+                                 unsigned indexSize,
+                                 unsigned minIndex, unsigned maxIndex,
+                                 unsigned mode,
+                                 unsigned start, unsigned count)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    CS_LOCALS(r300);
+    uint32_t prim = r300_translate_primitive(mode);
+    struct pipe_vertex_buffer* aos = r300->vertex_buffers;
+    unsigned aos_count = r300->vertex_buffer_count;
+    uint16_t* indices;
+    unsigned packet_size, i;
+    bool invalid = FALSE;
+
+validate:
+    for (i = 0; i < aos_count; i++) {
+        if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer,
+                    RADEON_GEM_DOMAIN_GTT, 0)) {
+            pipe->flush(pipe, 0, NULL);
+            goto validate;
+        }
+    }
+    if (!r300->winsys->validate(r300->winsys)) {
+        pipe->flush(pipe, 0, NULL);
+        if (invalid) {
+            /* Well, hell. */
+            debug_printf("r300: Stuck in validation loop, gonna quit now.");
+            exit(1);
+        }
+        invalid = TRUE;
+        goto validate;
+    }
+
+    r300_emit_dirty_state(r300);
+
+    packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2;
+
+    BEGIN_CS(3 + packet_size + (aos_count * 2));
+    OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+    OUT_CS(aos_count);
+    /* "i + 1 < aos_count", not "i < aos_count - 1": the unsigned subtraction
+     * wraps around when no vertex buffers are bound. */
+    for (i = 0; i + 1 < aos_count; i += 2) {
+        OUT_CS(aos[i].stride | (aos[i].stride << 8) |
+            (aos[i + 1].stride << 16) | (aos[i + 1].stride << 24));
+        OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride);
+        OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride);
+    }
+    if (aos_count & 1) {
+        OUT_CS(aos[i].stride | (aos[i].stride << 8));
+        OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride);
+    }
+    for (i = 0; i < aos_count; i++) {
+        OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+    }
+    END_CS;
+
+    if (indexBuffer) {
+        indices = (uint16_t*)pipe_buffer_map(pipe->screen, indexBuffer,
+                                             PIPE_BUFFER_USAGE_CPU_READ);
+        indices += start; /* Set the starting point. */
+
+        BEGIN_CS(2 + (count+1)/2);
+        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2);
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim);
+        /* Two indices per dword; widen first so the shift stays unsigned. */
+        for (i = 0; i + 1 < count; i += 2) {
+            OUT_CS(((uint32_t)indices[i + 1] << 16) | indices[i]);
+        }
+        if (count % 2) {
+            OUT_CS(indices[count - 1]);
+        }
+        END_CS;
+        /* The indices were copied into the CS; release the mapping. */
+        pipe_buffer_unmap(pipe->screen, indexBuffer);
+    } else {
+        BEGIN_CS(2);
+        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | prim);
+        END_CS;
+    }
+
+    return TRUE;
+}
+
+/* Simple helpers for context setup. Should probably be moved to util. */
+boolean r300_draw_elements(struct pipe_context* pipe,
+                           struct pipe_buffer* indexBuffer,
+                           unsigned indexSize, unsigned mode,
+                           unsigned start, unsigned count)
+{
+    return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+                                     mode, start, count);
+}
+
+boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+                         unsigned start, unsigned count)
+{
+    return pipe->draw_elements(pipe, NULL, 0, mode, start, count);
+}
+
+/****************************************************************************
+ * The rest of this file is for SW TCL rendering only. Please be polite and *
+ * keep these functions separated so that they are easier to locate. ~C.    *
+ ***************************************************************************/
+
+/* Draw-based drawing for SW TCL chipsets: map buffers, run Draw, unmap. */
+boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+                                       struct pipe_buffer* indexBuffer,
+                                       unsigned indexSize,
+                                       unsigned minIndex,
+                                       unsigned maxIndex,
+                                       unsigned mode,
+                                       unsigned start,
+                                       unsigned count)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    int i;
+
+    for (i = 0; i < r300->vertex_buffer_count; i++) {
+        void* buf = pipe_buffer_map(pipe->screen,
+                                    r300->vertex_buffers[i].buffer,
+                                    PIPE_BUFFER_USAGE_CPU_READ);
+        draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+    }
+
+    if (indexBuffer) {
+        void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
+                                        PIPE_BUFFER_USAGE_CPU_READ);
+        draw_set_mapped_element_buffer_range(r300->draw, indexSize,
+                                             minIndex, maxIndex, indices);
+    } else { /* no index buffer: clear Draw's element buffer */
+        draw_set_mapped_element_buffer(r300->draw, 0, NULL);
+    }
+
+    draw_set_mapped_constant_buffer(r300->draw,
+            r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+            r300->shader_constants[PIPE_SHADER_VERTEX].count *
+                (sizeof(float) * 4)); /* count x 4 floats, in bytes */
+
+    draw_arrays(r300->draw, mode, start, count);
+
+    for (i = 0; i < r300->vertex_buffer_count; i++) {
+        pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer);
+        draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+    }
+
+    if (indexBuffer) {
+        pipe_buffer_unmap(pipe->screen, indexBuffer);
+        draw_set_mapped_element_buffer_range(r300->draw, 0, start,
+                                             start + count - 1, NULL);
+    }
+
+    return TRUE; /* NOTE(review): map results are never checked */
+}
+
+/* Object for rendering using Draw. */
 struct r300_render {
     /* Parent class */
     struct vbuf_render base;
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
new file mode 100644
index 0000000000..3d8f47ba75
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_RENDER_H
+#define R300_RENDER_H
+
+boolean r300_draw_range_elements(struct pipe_context* pipe,
+                                 struct pipe_buffer* indexBuffer,
+                                 unsigned indexSize,
+                                 unsigned minIndex,
+                                 unsigned maxIndex,
+                                 unsigned mode,
+                                 unsigned start,
+                                 unsigned count);
+
+boolean r300_draw_elements(struct pipe_context* pipe,
+                           struct pipe_buffer* indexBuffer,
+                           unsigned indexSize, unsigned mode,
+                           unsigned start, unsigned count);
+
+boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+                         unsigned start, unsigned count);
+
+boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+                                       struct pipe_buffer* indexBuffer,
+                                       unsigned indexSize,
+                                       unsigned minIndex,
+                                       unsigned maxIndex,
+                                       unsigned mode,
+                                       unsigned start,
+                                       unsigned count);
+
+#endif /* R300_RENDER_H */
-- 
cgit v1.2.3


From 06e464c2d57552d5ccde2b98885aeef953d8b2a1 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 22 Oct 2009 11:45:36 -0700
Subject: r300g: Clean up duplicate code in r300_render.

---
 src/gallium/drivers/r300/r300_render.c | 49 +++-------------------------------
 1 file changed, 4 insertions(+), 45 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4916152bd6..6e2bcc62da 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -331,54 +331,13 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
                                                unsigned prim)
 {
     struct r300_render* r300render = r300_render(render);
-    r300render->prim = prim;
 
-    switch (prim) {
-        case PIPE_PRIM_POINTS:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS;
-            break;
-        case PIPE_PRIM_LINES:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES;
-            break;
-        case PIPE_PRIM_LINE_LOOP:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
-            break;
-        case PIPE_PRIM_LINE_STRIP:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
-            break;
-        case PIPE_PRIM_TRIANGLES:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES;
-            break;
-        case PIPE_PRIM_TRIANGLE_STRIP:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
-            break;
-        case PIPE_PRIM_TRIANGLE_FAN:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
-            break;
-        case PIPE_PRIM_QUADS:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS;
-            break;
-        case PIPE_PRIM_QUAD_STRIP:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
-            break;
-        case PIPE_PRIM_POLYGON:
-            r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON;
-            break;
-        default:
-            return FALSE;
-            break;
-    }
+    r300render->prim = prim;
+    r300render->hwprim = r300_translate_primitive(prim);
 
     return TRUE;
 }
 
-static void r300_prepare_render(struct r300_render* render, unsigned count)
-{
-    struct r300_context* r300 = render->r300;
-
-    r300_emit_dirty_state(r300);
-}
-
 static void r300_render_draw_arrays(struct vbuf_render* render,
                                           unsigned start,
                                           unsigned count)
@@ -388,7 +347,7 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
 
     CS_LOCALS(r300);
 
-    r300_prepare_render(r300render, count);
+    r300_emit_dirty_state(r300);
 
     DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
 
@@ -409,7 +368,7 @@ static void r300_render_draw(struct vbuf_render* render,
 
     CS_LOCALS(r300);
 
-    r300_prepare_render(r300render, count);
+    r300_emit_dirty_state(r300);
 
     BEGIN_CS(2 + (count+1)/2);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count+1)/2);
-- 
cgit v1.2.3


From f9a69c0f040171cffa63c9c68264c1cf847aa1cd Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Thu, 22 Oct 2009 21:55:09 +0200
Subject: nouveau: nv30: use a8r8g8b8 as depth texture format for z24s8

---
 src/gallium/drivers/nv30/nv30_fragtex.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index a2ce947a72..f5f17d4071 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -30,7 +30,7 @@ nv30_texture_formats[] = {
 	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
 	_(A8L8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y),
 //	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X),
-//	_(Z24S8_UNORM   , Z24     ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(Z24S8_UNORM   , A8R8G8B8,   S1,   S1,   S1,  ONE, X, X, X, X),
 	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
 	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
 	_(DXT3_RGBA     , DXT3    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
@@ -73,9 +73,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 
 	txf  = tf->format;
 	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-	txf |= log2i(pt->width[0]) << 20;
-	txf |= log2i(pt->height[0]) << 24;
-	txf |= log2i(pt->depth[0]) << 28;
+	txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+	txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+	txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
 	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
 
 	switch (pt->target) {
-- 
cgit v1.2.3


From 4b8de9bd7c6f77fcf3f1f2b939bab980e074e8bf Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Thu, 22 Oct 2009 22:01:53 +0200
Subject: nouveau: nv30: rewrite so we can render only in depth buffer

---
 src/gallium/drivers/nv30/nv30_state_fb.c | 55 ++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 20 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 197de82886..f90681b0f9 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -8,8 +8,8 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	struct nouveau_channel *chan = nv30->screen->base.channel;
 	struct nouveau_grobj *rankine = nv30->screen->rankine;
 	struct nv04_surface *rt[2], *zeta = NULL;
-	uint32_t rt_enable, rt_format;
-	int i, colour_format = 0, zeta_format = 0;
+	uint32_t rt_enable = 0, rt_format = 0;
+	int i, colour_format = 0, zeta_format = 0, depth_only = 0;
 	struct nouveau_stateobj *so = so_new(64, 10);
 	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
 	unsigned w = fb->width;
@@ -17,11 +17,6 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	struct nv30_miptree *nv30mt;
 	int colour_bits = 32, zeta_bits = 32;
 
-	if (fb->nr_cbufs == 0) {
-		return FALSE;
-	}
-
-	rt_enable = 0;
 	for (i = 0; i < fb->nr_cbufs; i++) {
 		if (colour_format) {
 			assert(colour_format == fb->cbufs[i]->format);
@@ -40,17 +35,35 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		zeta = (struct nv04_surface *)fb->zsbuf;
 	}
 
-	if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
-		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
-		for (i = 1; i < fb->nr_cbufs; i++)
-			assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+	if (rt_enable & (NV34TCL_RT_ENABLE_COLOR0|NV34TCL_RT_ENABLE_COLOR1)) {
+		/* Render to at least a colour buffer */
+		if (!(rt[0]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+			for (i = 1; i < fb->nr_cbufs; i++)
+				assert(!(rt[i]->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
 
-		rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
-			    (log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
-			    (log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+				(log2i(rt[0]->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+				(log2i(rt[0]->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+		}
+		else
+			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+	} else if (fb->zsbuf) {
+		depth_only = 1;
+
+		/* Render to depth buffer only */
+		if (!(zeta->base.texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+
+			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
+				(log2i(zeta->base.width) << NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT) |
+				(log2i(zeta->base.height) << NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT);
+		}
+		else
+			rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
+	} else {
+		return FALSE;
 	}
-	else
-		rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR;
 
 	switch (colour_format) {
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
@@ -83,21 +96,23 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		return FALSE;
 	}
 
-	if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) {
-		uint32_t pitch = rt[0]->pitch;
+	if (depth_only || (rt_enable & NV34TCL_RT_ENABLE_COLOR0)) {
+		struct nv04_surface *rt0 = (depth_only ? zeta : rt[0]);
+		uint32_t pitch = rt0->pitch;
+
 		if (zeta) {
 			pitch |= (zeta->pitch << 16);
 		} else {
 			pitch |= (pitch << 16);
 		}
 
-		nv30mt = (struct nv30_miptree *)rt[0]->base.texture;
+		nv30mt = (struct nv30_miptree *) rt0->base.texture;
 		so_method(so, rankine, NV34TCL_DMA_COLOR0, 1);
 		so_reloc (so, nouveau_bo(nv30mt->buffer), 0, rt_flags | NOUVEAU_BO_OR,
 			      chan->vram->handle, chan->gart->handle);
 		so_method(so, rankine, NV34TCL_COLOR0_PITCH, 2);
 		so_data  (so, pitch);
-		so_reloc (so, nouveau_bo(nv30mt->buffer), rt[0]->base.offset,
+		so_reloc (so, nouveau_bo(nv30mt->buffer), rt0->base.offset,
 			      rt_flags | NOUVEAU_BO_LOW, 0, 0);
 	}
 
-- 
cgit v1.2.3


From 198925caa18526e5aa908ab50482aff814207dc2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 22 Oct 2009 22:57:30 +0200
Subject: nv50: handle PIPE_TEX_FILTER_ANISO case

Set the same bits as for linear filtering (in addition
to max anisotropy), and 2 unknown bits I've seen set.
---
 src/gallium/drivers/nv50/nv50_state.c   | 26 +++++++++++---------------
 src/gallium/drivers/nv50/nv50_texture.h |  2 ++
 2 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 81fa3e34c5..ffaa5e29d1 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -146,6 +146,7 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 		  (wrap_mode(cso->wrap_r) << 6));
 
 	switch (cso->mag_img_filter) {
+	case PIPE_TEX_FILTER_ANISO:
 	case PIPE_TEX_FILTER_LINEAR:
 		tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
 		break;
@@ -156,6 +157,7 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	}
 
 	switch (cso->min_img_filter) {
+	case PIPE_TEX_FILTER_ANISO:
 	case PIPE_TEX_FILTER_LINEAR:
 		tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
 		break;
@@ -183,21 +185,15 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	else
 	if (cso->max_anisotropy >= 12.0)
 		tsc[0] |= (6 << 20);
-	else
-	if (cso->max_anisotropy >= 10.0)
-		tsc[0] |= (5 << 20);
-	else
-	if (cso->max_anisotropy >= 8.0)
-		tsc[0] |= (4 << 20);
-	else
-	if (cso->max_anisotropy >= 6.0)
-		tsc[0] |= (3 << 20);
-	else
-	if (cso->max_anisotropy >= 4.0)
-		tsc[0] |= (2 << 20);
-	else
-	if (cso->max_anisotropy >= 2.0)
-		tsc[0] |= (1 << 20);
+	else {
+		tsc[0] |= (int)(cso->max_anisotropy * 0.5f) << 20;
+
+		if (cso->max_anisotropy >= 4.0)
+			tsc[1] |= NV50TSC_1_1_UNKN_ANISO_35;
+		else
+		if (cso->max_anisotropy >= 2.0)
+			tsc[1] |= NV50TSC_1_1_UNKN_ANISO_15;
+	}
 
 	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
 		tsc[0] |= (1 << 8);
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index 207fb039f7..13f74c11c6 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -133,6 +133,8 @@
 #define NV50TSC_1_1_MIPF_NEAREST                                 0x00000080
 #define NV50TSC_1_1_MIPF_LINEAR                                  0x000000c0
 #define NV50TSC_1_1_LOD_BIAS_MASK                                0x01fff000
+#define NV50TSC_1_1_UNKN_ANISO_15                                0x10000000
+#define NV50TSC_1_1_UNKN_ANISO_35                                0x18000000
 
 #define NV50TSC_1_2_MIN_LOD_MASK                                 0x00000f00
 #define NV50TSC_1_2_MAX_LOD_MASK                                 0x00f00000
-- 
cgit v1.2.3


From ff9e1c01989fc80f07cdc69e3e373bdfe1a384ef Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 22 Oct 2009 13:42:03 -0700
Subject: r300g: Cleanup PSC setup math a bit and stop using Draw formats.

---
 src/gallium/drivers/r300/r300_reg.h           | 21 ++++++++++++
 src/gallium/drivers/r300/r300_state_derived.c | 28 +++++++++-------
 src/gallium/drivers/r300/r300_state_inlines.h | 48 +++++++++++++++++++++------
 3 files changed, 74 insertions(+), 23 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index ae94bb9b9f..e920b2a5e7 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -348,6 +348,27 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_WRITE_ENA_W                         8
 #       define R300_SWIZZLE1_SHIFT                      16
 
+#       define R300_VAP_SWIZZLE_X001 \
+        ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+         (0xf << R300_WRITE_ENA_SHIFT))
+
+#       define R300_VAP_SWIZZLE_XY01 \
+        ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+         (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+         (0xf << R300_WRITE_ENA_SHIFT))
+
+#       define R300_VAP_SWIZZLE_XYZ1 \
+        ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+         (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+         (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+         (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+         (0xf << R300_WRITE_ENA_SHIFT))
+
 #       define R300_VAP_SWIZZLE_XYZW \
         ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
          (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 42aee7231e..7d000e9e2d 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -224,7 +224,8 @@ static void r300_vertex_psc(struct r300_context* r300,
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct vertex_info* vinfo = &vformat->vinfo;
     int* tab = vformat->vs_tab;
-    uint32_t temp;
+    uint16_t type, swizzle;
+    enum pipe_format format;
     unsigned i, attrib_count;
 
     /* Vertex shaders have no semantics on their inputs,
@@ -246,25 +247,28 @@ static void r300_vertex_psc(struct r300_context* r300,
     }
 
     for (i = 0; i < attrib_count; i++) {
-        /* Make sure we have a proper destination for our attribute */
+        /* Make sure we have a proper destination for our attribute. */
         assert(tab[i] != -1);
 
-        /* Add the attribute to the PSC table. */
-        temp = translate_draw_vertex_data_type(vinfo->attrib[i].emit) |
+        format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+        /* Obtain the type of data in this attribute. */
+        type = r300_translate_vertex_data_type(format) |
             tab[i] << R300_DST_VEC_LOC_SHIFT;
 
+        /* Obtain the swizzle for this attribute. Note that the default
+         * swizzle in the hardware is not XYZW! */
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        /* Add the attribute to the PSC table. */
         if (i & 1) {
-            vformat->vap_prog_stream_cntl[i >> 1] &= 0x0000ffff;
-            vformat->vap_prog_stream_cntl[i >> 1] |= temp << 16;
+            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
 
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |=
-                (R300_VAP_SWIZZLE_XYZW << 16);
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
         } else {
-            vformat->vap_prog_stream_cntl[i >> 1] &= 0xffff0000;
-            vformat->vap_prog_stream_cntl[i >> 1] |= temp <<  0;
+            vformat->vap_prog_stream_cntl[i >> 1] |= type <<  0;
 
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |=
-                (R300_VAP_SWIZZLE_XYZW <<  0);
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0;
         }
     }
 
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index c82d8e5f08..2431b75a51 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -400,28 +400,54 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
     return 0;
 }
 
-/* Translate Draw vertex types into PSC vertex types. */
-static INLINE uint32_t translate_draw_vertex_data_type(int type) {
-    switch (type) {
-        case EMIT_1F:
-        case EMIT_1F_PSIZE:
+/* Translate pipe_formats into PSC vertex types. */
+static INLINE uint16_t
+r300_translate_vertex_data_type(enum pipe_format format) {
+    switch (format) {
+        case PIPE_FORMAT_R32_FLOAT:
             return R300_DATA_TYPE_FLOAT_1;
             break;
-        case EMIT_2F:
+        case PIPE_FORMAT_R32G32_FLOAT:
             return R300_DATA_TYPE_FLOAT_2;
             break;
-        case EMIT_3F:
+        case PIPE_FORMAT_R32G32B32_FLOAT:
             return R300_DATA_TYPE_FLOAT_3;
             break;
-        case EMIT_4F:
+        case PIPE_FORMAT_R32G32B32A32_FLOAT:
             return R300_DATA_TYPE_FLOAT_4;
             break;
-        case EMIT_4UB:
-            return R300_DATA_TYPE_BYTE;
+        case PIPE_FORMAT_R8G8B8A8_UNORM:
+            return R300_DATA_TYPE_BYTE |
+                R300_NORMALIZE;
+            break;
+        default:
+            debug_printf("r300: Implementation error: "
+                    "Bad vertex data format %s!\n", pf_name(format));
+            assert(0);
+            break;
+    }
+    return 0;
+}
+
+static INLINE uint16_t
+r300_translate_vertex_data_swizzle(enum pipe_format format) {
+    switch (format) {
+        case PIPE_FORMAT_R32_FLOAT:
+            return R300_VAP_SWIZZLE_X001;
+            break;
+        case PIPE_FORMAT_R32G32_FLOAT:
+            return R300_VAP_SWIZZLE_XY01;
+            break;
+        case PIPE_FORMAT_R32G32B32_FLOAT:
+            return R300_VAP_SWIZZLE_XYZ1;
+            break;
+        case PIPE_FORMAT_R32G32B32A32_FLOAT:
+        case PIPE_FORMAT_R8G8B8A8_UNORM:
+            return R300_VAP_SWIZZLE_XYZW;
             break;
         default:
             debug_printf("r300: Implementation error: "
-                    "Bad vertex data type!\n");
+                    "Bad vertex data format %s!\n", pf_name(format));
             assert(0);
             break;
     }
-- 
cgit v1.2.3


From 04ec113e09f6287f2c6b39bf0247e06839eaf7a8 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Thu, 22 Oct 2009 14:28:47 -0700
Subject: r300g: Enable more stuff in r300_screen, cleanup comments.

Also enable 24-bit depth buffers without stencil.
---
 src/gallium/drivers/r300/r300_screen.c        | 34 +++++++++++++++------------
 src/gallium/drivers/r300/r300_state_inlines.h |  4 +++-
 src/gallium/drivers/r300/r300_texture.h       |  1 +
 3 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a9058a25e5..1d9f91d0f7 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -80,13 +80,10 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
     struct r300_screen* r300screen = r300_screen(pscreen);
 
     switch (param) {
-        /* XXX cases marked "IN THEORY" are possible on the hardware,
-         * but haven't been implemented yet. */
         case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
             /* XXX I'm told this goes up to 16 */
             return 8;
         case PIPE_CAP_NPOT_TEXTURES:
-            /* IN THEORY */
             return 0;
         case PIPE_CAP_TWO_SIDED_STENCIL:
             if (r300screen->caps->is_r500) {
@@ -95,16 +92,26 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
                 return 0;
             }
         case PIPE_CAP_GLSL:
-            if (r300screen->caps->is_r500) {
-                return 1;
-            } else {
-                return 0;
-            }
+            /* I'll be frank. This is a lie.
+             *
+             * We don't truly support GLSL on any of this driver's chipsets.
+             * To be fair, no chipset supports the full GLSL specification
+             * to the best of our knowledge, but some of the less esoteric
+             * features are still missing here.
+             *
+             * Rather than cripple ourselves intentionally, I'm going to set
+             * this flag, and as Gallium's interface continues to change, I
+             * hope that this single monolithic GLSL enable can slowly get
+             * split down into many different pieces and the state tracker
+             * will handle fallbacks transparently, like it should.
+             *
+             * ~ C.
+             */
+            return 1;
         case PIPE_CAP_ANISOTROPIC_FILTER:
             return 1;
         case PIPE_CAP_POINT_SPRITE:
-            /* IN THEORY */
-            return 0;
+            return 1;
         case PIPE_CAP_MAX_RENDER_TARGETS:
             return 4;
         case PIPE_CAP_OCCLUSION_QUERY:
@@ -145,10 +152,8 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
         case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
             return 1;
         case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-            /* XXX guessing (what a terrible guess) */
-            return 2;
+            return 0;
         case PIPE_CAP_TGSI_CONT_SUPPORTED:
-            /* XXX */
             return 0;
         case PIPE_CAP_BLEND_EQUATION_SEPARATE:
             return 1;
@@ -229,6 +234,7 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
 
         /* Z buffer or texture */
         case PIPE_FORMAT_Z16_UNORM:
+        case PIPE_FORMAT_Z24X8_UNORM:
         /* Z buffer with stencil or texture */
         case PIPE_FORMAT_Z24S8_UNORM:
             retval = usage &
@@ -239,7 +245,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
         /* Definitely unsupported formats. */
         /* Non-usable Z buffer/stencil formats. */
         case PIPE_FORMAT_Z32_UNORM:
-        case PIPE_FORMAT_Z24X8_UNORM:
         case PIPE_FORMAT_S8Z24_UNORM:
         case PIPE_FORMAT_X8Z24_UNORM:
             debug_printf("r300: Note: Got unsupported format: %s in %s\n",
@@ -249,7 +254,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
         /* XXX These don't even exist
         case PIPE_FORMAT_A32R32G32B32:
         case PIPE_FORMAT_A16R16G16B16: */
-        /* XXX Insert YUV422 packed VYUY and YVYU here */
         /* XXX What the deuce is UV88? (r3xx accel page 14)
             debug_printf("r300: Warning: Got unimplemented format: %s in %s\n",
                 pf_name(format), __FUNCTION__);
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 2431b75a51..ec11a41253 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -330,13 +330,15 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
     return 0;
 }
 
-/* Depthbuffer and stencilbuffer. Thankfully, we only support two kinds. */
+/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */
 static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
 {
     switch (format) {
         /* 16-bit depth, no stencil */
         case PIPE_FORMAT_Z16_UNORM:
             return R300_DEPTHFORMAT_16BIT_INT_Z;
+        /* 24-bit depth, ignored stencil */
+        case PIPE_FORMAT_Z24X8_UNORM:
         /* 24-bit depth, 8-bit stencil */
         case PIPE_FORMAT_Z24S8_UNORM:
             return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 2e58bda716..55d1a0ac5c 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -80,6 +80,7 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
                 R300_TX_FORMAT_YUV_TO_RGB;
         /* W24_FP */
         case PIPE_FORMAT_Z24S8_UNORM:
+        case PIPE_FORMAT_Z24X8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
         default:
             debug_printf("r300: Implementation error: "
-- 
cgit v1.2.3


From 4e1d51786e0657c7430d731ac464f2a73e32eddf Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 13:49:04 +0100
Subject: gallium: remove noise opcodes

Provide a dummy implementation in the GL state tracker (move 0.5 to
the destination regs).

At some point, a motivated person could add a better
implementation of noise.  Currently not even the nvidia
binary drivers do anything more than this.  In any case, the
place to do this is in the GL state tracker, not the poor
driver.
---
 src/gallium/auxiliary/tgsi/tgsi_exec.c         | 16 ----------------
 src/gallium/auxiliary/tgsi/tgsi_info.c         |  8 ++++----
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |  4 ----
 src/gallium/drivers/cell/spu/spu_exec.c        | 16 ----------------
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c |  9 ---------
 src/gallium/drivers/nv30/nv30_fragprog.c       |  6 ------
 src/gallium/drivers/nv40/nv40_fragprog.c       |  6 ------
 src/gallium/drivers/r300/r300_tgsi_to_rc.c     |  4 ----
 src/gallium/include/pipe/p_shader_tokens.h     |  5 +----
 src/mesa/state_tracker/st_mesa_to_tgsi.c       | 23 +++++++++++++++--------
 10 files changed, 20 insertions(+), 77 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c79c56debd..d9661c75a0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3223,22 +3223,6 @@ exec_instruction(
       /* no-op */
       break;
 
-   case TGSI_OPCODE_NOISE1:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE2:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE3:
-      assert( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE4:
-      assert( 0 );
-      break;
-
    case TGSI_OPCODE_NOP:
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 17af4cb7ad..fe8b0bdce3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -134,10 +134,10 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 0, 0, 0, 0, 1, "BGNSUB", TGSI_OPCODE_BGNSUB },
    { 0, 0, 0, 1, 1, 0, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
    { 0, 0, 0, 0, 1, 0, "ENDSUB", TGSI_OPCODE_ENDSUB },
-   { 1, 1, 0, 0, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 },
-   { 1, 1, 0, 0, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 },
-   { 1, 1, 0, 0, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
-   { 1, 1, 0, 0, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
+   { 0, 0, 0, 0, 0, 0, "", 103 },     /* removed */
+   { 0, 0, 0, 0, 0, 0, "", 104 },     /* removed */
+   { 0, 0, 0, 0, 0, 0, "", 105 },     /* removed */
+   { 0, 0, 0, 0, 0, 0, "", 106 },     /* removed */
    { 0, 0, 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
    { 0, 0, 0, 0, 0, 0, "", 108 },     /* removed */
    { 0, 0, 0, 0, 0, 0, "", 109 },     /* removed */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index e7bcf4bf75..d321f013d0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -139,10 +139,6 @@ OP00_LBL(BGNLOOP)
 OP00(BGNSUB)
 OP00_LBL(ENDLOOP)
 OP00(ENDSUB)
-OP11(NOISE1)
-OP11(NOISE2)
-OP11(NOISE3)
-OP11(NOISE4)
 OP00(NOP)
 OP11(NRM4)
 OP01(CALLNZ)
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 0eaae2e451..725a72b326 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1807,22 +1807,6 @@ exec_instruction(
       /* no-op */
       break;
 
-   case TGSI_OPCODE_NOISE1:
-      ASSERT( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE2:
-      ASSERT( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE3:
-      ASSERT( 0 );
-      break;
-
-   case TGSI_OPCODE_NOISE4:
-      ASSERT( 0 );
-      break;
-
    case TGSI_OPCODE_NOP:
       break;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index adc81569ed..926cc1cc9f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -1386,15 +1386,6 @@ emit_instruction(
       return 0;
       break;
 
-   case TGSI_OPCODE_NOISE1:
-   case TGSI_OPCODE_NOISE2:
-   case TGSI_OPCODE_NOISE3:
-   case TGSI_OPCODE_NOISE4:
-      FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
-         dst0[chan_index] = bld->base.zero;
-      }
-      break;
-
    case TGSI_OPCODE_NOP:
       break;
 
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index a48ba9782b..62c6c76cee 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -527,12 +527,6 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 	case TGSI_OPCODE_MUL:
 		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
 		break;
-	case TGSI_OPCODE_NOISE1:
-	case TGSI_OPCODE_NOISE2:
-	case TGSI_OPCODE_NOISE3:
-	case TGSI_OPCODE_NOISE4:
-		arith(fpc, sat, SFL, dst, mask, none, none, none);
-		break;
 	case TGSI_OPCODE_POW:
 		arith(fpc, sat, POW, dst, mask, src[0], src[1], none);
 		break;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 32d9ed1a7f..e3550baa63 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -568,12 +568,6 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 	case TGSI_OPCODE_MUL:
 		arith(fpc, sat, MUL, dst, mask, src[0], src[1], none);
 		break;
-	case TGSI_OPCODE_NOISE1:
-	case TGSI_OPCODE_NOISE2:
-	case TGSI_OPCODE_NOISE3:
-	case TGSI_OPCODE_NOISE4:
-		arith(fpc, sat, SFL, dst, mask, none, none, none);
-		break;
 	case TGSI_OPCODE_POW:
 		tmp = temp(fpc);
 		arith(fpc, 0, LG2, tmp, MASK_X,
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 74d4fb5087..3d2f6cafee 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -135,10 +135,6 @@ static unsigned translate_opcode(unsigned opcode)
      /* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */
      /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
      /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
-     /* case TGSI_OPCODE_NOISE1: return RC_OPCODE_NOISE1; */
-     /* case TGSI_OPCODE_NOISE2: return RC_OPCODE_NOISE2; */
-     /* case TGSI_OPCODE_NOISE3: return RC_OPCODE_NOISE3; */
-     /* case TGSI_OPCODE_NOISE4: return RC_OPCODE_NOISE4; */
         case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
                                         /* gap */
      /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 5fa6c9af30..48e6583ada 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -266,10 +266,7 @@ union tgsi_immediate_data
 #define TGSI_OPCODE_BGNSUB              100
 #define TGSI_OPCODE_ENDLOOP             101
 #define TGSI_OPCODE_ENDSUB              102
-#define TGSI_OPCODE_NOISE1              103
-#define TGSI_OPCODE_NOISE2              104
-#define TGSI_OPCODE_NOISE3              105
-#define TGSI_OPCODE_NOISE4              106
+                                /* gap */
 #define TGSI_OPCODE_NOP                 107
                                 /* gap */
 #define TGSI_OPCODE_NRM4                112
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 70d7c4fee2..1b9d35d353 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -481,14 +481,6 @@ translate_opcode( unsigned op )
       return TGSI_OPCODE_MOV;
    case OPCODE_MUL:
       return TGSI_OPCODE_MUL;
-   case OPCODE_NOISE1:
-      return TGSI_OPCODE_NOISE1;
-   case OPCODE_NOISE2:
-      return TGSI_OPCODE_NOISE2;
-   case OPCODE_NOISE3:
-      return TGSI_OPCODE_NOISE3;
-   case OPCODE_NOISE4:
-      return TGSI_OPCODE_NOISE4;
    case OPCODE_NOP:
       return TGSI_OPCODE_NOP;
    case OPCODE_NRM3:
@@ -616,6 +608,21 @@ compile_instruction(
                  src, num_src );
       break;
 
+   case OPCODE_NOISE1:
+   case OPCODE_NOISE2:
+   case OPCODE_NOISE3:
+   case OPCODE_NOISE4:
+      /* At some point, a motivated person could add a better
+       * implementation of noise.  Currently not even the nvidia
+       * binary drivers do anything more than this.  In any case, the
+       * place to do this is in the GL state tracker, not the poor
+       * driver.
+       */
+      ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
+      break;
+		 
+
+
    default:
       ureg_insn( ureg, 
                  translate_opcode( inst->Opcode ), 
-- 
cgit v1.2.3


From b9cb74c7f826dfd320f5e5b54aa933898f7ddd3d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 14:31:24 +0100
Subject: gallium: remove the swizzling parts of ExtSwizzle

These haven't been used by the mesa state tracker since the
conversion to tgsi_ureg, and it seems that none of the
other state trackers are using them either.

This helps simplify one of the biggest surprises when starting off with
TGSI shaders.
---
 src/gallium/auxiliary/draw/draw_vs_aos.c       | 30 ++---------
 src/gallium/auxiliary/tgsi/tgsi_build.c        | 27 +---------
 src/gallium/auxiliary/tgsi/tgsi_build.h        |  4 --
 src/gallium/auxiliary/tgsi/tgsi_dump.c         | 27 ----------
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c       | 26 ---------
 src/gallium/auxiliary/tgsi/tgsi_exec.c         | 34 ++++--------
 src/gallium/auxiliary/tgsi/tgsi_info.c         |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |  1 -
 src/gallium/auxiliary/tgsi/tgsi_ppc.c          | 26 +++------
 src/gallium/auxiliary/tgsi/tgsi_scan.c         |  5 --
 src/gallium/auxiliary/tgsi/tgsi_sse2.c         | 34 +++---------
 src/gallium/auxiliary/tgsi/tgsi_text.c         | 28 ++--------
 src/gallium/auxiliary/tgsi/tgsi_util.c         | 74 ++------------------------
 src/gallium/auxiliary/tgsi/tgsi_util.h         | 12 +----
 src/gallium/drivers/cell/ppu/cell_gen_fp.c     | 18 ++-----
 src/gallium/drivers/cell/spu/spu_exec.c        | 27 +++-------
 src/gallium/drivers/cell/spu/spu_util.c        | 48 +----------------
 src/gallium/drivers/i915/i915_fpc_translate.c  | 20 ++-----
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 26 +++------
 src/gallium/drivers/nv30/nv30_fragprog.c       | 24 ++-------
 src/gallium/drivers/nv40/nv40_fragprog.c       | 18 ++-----
 src/gallium/drivers/nv40/nv40_vertprog.c       | 18 ++-----
 src/gallium/drivers/nv50/nv50_program.c        | 28 +++-------
 src/gallium/drivers/r300/r300_tgsi_to_rc.c     |  9 ++--
 src/gallium/include/pipe/p_shader_tokens.h     | 17 +-----
 src/mesa/state_tracker/st_mesa_to_tgsi.c       |  2 -
 26 files changed, 96 insertions(+), 489 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 645d7cccba..88bc790b62 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -537,19 +537,10 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
    unsigned abs = 0;
 
    for (i = 0; i < 4; i++) {
-      unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+      unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, i );
       unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
 
-      switch (swizzle) {
-      case TGSI_EXTSWIZZLE_ZERO:
-      case TGSI_EXTSWIZZLE_ONE:
-         AOS_ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
-         break;
-
-      default:
-         swz |= (swizzle & 0x3) << (i * 2);
-         break;
-      }
+      swz |= (swizzle & 0x3) << (i * 2);
 
       switch (neg) {
       case TGSI_UTIL_SIGN_TOGGLE:
@@ -632,23 +623,10 @@ static void x87_fld_src( struct aos_compilation *cp,
                                                 src->SrcRegister.File, 
                                                 src->SrcRegister.Index);
 
-   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel );
    unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
 
-   switch (swizzle) {
-   case TGSI_EXTSWIZZLE_ZERO:
-      x87_fldz( cp->func );
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      x87_fld1( cp->func );
-      break;
-
-   default:
-      x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
-      break;
-   }
-   
+   x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
 
    switch (neg) {
    case TGSI_UTIL_SIGN_TOGGLE:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index e0cfc54420..98d36f43e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -692,12 +692,8 @@ tgsi_build_full_instruction(
             tgsi_default_src_register_ext_swz() ) ) {
          struct tgsi_src_register_ext_swz *src_register_ext_swz;
 
-         /* Use of the extended swizzle requires the simple swizzle to be identity.
+         /* Use of the extended negate requires the simple negate to be identity.
           */
-         assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X );
-         assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y );
-         assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z );
-         assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W );
          assert( reg->SrcRegister.Negate == FALSE );
 
          if( maxsize <= size )
@@ -707,10 +703,6 @@ tgsi_build_full_instruction(
          size++;
 
          *src_register_ext_swz = tgsi_build_src_register_ext_swz(
-            reg->SrcRegisterExtSwz.ExtSwizzleX,
-            reg->SrcRegisterExtSwz.ExtSwizzleY,
-            reg->SrcRegisterExtSwz.ExtSwizzleZ,
-            reg->SrcRegisterExtSwz.ExtSwizzleW,
             reg->SrcRegisterExtSwz.NegateX,
             reg->SrcRegisterExtSwz.NegateY,
             reg->SrcRegisterExtSwz.NegateZ,
@@ -1048,10 +1040,7 @@ tgsi_default_src_register_ext_swz( void )
    struct tgsi_src_register_ext_swz src_register_ext_swz;
 
    src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
-   src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X;
-   src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y;
-   src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z;
-   src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W;
+   src_register_ext_swz.Padding0 = 0;
    src_register_ext_swz.NegateX = 0;
    src_register_ext_swz.NegateY = 0;
    src_register_ext_swz.NegateZ = 0;
@@ -1074,10 +1063,6 @@ tgsi_compare_src_register_ext_swz(
 
 struct tgsi_src_register_ext_swz
 tgsi_build_src_register_ext_swz(
-   unsigned ext_swizzle_x,
-   unsigned ext_swizzle_y,
-   unsigned ext_swizzle_z,
-   unsigned ext_swizzle_w,
    unsigned negate_x,
    unsigned negate_y,
    unsigned negate_z,
@@ -1088,20 +1073,12 @@ tgsi_build_src_register_ext_swz(
 {
    struct tgsi_src_register_ext_swz src_register_ext_swz;
 
-   assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE );
-   assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE );
    assert( negate_x <= 1 );
    assert( negate_y <= 1 );
    assert( negate_z <= 1 );
    assert( negate_w <= 1 );
 
    src_register_ext_swz = tgsi_default_src_register_ext_swz();
-   src_register_ext_swz.ExtSwizzleX = ext_swizzle_x;
-   src_register_ext_swz.ExtSwizzleY = ext_swizzle_y;
-   src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z;
-   src_register_ext_swz.ExtSwizzleW = ext_swizzle_w;
    src_register_ext_swz.NegateX = negate_x;
    src_register_ext_swz.NegateY = negate_y;
    src_register_ext_swz.NegateZ = negate_z;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 17d977b059..5e0062a96f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -241,10 +241,6 @@ tgsi_compare_src_register_ext_swz(
 
 struct tgsi_src_register_ext_swz
 tgsi_build_src_register_ext_swz(
-   unsigned ext_swizzle_x,
-   unsigned ext_swizzle_y,
-   unsigned ext_swizzle_z,
-   unsigned ext_swizzle_w,
    unsigned negate_x,
    unsigned negate_y,
    unsigned negate_z,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 111d95b666..3a584a10a1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -148,15 +148,6 @@ static const char *texture_names[] =
    "SHADOWRECT"
 };
 
-static const char *extswizzle_names[] =
-{
-   "x",
-   "y",
-   "z",
-   "w",
-   "0",
-   "1"
-};
 
 static const char *modulate_names[TGSI_MODULATE_COUNT] =
 {
@@ -446,24 +437,6 @@ iter_instruction(
          ENM( src->SrcRegister.SwizzleZ, swizzle_names );
          ENM( src->SrcRegister.SwizzleW, swizzle_names );
       }
-      if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
-          src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
-          src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
-          src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) {
-         CHR( '.' );
-         if (src->SrcRegisterExtSwz.NegateX)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateY)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateZ)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names );
-         if (src->SrcRegisterExtSwz.NegateW)
-            TXT("-");
-         ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names );
-      }
 
       if (src->SrcRegisterExtMod.Complement)
          CHR( ')' );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 4a9c02b141..4f59ed22b5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -167,16 +167,6 @@ static const char *TGSI_SRC_REGISTER_EXTS[] =
    "SRC_REGISTER_EXT_TYPE_MOD"
 };
 
-static const char *TGSI_EXTSWIZZLES[] =
-{
-   "EXTSWIZZLE_X",
-   "EXTSWIZZLE_Y",
-   "EXTSWIZZLE_Z",
-   "EXTSWIZZLE_W",
-   "EXTSWIZZLE_ZERO",
-   "EXTSWIZZLE_ONE"
-};
-
 static const char *TGSI_WRITEMASKS[] =
 {
    "0",
@@ -560,22 +550,6 @@ dump_instruction_verbose(
          EOL();
          TXT( "\nType       : " );
          ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) {
-            TXT( "\nExtSwizzleX: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) {
-            TXT( "\nExtSwizzleY: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) {
-            TXT( "\nExtSwizzleZ: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) {
-            TXT( "\nExtSwizzleW: " );
-            ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES );
-         }
          if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
             TXT( "\nNegateX   : " );
             UID( src->SrcRegisterExtSwz.NegateX );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index d9661c75a0..14f0fc4e38 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -210,9 +210,8 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
          uint channelsWritten = 0x0;
          FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
             /* check if we're reading a channel that's been written */
-            uint swizzle = tgsi_util_get_full_src_register_extswizzle(&inst->FullSrcRegisters[i], chan);
-            if (swizzle <= TGSI_SWIZZLE_W &&
-                (channelsWritten & (1 << swizzle))) {
+            uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
+            if (channelsWritten & (1 << swizzle)) {
                return TRUE;
             }
 
@@ -338,7 +337,7 @@ tgsi_exec_machine_bind_shader(
             /* XXX we only handle SOA dependencies properly for MOV/SWZ
              * at this time!
              */
-            if (opcode != TGSI_OPCODE_MOV && opcode != TGSI_OPCODE_SWZ) {
+            if (opcode != TGSI_OPCODE_MOV) {
                debug_printf("Warning: SOA dependency in instruction"
                             " is not handled:\n");
                tgsi_dump_instruction(&parse.FullToken.FullInstruction,
@@ -1130,10 +1129,10 @@ fetch_src_file_channel(
    union tgsi_exec_channel *chan )
 {
    switch( swizzle ) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
       switch( file ) {
       case TGSI_FILE_CONSTANT:
          assert(mach->Consts);
@@ -1201,14 +1200,6 @@ fetch_src_file_channel(
       }
       break;
 
-   case TGSI_EXTSWIZZLE_ZERO:
-      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
-      break;
-
    default:
       assert( 0 );
    }
@@ -1367,7 +1358,7 @@ fetch_source(
        */
    }
 
-   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    fetch_src_file_channel(
       mach,
       reg->SrcRegister.File,
@@ -1689,10 +1680,8 @@ exec_kil(struct tgsi_exec_machine *mach,
    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
    union tgsi_exec_channel r[1];
 
-   /* This mask stores component bits that were already tested. Note that
-    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
-    * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+   /* This mask stores component bits that were already tested. */
+   uniquemask = 0;
 
    for (chan_index = 0; chan_index < 4; chan_index++)
    {
@@ -1700,7 +1689,7 @@ exec_kil(struct tgsi_exec_machine *mach,
       uint i;
 
       /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle (
+      swizzle = tgsi_util_get_full_src_register_swizzle (
                         &inst->FullSrcRegisters[0],
                         chan_index);
 
@@ -2031,7 +2020,6 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       if (inst->Flags & SOA_DEPENDENCY_FLAG) {
          /* Do all fetches into temp regs, then do all stores to avoid
           * intermediate/accidental clobbering.  This could be done all the
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index fe8b0bdce3..be375cabb8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -149,7 +149,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
    { 0, 1, 0, 0, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
    { 0, 1, 0, 0, 0, 0, "KIL", TGSI_OPCODE_KIL },
    { 0, 0, 0, 0, 0, 0, "END", TGSI_OPCODE_END },
-   { 1, 1, 0, 0, 0, 0, "SWZ", TGSI_OPCODE_SWZ }
+   { 0, 0, 0, 0, 0, 0, "", 118 }      /* removed */
 };
 
 const struct tgsi_opcode_info *
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index d321f013d0..b34263da48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -146,7 +146,6 @@ OP01(IFC)
 OP01(BREAKC)
 OP01(KIL)
 OP00(END)
-OP11(SWZ)
 
 
 #undef OP00
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 4b1c7d4e01..617fd7f6be 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -283,14 +283,14 @@ emit_fetch(struct gen_context *gen,
            const struct tgsi_full_src_register *reg,
            const unsigned chan_index)
 {
-   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+   uint swizzle = tgsi_util_get_full_src_register_swizzle(reg, chan_index);
    int dst_vec = -1;
 
    switch (swizzle) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
       switch (reg->SrcRegister.File) {
       case TGSI_FILE_INPUT:
          {
@@ -349,16 +349,6 @@ emit_fetch(struct gen_context *gen,
          assert( 0 );
       }
       break;
-   case TGSI_EXTSWIZZLE_ZERO:
-      ppc_vzero(gen->f, dst_vec);
-      break;
-   case TGSI_EXTSWIZZLE_ONE:
-      {
-         int one_vec = gen_one_vec(gen);
-         dst_vec = ppc_allocate_vec_register(gen->f);
-         ppc_vmove(gen->f, dst_vec, one_vec);
-      }
-      break;
    default:
       assert( 0 );
    }
@@ -418,8 +408,8 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
       return FALSE;
    if (a->SrcRegister.Index != b->SrcRegister.Index)
       return FALSE;
-   swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a);
-   swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b);
+   swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a);
+   swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b);
    if (swz_a != swz_b)
       return FALSE;
    sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
@@ -635,7 +625,6 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
          ppc_vlogefp(gen->f, v1, v0);      /* v1 = log2(v0) */
          break;
       case TGSI_OPCODE_MOV:
-      case TGSI_OPCODE_SWZ:
          if (v0 != v1)
             ppc_vmove(gen->f, v1, v0);
          break;
@@ -1119,7 +1108,6 @@ emit_instruction(struct gen_context *gen,
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
    case TGSI_OPCODE_ABS:
    case TGSI_OPCODE_FLR:
    case TGSI_OPCODE_FRC:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 0db4481a3d..f9c16f1b6c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -228,11 +228,6 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
                 src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
                 src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
 
-                src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X ||
-                src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y ||
-                src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z ||
-                src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W ||
-
                 dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
             {
                tgsi_parse_free(&parse);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 5f6b83b236..a96fc94c7a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1260,13 +1260,13 @@ emit_fetch(
    const struct tgsi_full_src_register *reg,
    const unsigned chan_index )
 {
-   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
 
    switch (swizzle) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
       switch (reg->SrcRegister.File) {
       case TGSI_FILE_CONSTANT:
          emit_const(
@@ -1308,22 +1308,6 @@ emit_fetch(
       }
       break;
 
-   case TGSI_EXTSWIZZLE_ZERO:
-      emit_tempf(
-         func,
-         xmm,
-         TGSI_EXEC_TEMP_00000000_I,
-         TGSI_EXEC_TEMP_00000000_C );
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      emit_tempf(
-         func,
-         xmm,
-         TEMP_ONE_I,
-         TEMP_ONE_C );
-      break;
-
    default:
       assert( 0 );
    }
@@ -1582,13 +1566,13 @@ emit_kil(
    /* This mask stores component bits that were already tested. Note that
     * we test if the value is less than zero, so 1.0 and 0.0 need not to be
     * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+   uniquemask = 0;
 
    FOR_EACH_CHANNEL( chan_index ) {
       unsigned swizzle;
 
       /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle(
+      swizzle = tgsi_util_get_full_src_register_swizzle(
          reg,
          chan_index );
 
@@ -1772,7 +1756,6 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( func, *inst, 4 + chan_index, 0, chan_index );
       }
@@ -2938,8 +2921,7 @@ tgsi_emit_sse2(
              * the result in the cases where the code is too opaque to
              * fix.
              */
-            if (opcode != TGSI_OPCODE_MOV &&
-                opcode != TGSI_OPCODE_SWZ) {
+            if (opcode != TGSI_OPCODE_MOV) {
                debug_printf("Warning: src/dst aliasing in instruction"
                             " is not handled:\n");
                tgsi_dump_instruction(&parse.FullToken.FullInstruction, 1);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index d438450b1e..87d9cd7b3f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -538,13 +538,11 @@ static boolean
 parse_optional_swizzle(
    struct translate_ctx *ctx,
    uint swizzle[4],
-   boolean *parsed_swizzle,
-   boolean *parsed_extswizzle )
+   boolean *parsed_swizzle )
 {
    const char *cur = ctx->cur;
 
    *parsed_swizzle = FALSE;
-   *parsed_extswizzle = FALSE;
 
    eat_opt_white( &cur );
    if (*cur == '.') {
@@ -562,15 +560,8 @@ parse_optional_swizzle(
          else if (uprcase( *cur ) == 'W')
             swizzle[i] = TGSI_SWIZZLE_W;
          else {
-            if (*cur == '0')
-               swizzle[i] = TGSI_EXTSWIZZLE_ZERO;
-            else if (*cur == '1')
-               swizzle[i] = TGSI_EXTSWIZZLE_ONE;
-            else {
-               report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
-               return FALSE;
-            }
-            *parsed_extswizzle = TRUE;
+	    report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" );
+	    return FALSE;
          }
          cur++;
       }
@@ -595,7 +586,6 @@ parse_src_operand(
    uint swizzle[4];
    boolean parsed_ext_negate_paren = FALSE;
    boolean parsed_swizzle;
-   boolean parsed_extswizzle;
 
    if (*ctx->cur == '-') {
       cur = ctx->cur;
@@ -690,16 +680,8 @@ parse_src_operand(
 
    /* Parse optional swizzle.
     */
-   if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) {
-      if (parsed_extswizzle) {
-         assert( parsed_swizzle );
-
-         src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0];
-         src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1];
-         src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2];
-         src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3];
-      }
-      else if (parsed_swizzle) {
+   if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) {
+      if (parsed_swizzle) {
          src->SrcRegister.SwizzleX = swizzle[0];
          src->SrcRegister.SwizzleY = swizzle[1];
          src->SrcRegister.SwizzleZ = swizzle[2];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 71f8a6ca40..6120a2da94 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -69,59 +69,15 @@ tgsi_util_get_src_register_swizzle(
    return 0;
 }
 
-unsigned
-tgsi_util_get_src_register_extswizzle(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->ExtSwizzleX;
-   case 1:
-      return reg->ExtSwizzleY;
-   case 2:
-      return reg->ExtSwizzleZ;
-   case 3:
-      return reg->ExtSwizzleW;
-   default:
-      assert( 0 );
-   }
-   return 0;
-}
 
 unsigned
-tgsi_util_get_full_src_register_extswizzle(
+tgsi_util_get_full_src_register_swizzle(
    const struct tgsi_full_src_register  *reg,
    unsigned component )
 {
-   unsigned swizzle;
-
-   /*
-    * First, calculate  the   extended swizzle for a given channel. This will give
-    * us either a channel index into the simple swizzle or  a constant 1 or   0.
-    */
-   swizzle = tgsi_util_get_src_register_extswizzle(
-      &reg->SrcRegisterExtSwz,
+   return tgsi_util_get_src_register_swizzle(
+      &reg->SrcRegister,
       component );
-
-   assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
-   assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
-   assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
-   assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
-   assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
-   assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
-
-   /*
-    * Second, calculate the simple  swizzle  for   the   unswizzled channel index.
-    * Leave the constants intact, they are   not   affected by the   simple swizzle.
-    */
-   if( swizzle <= TGSI_SWIZZLE_W ) {
-      swizzle = tgsi_util_get_src_register_swizzle(
-         &reg->SrcRegister,
-         swizzle );
-   }
-
-   return swizzle;
 }
 
 void
@@ -148,30 +104,6 @@ tgsi_util_set_src_register_swizzle(
    }
 }
 
-void
-tgsi_util_set_src_register_extswizzle(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned swizzle,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->ExtSwizzleX = swizzle;
-      break;
-   case 1:
-      reg->ExtSwizzleY = swizzle;
-      break;
-   case 2:
-      reg->ExtSwizzleZ = swizzle;
-      break;
-   case 3:
-      reg->ExtSwizzleW = swizzle;
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
 unsigned
 tgsi_util_get_src_register_extnegate(
    const  struct tgsi_src_register_ext_swz *reg,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 21eb656327..bf3f20ca6c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -45,13 +45,9 @@ tgsi_util_get_src_register_swizzle(
    const struct tgsi_src_register *reg,
    unsigned component );
 
-unsigned
-tgsi_util_get_src_register_extswizzle(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component);
 
 unsigned
-tgsi_util_get_full_src_register_extswizzle(
+tgsi_util_get_full_src_register_swizzle(
    const struct tgsi_full_src_register *reg,
    unsigned component );
 
@@ -61,12 +57,6 @@ tgsi_util_set_src_register_swizzle(
    unsigned swizzle,
    unsigned component );
 
-void
-tgsi_util_set_src_register_extswizzle(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned swizzle,
-   unsigned component );
-
 unsigned
 tgsi_util_get_src_register_extnegate(
    const struct tgsi_src_register_ext_swz *reg,
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index b6b2f885af..19e3ab0844 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -231,7 +231,7 @@ static boolean
 is_register_src(struct codegen *gen, int channel,
                 const struct tgsi_full_src_register *src)
 {
-   int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+   int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
    int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
 
    if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
@@ -271,23 +271,14 @@ get_src_reg(struct codegen *gen,
             const struct tgsi_full_src_register *src)
 {
    int reg = -1;
-   int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
+   int swizzle = tgsi_util_get_full_src_register_swizzle(src, channel);
    boolean reg_is_itemp = FALSE;
    uint sign_op;
 
    assert(swizzle >= TGSI_SWIZZLE_X);
-   assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
+   assert(swizzle <= TGSI_SWIZZLE_W);
 
-   if (swizzle == TGSI_EXTSWIZZLE_ONE) {
-      /* Load const one float and early out */
-      reg = get_const_one_reg(gen);
-   }
-   else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
-      /* Load const zero float and early out */
-      reg = get_itemp(gen);
-      spe_xor(gen->f, reg, reg, reg);
-   }
-   else {
+   {
       int index = src->SrcRegister.Index;
 
       assert(swizzle < 4);
@@ -1758,7 +1749,6 @@ emit_instruction(struct codegen *gen,
    case TGSI_OPCODE_ARL:
       return emit_ARL(gen, inst);
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       return emit_MOV(gen, inst);
    case TGSI_OPCODE_ADD:
    case TGSI_OPCODE_SUB:
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 725a72b326..4c32b2d06d 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -346,10 +346,10 @@ fetch_src_file_channel(
    union spu_exec_channel *chan )
 {
    switch( swizzle ) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
       switch( file ) {
       case TGSI_FILE_CONSTANT: {
          unsigned i;
@@ -413,14 +413,6 @@ fetch_src_file_channel(
       }
       break;
 
-   case TGSI_EXTSWIZZLE_ZERO:
-      *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
-      break;
-
    default:
       ASSERT( 0 );
    }
@@ -500,7 +492,7 @@ fetch_source(
       }
    }
 
-   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    fetch_src_file_channel(
       mach,
       reg->SrcRegister.File,
@@ -610,10 +602,8 @@ exec_kil(struct spu_exec_machine *mach,
    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
    union spu_exec_channel r[1];
 
-   /* This mask stores component bits that were already tested. Note that
-    * we test if the value is less than zero, so 1.0 and 0.0 need not to be
-    * tested. */
-   uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+   /* This mask stores component bits that were already tested. */
+   uniquemask = 0;
 
    for (chan_index = 0; chan_index < 4; chan_index++)
    {
@@ -621,7 +611,7 @@ exec_kil(struct spu_exec_machine *mach,
       uint i;
 
       /* unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle (
+      swizzle = tgsi_util_get_full_src_register_swizzle (
                         &inst->FullSrcRegisters[0],
                         chan_index);
 
@@ -909,7 +899,6 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          STORE( &r[0], 0, chan_index );
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index af25dd3718..25a7a71133 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -26,59 +26,15 @@ tgsi_util_get_src_register_swizzle(
    return 0;
 }
 
-unsigned
-tgsi_util_get_src_register_extswizzle(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->ExtSwizzleX;
-   case 1:
-      return reg->ExtSwizzleY;
-   case 2:
-      return reg->ExtSwizzleZ;
-   case 3:
-      return reg->ExtSwizzleW;
-   default:
-      ASSERT( 0 );
-   }
-   return 0;
-}
 
 unsigned
 tgsi_util_get_full_src_register_extswizzle(
    const struct tgsi_full_src_register  *reg,
    unsigned component )
 {
-   unsigned swizzle;
-
-   /*
-    * First, calculate  the   extended swizzle for a given channel. This will give
-    * us either a channel index into the simple swizzle or  a constant 1 or   0.
-    */
-   swizzle = tgsi_util_get_src_register_extswizzle(
-      &reg->SrcRegisterExtSwz,
+   return tgsi_util_get_src_register_swizzle(
+      reg->SrcRegister,
       component );
-
-   ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
-   ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
-   ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
-   ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
-   ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
-   ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
-
-   /*
-    * Second, calculate the simple  swizzle  for   the   unswizzled channel index.
-    * Leave the constants intact, they are   not   affected by the   simple swizzle.
-    */
-   if( swizzle <= TGSI_SWIZZLE_W ) {
-      swizzle = tgsi_util_get_src_register_swizzle(
-         &reg->SrcRegister,
-         component );
-   }
-
-   return swizzle;
 }
 
 unsigned
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 1fe5cda956..3074044441 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -214,20 +214,11 @@ src_vector(struct i915_fp_compile *p,
       return 0;
    }
 
-   if (source->SrcRegister.Extended) {
-      src = swizzle(src,
-                    source->SrcRegisterExtSwz.ExtSwizzleX,
-                    source->SrcRegisterExtSwz.ExtSwizzleY,
-                    source->SrcRegisterExtSwz.ExtSwizzleZ,
-                    source->SrcRegisterExtSwz.ExtSwizzleW);
-   }
-   else {
-      src = swizzle(src,
-                    source->SrcRegister.SwizzleX,
-                    source->SrcRegister.SwizzleY,
-                    source->SrcRegister.SwizzleZ,
-                    source->SrcRegister.SwizzleW);
-   }
+   src = swizzle(src,
+		 source->SrcRegister.SwizzleX,
+		 source->SrcRegister.SwizzleY,
+		 source->SrcRegister.SwizzleZ,
+		 source->SrcRegister.SwizzleW);
 
 
    /* There's both negate-all-components and per-component negation.
@@ -681,7 +672,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       emit_simple_arith(p, inst, A0_MOV, 1);
       break;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 926cc1cc9f..64027de6aa 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -158,14 +158,14 @@ emit_fetch(
    const unsigned chan_index )
 {
    const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
-   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    LLVMValueRef res;
 
    switch (swizzle) {
-   case TGSI_EXTSWIZZLE_X:
-   case TGSI_EXTSWIZZLE_Y:
-   case TGSI_EXTSWIZZLE_Z:
-   case TGSI_EXTSWIZZLE_W:
+   case TGSI_SWIZZLE_X:
+   case TGSI_SWIZZLE_Y:
+   case TGSI_SWIZZLE_Z:
+   case TGSI_SWIZZLE_W:
 
       switch (reg->SrcRegister.File) {
       case TGSI_FILE_CONSTANT: {
@@ -198,14 +198,6 @@ emit_fetch(
       }
       break;
 
-   case TGSI_EXTSWIZZLE_ZERO:
-      res = bld->base.zero;
-      break;
-
-   case TGSI_EXTSWIZZLE_ONE:
-      res = bld->base.one;
-      break;
-
    default:
       assert( 0 );
       return bld->base.undef;
@@ -394,12 +386,7 @@ emit_kil(
       unsigned swizzle;
 
       /* Unswizzle channel */
-      swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
-
-      /* Note that we test if the value is less than zero, so 1.0 and 0.0 need
-       * not to be tested. */
-      if(swizzle == TGSI_EXTSWIZZLE_ZERO || swizzle == TGSI_EXTSWIZZLE_ONE)
-         continue;
+      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
 
       /* Check if the component has not been already tested. */
       assert(swizzle < NUM_CHANNELS);
@@ -488,7 +475,6 @@ emit_instruction(
 #endif
 
    case TGSI_OPCODE_MOV:
-   case TGSI_OPCODE_SWZ:
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          dst0[chan_index] = emit_fetch( bld, inst, 0, chan_index );
       }
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 62c6c76cee..93cf869f58 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -326,21 +326,13 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 	uint c;
 
 	for (c = 0; c < 4; c++) {
-		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
-		case TGSI_EXTSWIZZLE_X:
-		case TGSI_EXTSWIZZLE_Y:
-		case TGSI_EXTSWIZZLE_Z:
-		case TGSI_EXTSWIZZLE_W:
+		switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+		case TGSI_SWIZZLE_X:
+		case TGSI_SWIZZLE_Y:
+		case TGSI_SWIZZLE_Z:
+		case TGSI_SWIZZLE_W:
 			mask |= (1 << c);
 			break;
-		case TGSI_EXTSWIZZLE_ZERO:
-			zero_mask |= (1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
-		case TGSI_EXTSWIZZLE_ONE:
-			one_mask |= (1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
 		default:
 			assert(0);
 		}
@@ -357,12 +349,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 	if (mask)
 		arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
 
-	if (zero_mask)
-		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
-
-	if (one_mask)
-		arith(fpc, 0, STR, *src, one_mask, *src, none, none);
-
 	if (neg_mask) {
 		struct nv30_sreg one = temp(fpc);
 		arith(fpc, 0, STR, one, neg_mask, one, none, none);
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index e3550baa63..4a6b355936 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -329,21 +329,13 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 	uint c;
 
 	for (c = 0; c < 4; c++) {
-		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
-		case TGSI_EXTSWIZZLE_X:
-		case TGSI_EXTSWIZZLE_Y:
-		case TGSI_EXTSWIZZLE_Z:
-		case TGSI_EXTSWIZZLE_W:
+		switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+		case TGSI_SWIZZLE_X:
+		case TGSI_SWIZZLE_Y:
+		case TGSI_SWIZZLE_Z:
+		case TGSI_SWIZZLE_W:
 			mask |= (1 << c);
 			break;
-		case TGSI_EXTSWIZZLE_ZERO:
-			zero_mask |= (1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
-		case TGSI_EXTSWIZZLE_ONE:
-			one_mask |= (1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
 		default:
 			assert(0);
 		}
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 0382dbba8f..4898aaa809 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -370,21 +370,13 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 	uint c;
 
 	for (c = 0; c < 4; c++) {
-		switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) {
-		case TGSI_EXTSWIZZLE_X:
-		case TGSI_EXTSWIZZLE_Y:
-		case TGSI_EXTSWIZZLE_Z:
-		case TGSI_EXTSWIZZLE_W:
+		switch (tgsi_util_get_full_src_register_swizzle(fsrc, c)) {
+		case TGSI_SWIZZLE_X:
+		case TGSI_SWIZZLE_Y:
+		case TGSI_SWIZZLE_Z:
+		case TGSI_SWIZZLE_W:
 			mask |= tgsi_mask(1 << c);
 			break;
-		case TGSI_EXTSWIZZLE_ZERO:
-			zero_mask |= tgsi_mask(1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
-		case TGSI_EXTSWIZZLE_ONE:
-			one_mask |= tgsi_mask(1 << c);
-			tgsi.swz[c] = SWZ_X;
-			break;
 		default:
 			assert(0);
 		}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index c7145bb9be..5c691877e0 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1544,12 +1544,12 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 
 	sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
 
-	c = tgsi_util_get_full_src_register_extswizzle(src, chan);
+	c = tgsi_util_get_full_src_register_swizzle(src, chan);
 	switch (c) {
-	case TGSI_EXTSWIZZLE_X:
-	case TGSI_EXTSWIZZLE_Y:
-	case TGSI_EXTSWIZZLE_Z:
-	case TGSI_EXTSWIZZLE_W:
+	case TGSI_SWIZZLE_X:
+	case TGSI_SWIZZLE_Y:
+	case TGSI_SWIZZLE_Z:
+	case TGSI_SWIZZLE_W:
 		switch (src->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
 			r = &pc->attr[src->SrcRegister.Index * 4 + c];
@@ -1586,13 +1586,6 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			break;
 		}
 		break;
-	case TGSI_EXTSWIZZLE_ZERO:
-		r = alloc_immd(pc, 0.0);
-		return r;
-	case TGSI_EXTSWIZZLE_ONE:
-		if (sgn == TGSI_UTIL_SIGN_TOGGLE || sgn == TGSI_UTIL_SIGN_SET)
-			return alloc_immd(pc, -1.0);
-		return alloc_immd(pc, 1.0);
 	default:
 		assert(0);
 		break;
@@ -2005,7 +1998,6 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 		break;
 	case TGSI_OPCODE_MOV:
-	case TGSI_OPCODE_SWZ:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
@@ -2189,10 +2181,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			k = tgsi_util_get_full_src_register_extswizzle(src, c);
-
-			if (k > TGSI_EXTSWIZZLE_W)
-				continue;
+			k = tgsi_util_get_full_src_register_swizzle(src, c);
 
 			reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;
 		}
@@ -2295,11 +2284,10 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 
 			if (!(mask & (1 << chn))) /* src is not read */
 				continue;
-			c = tgsi_util_get_full_src_register_extswizzle(fs, chn);
+			c = tgsi_util_get_full_src_register_swizzle(fs, chn);
 			s = tgsi_util_get_full_src_register_sign_mode(fs, chn);
 
-			if (c > TGSI_EXTSWIZZLE_W ||
-			    !(fd->DstRegister.WriteMask & (1 << c)))
+			if (!(fd->DstRegister.WriteMask & (1 << c)))
 				continue;
 
 			/* no danger if src is copied to TEMP first */
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 3d2f6cafee..de599b068f 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -142,7 +142,6 @@ static unsigned translate_opcode(unsigned opcode)
      /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */
      /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
         case TGSI_OPCODE_KIL: return RC_OPCODE_KIL;
-        case TGSI_OPCODE_SWZ: return RC_OPCODE_SWZ;
     }
 
     fprintf(stderr, "Unknown opcode: %i\n", opcode);
@@ -205,10 +204,10 @@ static void transform_srcreg(
     dst->File = translate_register_file(src->SrcRegister.File);
     dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index);
     dst->RelAddr = src->SrcRegister.Indirect;
-    dst->Swizzle = tgsi_util_get_full_src_register_extswizzle(src, 0);
-    dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 1) << 3;
-    dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 2) << 6;
-    dst->Swizzle |= tgsi_util_get_full_src_register_extswizzle(src, 3) << 9;
+    dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0);
+    dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3;
+    dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
+    dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
     dst->Abs = src->SrcRegisterExtMod.Absolute;
     dst->Negate =
         src->SrcRegisterExtSwz.NegateX |
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 48e6583ada..b01df41b0e 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -275,7 +275,7 @@ union tgsi_immediate_data
 #define TGSI_OPCODE_BREAKC              115
 #define TGSI_OPCODE_KIL                 116  /* conditional kill */
 #define TGSI_OPCODE_END                 117  /* aka HALT */
-#define TGSI_OPCODE_SWZ                 118
+                                /* gap */
 #define TGSI_OPCODE_LAST                119
 
 #define TGSI_SAT_NONE            0  /* do not saturate */
@@ -496,17 +496,7 @@ struct tgsi_src_register_ext
  * follows.
  */
 
-#define TGSI_EXTSWIZZLE_X       TGSI_SWIZZLE_X
-#define TGSI_EXTSWIZZLE_Y       TGSI_SWIZZLE_Y
-#define TGSI_EXTSWIZZLE_Z       TGSI_SWIZZLE_Z
-#define TGSI_EXTSWIZZLE_W       TGSI_SWIZZLE_W
-#define TGSI_EXTSWIZZLE_ZERO    4
-#define TGSI_EXTSWIZZLE_ONE     5
-
 /**
- * ExtSwizzleX, ExtSwizzleY, ExtSwizzleZ and ExtSwizzleW swizzle the source
- * register in an extended manner.
- *
  * NegateX, NegateY, NegateZ and NegateW negate individual components of the
  * source register.
  *
@@ -518,10 +508,7 @@ struct tgsi_src_register_ext
 struct tgsi_src_register_ext_swz
 {
    unsigned Type         : 4;    /* TGSI_SRC_REGISTER_EXT_TYPE_SWZ */
-   unsigned ExtSwizzleX  : 4;    /* TGSI_EXTSWIZZLE_ */
-   unsigned ExtSwizzleY  : 4;    /* TGSI_EXTSWIZZLE_ */
-   unsigned ExtSwizzleZ  : 4;    /* TGSI_EXTSWIZZLE_ */
-   unsigned ExtSwizzleW  : 4;    /* TGSI_EXTSWIZZLE_ */
+   unsigned Padding0     : 16;	 /* unused */
    unsigned NegateX      : 1;    /* BOOL */
    unsigned NegateY      : 1;    /* BOOL */
    unsigned NegateZ      : 1;    /* BOOL */
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 1b9d35d353..3d6c215819 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -515,8 +515,6 @@ translate_opcode( unsigned op )
       return TGSI_OPCODE_SSG;
    case OPCODE_SUB:
       return TGSI_OPCODE_SUB;
-   case OPCODE_SWZ:
-      return TGSI_OPCODE_SWZ;
    case OPCODE_TEX:
       return TGSI_OPCODE_TEX;
    case OPCODE_TXB:
-- 
cgit v1.2.3


From 8a571b809accce1c36907ea616a893b920b752e5 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 14:38:30 +0100
Subject: cell: typo from ExtSwizzle commit

---
 src/gallium/drivers/cell/spu/spu_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index 25a7a71133..a62c04e6af 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -28,7 +28,7 @@ tgsi_util_get_src_register_swizzle(
 
 
 unsigned
-tgsi_util_get_full_src_register_extswizzle(
+tgsi_util_get_full_src_register_swizzle(
    const struct tgsi_full_src_register  *reg,
    unsigned component )
 {
-- 
cgit v1.2.3


From da253319f9e5d37d9c55b975ef9328545a3ac9b4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 14:50:02 +0100
Subject: gallium: remove extended negate also, and also the ExtSwz token

Likewise, the extended negate functionality hasn't been
used since mesa switched to using tgsi_ureg to build programs
and started translating the SWZ opcode internally to a single MAD.
---
 src/gallium/auxiliary/tgsi/tgsi_build.c       | 82 ---------------------------
 src/gallium/auxiliary/tgsi/tgsi_build.h       | 18 ------
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c      | 32 +----------
 src/gallium/auxiliary/tgsi/tgsi_parse.c       |  5 --
 src/gallium/auxiliary/tgsi/tgsi_parse.h       |  1 -
 src/gallium/auxiliary/tgsi/tgsi_ureg.c        |  1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c        | 53 +----------------
 src/gallium/auxiliary/tgsi/tgsi_util.h        | 12 ----
 src/gallium/drivers/cell/spu/spu_util.c       | 46 ---------------
 src/gallium/drivers/i915/i915_fpc_translate.c | 13 +----
 src/gallium/drivers/nv30/nv30_fragprog.c      | 17 +-----
 src/gallium/drivers/nv40/nv40_fragprog.c      | 23 +-------
 src/gallium/drivers/nv40/nv40_vertprog.c      | 23 +-------
 src/gallium/drivers/r300/r300_tgsi_to_rc.c    |  7 +--
 src/gallium/include/pipe/p_shader_tokens.h    | 24 --------
 15 files changed, 11 insertions(+), 346 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 98d36f43e4..d45561362d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -687,32 +687,6 @@ tgsi_build_full_instruction(
          header );
       prev_token = (struct tgsi_token  *) src_register;
 
-      if( tgsi_compare_src_register_ext_swz(
-            reg->SrcRegisterExtSwz,
-            tgsi_default_src_register_ext_swz() ) ) {
-         struct tgsi_src_register_ext_swz *src_register_ext_swz;
-
-         /* Use of the extended negate requires the simple negate to be identity.
-          */
-         assert( reg->SrcRegister.Negate == FALSE );
-
-         if( maxsize <= size )
-            return 0;
-         src_register_ext_swz =
-            (struct  tgsi_src_register_ext_swz *) &tokens[size];
-         size++;
-
-         *src_register_ext_swz = tgsi_build_src_register_ext_swz(
-            reg->SrcRegisterExtSwz.NegateX,
-            reg->SrcRegisterExtSwz.NegateY,
-            reg->SrcRegisterExtSwz.NegateZ,
-            reg->SrcRegisterExtSwz.NegateW,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) src_register_ext_swz;
-      }
-
       if( tgsi_compare_src_register_ext_mod(
             reg->SrcRegisterExtMod,
             tgsi_default_src_register_ext_mod() ) ) {
@@ -1025,7 +999,6 @@ tgsi_default_full_src_register( void )
    struct tgsi_full_src_register full_src_register;
 
    full_src_register.SrcRegister = tgsi_default_src_register();
-   full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz();
    full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
    full_src_register.SrcRegisterInd = tgsi_default_src_register();
    full_src_register.SrcRegisterDim = tgsi_default_dimension();
@@ -1034,61 +1007,6 @@ tgsi_default_full_src_register( void )
    return full_src_register;
 }
 
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void )
-{
-   struct tgsi_src_register_ext_swz src_register_ext_swz;
-
-   src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ;
-   src_register_ext_swz.Padding0 = 0;
-   src_register_ext_swz.NegateX = 0;
-   src_register_ext_swz.NegateY = 0;
-   src_register_ext_swz.NegateZ = 0;
-   src_register_ext_swz.NegateW = 0;
-   src_register_ext_swz.Padding = 0;
-   src_register_ext_swz.Extended = 0;
-
-   return src_register_ext_swz;
-}
-
-unsigned
-tgsi_compare_src_register_ext_swz(
-   struct tgsi_src_register_ext_swz a,
-   struct tgsi_src_register_ext_swz b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return compare32(&a, &b);
-}
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
-   unsigned negate_x,
-   unsigned negate_y,
-   unsigned negate_z,
-   unsigned negate_w,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_src_register_ext_swz src_register_ext_swz;
-
-   assert( negate_x <= 1 );
-   assert( negate_y <= 1 );
-   assert( negate_z <= 1 );
-   assert( negate_w <= 1 );
-
-   src_register_ext_swz = tgsi_default_src_register_ext_swz();
-   src_register_ext_swz.NegateX = negate_x;
-   src_register_ext_swz.NegateY = negate_y;
-   src_register_ext_swz.NegateZ = negate_z;
-   src_register_ext_swz.NegateW = negate_w;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return src_register_ext_swz;
-}
 
 struct tgsi_src_register_ext_mod
 tgsi_default_src_register_ext_mod( void )
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 5e0062a96f..9ae1705f6c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -231,24 +231,6 @@ tgsi_build_src_register(
 struct tgsi_full_src_register
 tgsi_default_full_src_register( void );
 
-struct tgsi_src_register_ext_swz
-tgsi_default_src_register_ext_swz( void );
-
-unsigned
-tgsi_compare_src_register_ext_swz(
-   struct tgsi_src_register_ext_swz a,
-   struct tgsi_src_register_ext_swz b );
-
-struct tgsi_src_register_ext_swz
-tgsi_build_src_register_ext_swz(
-   unsigned negate_x,
-   unsigned negate_y,
-   unsigned negate_z,
-   unsigned negate_w,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
-
 struct tgsi_src_register_ext_mod
 tgsi_default_src_register_ext_mod( void );
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 4f59ed22b5..c7dbdb3bd2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -163,7 +163,7 @@ static const char *TGSI_TEXTURES[] =
 
 static const char *TGSI_SRC_REGISTER_EXTS[] =
 {
-   "SRC_REGISTER_EXT_TYPE_SWZ",
+   "",
    "SRC_REGISTER_EXT_TYPE_MOD"
 };
 
@@ -546,36 +546,6 @@ dump_instruction_verbose(
          }
       }
 
-      if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) {
-         EOL();
-         TXT( "\nType       : " );
-         ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) {
-            TXT( "\nNegateX   : " );
-            UID( src->SrcRegisterExtSwz.NegateX );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) {
-            TXT( "\nNegateY   : " );
-            UID( src->SrcRegisterExtSwz.NegateY );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) {
-            TXT( "\nNegateZ   : " );
-            UID( src->SrcRegisterExtSwz.NegateZ );
-         }
-         if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) {
-            TXT( "\nNegateW   : " );
-            UID( src->SrcRegisterExtSwz.NegateW );
-         }
-         if( ignored ) {
-            TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtSwz.Padding );
-            if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) {
-               TXT( "\nExtended   : " );
-               UID( src->SrcRegisterExtSwz.Extended );
-            }
-         }
-      }
-
       if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
          EOL();
          TXT( "\nType     : " );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 4870f82b6b..f742c71936 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -264,11 +264,6 @@ tgsi_parse_token(
             next_token( ctx, &token );
 
             switch( token.Type ) {
-            case TGSI_SRC_REGISTER_EXT_TYPE_SWZ:
-               copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz,
-                          &token);
-               break;
-
             case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
                copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod,
                           &token);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index a26ee5ba86..602131398d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -56,7 +56,6 @@ struct tgsi_full_dst_register
 struct tgsi_full_src_register
 {
    struct tgsi_src_register         SrcRegister;
-   struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
    struct tgsi_src_register_ext_mod SrcRegisterExtMod;
    struct tgsi_src_register         SrcRegisterInd;
    struct tgsi_dimension            SrcRegisterDim;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 654426a903..8cb574ea43 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -51,7 +51,6 @@ union tgsi_any_token {
    struct tgsi_instruction_ext_texture insn_ext_texture;
    struct tgsi_instruction_ext_predicate insn_ext_predicate;
    struct tgsi_src_register src;
-   struct tgsi_src_register_ext_swz src_ext_swz;
    struct tgsi_src_register_ext_mod src_ext_mod;
    struct tgsi_dimension dim;
    struct tgsi_dst_register dst;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 6120a2da94..4dee1be9e8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -104,50 +104,6 @@ tgsi_util_set_src_register_swizzle(
    }
 }
 
-unsigned
-tgsi_util_get_src_register_extnegate(
-   const  struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->NegateX;
-   case 1:
-      return reg->NegateY;
-   case 2:
-      return reg->NegateZ;
-   case 3:
-      return reg->NegateW;
-   default:
-      assert( 0 );
-   }
-   return 0;
-}
-
-void
-tgsi_util_set_src_register_extnegate(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned negate,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->NegateX = negate;
-      break;
-   case 1:
-      reg->NegateY = negate;
-      break;
-   case 2:
-      reg->NegateZ = negate;
-      break;
-   case 3:
-      reg->NegateW = negate;
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
 unsigned
 tgsi_util_get_full_src_register_sign_mode(
    const struct  tgsi_full_src_register *reg,
@@ -171,9 +127,7 @@ tgsi_util_get_full_src_register_sign_mode(
       unsigned negate;
 
       negate = reg->SrcRegister.Negate;
-      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
-         negate = !negate;
-      }
+
       if( reg->SrcRegisterExtMod.Negate ) {
          negate = !negate;
       }
@@ -194,11 +148,6 @@ tgsi_util_set_full_src_register_sign_mode(
    struct tgsi_full_src_register *reg,
    unsigned sign_mode )
 {
-   reg->SrcRegisterExtSwz.NegateX = 0;
-   reg->SrcRegisterExtSwz.NegateY = 0;
-   reg->SrcRegisterExtSwz.NegateZ = 0;
-   reg->SrcRegisterExtSwz.NegateW = 0;
-
    switch (sign_mode)
    {
    case TGSI_UTIL_SIGN_CLEAR:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index bf3f20ca6c..19ee2e7cf2 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -33,7 +33,6 @@ extern "C" {
 #endif
 
 struct tgsi_src_register;
-struct tgsi_src_register_ext_swz;
 struct tgsi_full_src_register;
 
 void *
@@ -57,17 +56,6 @@ tgsi_util_set_src_register_swizzle(
    unsigned swizzle,
    unsigned component );
 
-unsigned
-tgsi_util_get_src_register_extnegate(
-   const struct tgsi_src_register_ext_swz *reg,
-   unsigned component );
-
-void
-tgsi_util_set_src_register_extnegate(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned negate,
-   unsigned component );
-
 #define TGSI_UTIL_SIGN_CLEAR    0   /* Force positive */
 #define TGSI_UTIL_SIGN_SET      1   /* Force negative */
 #define TGSI_UTIL_SIGN_TOGGLE   2   /* Negate */
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index a62c04e6af..c2c32b22d5 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -37,49 +37,6 @@ tgsi_util_get_full_src_register_swizzle(
       component );
 }
 
-unsigned
-tgsi_util_get_src_register_extnegate(
-   const  struct tgsi_src_register_ext_swz *reg,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      return reg->NegateX;
-   case 1:
-      return reg->NegateY;
-   case 2:
-      return reg->NegateZ;
-   case 3:
-      return reg->NegateW;
-   default:
-      ASSERT( 0 );
-   }
-   return 0;
-}
-
-void
-tgsi_util_set_src_register_extnegate(
-   struct tgsi_src_register_ext_swz *reg,
-   unsigned negate,
-   unsigned component )
-{
-   switch( component ) {
-   case 0:
-      reg->NegateX = negate;
-      break;
-   case 1:
-      reg->NegateY = negate;
-      break;
-   case 2:
-      reg->NegateZ = negate;
-      break;
-   case 3:
-      reg->NegateW = negate;
-      break;
-   default:
-      ASSERT( 0 );
-   }
-}
 
 unsigned
 tgsi_util_get_full_src_register_sign_mode(
@@ -104,9 +61,6 @@ tgsi_util_get_full_src_register_sign_mode(
       unsigned negate;
 
       negate = reg->SrcRegister.Negate;
-      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
-         negate = !negate;
-      }
       if( reg->SrcRegisterExtMod.Negate ) {
          negate = !negate;
       }
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 3074044441..379d47e79a 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -225,17 +225,8 @@ src_vector(struct i915_fp_compile *p,
     * Try to handle both here.
     */
    {
-      int nx = source->SrcRegisterExtSwz.NegateX;
-      int ny = source->SrcRegisterExtSwz.NegateY;
-      int nz = source->SrcRegisterExtSwz.NegateZ;
-      int nw = source->SrcRegisterExtSwz.NegateW;
-      if (source->SrcRegister.Negate) {
-         nx = !nx;
-         ny = !ny;
-         nz = !nz;
-         nw = !nw;
-      }
-      src = negate(src, nx, ny, nz, nw);
+      int n = source->SrcRegister.Negate;
+      src = negate(src, n, n, n, n);
    }
 
    /* no abs() or post-abs negation */
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 93cf869f58..cc0385426c 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -318,11 +318,7 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 {
 	const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0);
 	struct nv30_sreg tgsi = tgsi_src(fpc, fsrc);
-	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
-	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
-			fsrc->SrcRegisterExtSwz.NegateY,
-			fsrc->SrcRegisterExtSwz.NegateZ,
-			fsrc->SrcRegisterExtSwz.NegateW };
+	uint mask = 0;
 	uint c;
 
 	for (c = 0; c < 4; c++) {
@@ -336,12 +332,9 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 		default:
 			assert(0);
 		}
-
-		if (!tgsi.negate && neg[c])
-			neg_mask |= (1 << c);
 	}
 
-	if (mask == MASK_ALL && !neg_mask)
+	if (mask == MASK_ALL)
 		return TRUE;
 
 	*src = temp(fpc);
@@ -349,12 +342,6 @@ src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 	if (mask)
 		arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
 
-	if (neg_mask) {
-		struct nv30_sreg one = temp(fpc);
-		arith(fpc, 0, STR, one, neg_mask, one, none, none);
-		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
-	}
-
 	return FALSE;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 4a6b355936..99277506fc 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -321,11 +321,7 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 {
 	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
 	struct nv40_sreg tgsi = tgsi_src(fpc, fsrc);
-	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
-	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
-			fsrc->SrcRegisterExtSwz.NegateY,
-			fsrc->SrcRegisterExtSwz.NegateZ,
-			fsrc->SrcRegisterExtSwz.NegateW };
+	uint mask = 0;
 	uint c;
 
 	for (c = 0; c < 4; c++) {
@@ -339,12 +335,9 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 		default:
 			assert(0);
 		}
-
-		if (!tgsi.negate && neg[c])
-			neg_mask |= (1 << c);
 	}
 
-	if (mask == MASK_ALL && !neg_mask)
+	if (mask == MASK_ALL)
 		return TRUE;
 
 	*src = temp(fpc);
@@ -352,18 +345,6 @@ src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc,
 	if (mask)
 		arith(fpc, 0, MOV, *src, mask, tgsi, none, none);
 
-	if (zero_mask)
-		arith(fpc, 0, SFL, *src, zero_mask, *src, none, none);
-
-	if (one_mask)
-		arith(fpc, 0, STR, *src, one_mask, *src, none, none);
-
-	if (neg_mask) {
-		struct nv40_sreg one = temp(fpc);
-		arith(fpc, 0, STR, one, neg_mask, one, none, none);
-		arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none);
-	}
-
 	return FALSE;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 4898aaa809..31dae2457f 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -362,11 +362,7 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 {
 	const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0);
 	struct nv40_sreg tgsi = tgsi_src(vpc, fsrc);
-	uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0;
-	uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX,
-			fsrc->SrcRegisterExtSwz.NegateY,
-			fsrc->SrcRegisterExtSwz.NegateZ,
-			fsrc->SrcRegisterExtSwz.NegateW };
+	uint mask = 0;
 	uint c;
 
 	for (c = 0; c < 4; c++) {
@@ -380,12 +376,9 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 		default:
 			assert(0);
 		}
-
-		if (!tgsi.negate && neg[c])
-			neg_mask |= tgsi_mask(1 << c);
 	}
 
-	if (mask == MASK_ALL && !neg_mask)
+	if (mask == MASK_ALL)
 		return TRUE;
 
 	*src = temp(vpc);
@@ -393,18 +386,6 @@ src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc,
 	if (mask)
 		arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none);
 
-	if (zero_mask)
-		arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none);
-
-	if (one_mask)
-		arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none);
-
-	if (neg_mask) {
-		struct nv40_sreg one = temp(vpc);
-		arith(vpc, 0, OP_STR, one, neg_mask, one, none, none);
-		arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none);
-	}
-
 	return FALSE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index de599b068f..589f1984ee 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -209,12 +209,7 @@ static void transform_srcreg(
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
     dst->Abs = src->SrcRegisterExtMod.Absolute;
-    dst->Negate =
-        src->SrcRegisterExtSwz.NegateX |
-        (src->SrcRegisterExtSwz.NegateY << 1) |
-        (src->SrcRegisterExtSwz.NegateZ << 2) |
-        (src->SrcRegisterExtSwz.NegateW << 3);
-    dst->Negate ^= src->SrcRegister.Negate ? RC_MASK_XYZW : 0;
+    dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0;
 }
 
 static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src)
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index b01df41b0e..de338c4877 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -475,7 +475,6 @@ struct tgsi_src_register
  * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows.
  */
 
-#define TGSI_SRC_REGISTER_EXT_TYPE_SWZ      0
 #define TGSI_SRC_REGISTER_EXT_TYPE_MOD      1
 
 struct tgsi_src_register_ext
@@ -486,9 +485,6 @@ struct tgsi_src_register_ext
 };
 
 /**
- * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ,
- * it should be cast to tgsi_src_register_ext_swz.
- * 
  * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD,
  * it should be cast to tgsi_src_register_ext_mod.
  * 
@@ -496,26 +492,6 @@ struct tgsi_src_register_ext
  * follows.
  */
 
-/**
- * NegateX, NegateY, NegateZ and NegateW negate individual components of the
- * source register.
- *
- * NOTE: To simplify matter, if this token is present, the corresponding Swizzle
- *       and Negate fields in tgsi_src_register should be set to X,Y,Z,W
- *       and FALSE, respectively.
- */
-
-struct tgsi_src_register_ext_swz
-{
-   unsigned Type         : 4;    /* TGSI_SRC_REGISTER_EXT_TYPE_SWZ */
-   unsigned Padding0     : 16;	 /* unused */
-   unsigned NegateX      : 1;    /* BOOL */
-   unsigned NegateY      : 1;    /* BOOL */
-   unsigned NegateZ      : 1;    /* BOOL */
-   unsigned NegateW      : 1;    /* BOOL */
-   unsigned Padding      : 7;
-   unsigned Extended     : 1;    /* BOOL */
-};
 
 /**
  * Extra src register modifiers
-- 
cgit v1.2.3


From 2f5f7c07732577f60666e3cee69c75c9b035c145 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 16:55:02 +0100
Subject: i965g: re-starting from the dri driver

---
 src/gallium/drivers/i965/Makefile               |  104 +
 src/gallium/drivers/i965/brw_cc.c               |  297 +++
 src/gallium/drivers/i965/brw_clip.c             |  273 ++
 src/gallium/drivers/i965/brw_clip.h             |  179 ++
 src/gallium/drivers/i965/brw_clip_line.c        |  276 ++
 src/gallium/drivers/i965/brw_clip_point.c       |   56 +
 src/gallium/drivers/i965/brw_clip_state.c       |  184 ++
 src/gallium/drivers/i965/brw_clip_tri.c         |  603 +++++
 src/gallium/drivers/i965/brw_clip_unfilled.c    |  505 ++++
 src/gallium/drivers/i965/brw_clip_util.c        |  396 +++
 src/gallium/drivers/i965/brw_context.c          |  173 ++
 src/gallium/drivers/i965/brw_context.h          |  767 ++++++
 src/gallium/drivers/i965/brw_curbe.c            |  376 +++
 src/gallium/drivers/i965/brw_defines.h          |  851 +++++++
 src/gallium/drivers/i965/brw_disasm.c           |  903 +++++++
 src/gallium/drivers/i965/brw_draw.c             |  493 ++++
 src/gallium/drivers/i965/brw_draw.h             |   54 +
 src/gallium/drivers/i965/brw_draw_upload.c      |  742 ++++++
 src/gallium/drivers/i965/brw_eu.c               |  254 ++
 src/gallium/drivers/i965/brw_eu.h               |  968 +++++++
 src/gallium/drivers/i965/brw_eu_debug.c         |   95 +
 src/gallium/drivers/i965/brw_eu_emit.c          | 1425 +++++++++++
 src/gallium/drivers/i965/brw_eu_util.c          |  126 +
 src/gallium/drivers/i965/brw_gs.c               |  201 ++
 src/gallium/drivers/i965/brw_gs.h               |   76 +
 src/gallium/drivers/i965/brw_gs_emit.c          |  186 ++
 src/gallium/drivers/i965/brw_gs_state.c         |  149 ++
 src/gallium/drivers/i965/brw_misc_state.c       |  545 ++++
 src/gallium/drivers/i965/brw_program.c          |  166 ++
 src/gallium/drivers/i965/brw_queryobj.c         |  254 ++
 src/gallium/drivers/i965/brw_sf.c               |  200 ++
 src/gallium/drivers/i965/brw_sf.h               |  113 +
 src/gallium/drivers/i965/brw_sf_emit.c          |  739 ++++++
 src/gallium/drivers/i965/brw_sf_state.c         |  365 +++
 src/gallium/drivers/i965/brw_state.h            |  173 ++
 src/gallium/drivers/i965/brw_state_batch.c      |   99 +
 src/gallium/drivers/i965/brw_state_cache.c      |  597 +++++
 src/gallium/drivers/i965/brw_state_dump.c       |  224 ++
 src/gallium/drivers/i965/brw_state_upload.c     |  416 ++++
 src/gallium/drivers/i965/brw_structs.h          | 1575 ++++++++++++
 src/gallium/drivers/i965/brw_tex.c              |   59 +
 src/gallium/drivers/i965/brw_tex_layout.c       |  222 ++
 src/gallium/drivers/i965/brw_urb.c              |  250 ++
 src/gallium/drivers/i965/brw_util.c             |  104 +
 src/gallium/drivers/i965/brw_util.h             |   45 +
 src/gallium/drivers/i965/brw_vs.c               |  124 +
 src/gallium/drivers/i965/brw_vs.h               |   88 +
 src/gallium/drivers/i965/brw_vs_emit.c          | 1667 +++++++++++++
 src/gallium/drivers/i965/brw_vs_state.c         |  185 ++
 src/gallium/drivers/i965/brw_vs_surface_state.c |  226 ++
 src/gallium/drivers/i965/brw_wm.c               |  375 +++
 src/gallium/drivers/i965/brw_wm.h               |  309 +++
 src/gallium/drivers/i965/brw_wm_debug.c         |  174 ++
 src/gallium/drivers/i965/brw_wm_emit.c          | 1509 +++++++++++
 src/gallium/drivers/i965/brw_wm_fp.c            | 1177 +++++++++
 src/gallium/drivers/i965/brw_wm_glsl.c          | 3046 +++++++++++++++++++++++
 src/gallium/drivers/i965/brw_wm_iz.c            |  157 ++
 src/gallium/drivers/i965/brw_wm_pass0.c         |  442 ++++
 src/gallium/drivers/i965/brw_wm_pass1.c         |  291 +++
 src/gallium/drivers/i965/brw_wm_pass2.c         |  343 +++
 src/gallium/drivers/i965/brw_wm_sampler_state.c |  369 +++
 src/gallium/drivers/i965/brw_wm_state.c         |  317 +++
 src/gallium/drivers/i965/brw_wm_surface_state.c |  752 ++++++
 src/gallium/drivers/i965/intel_batchbuffer.h    |  184 ++
 src/gallium/drivers/i965/intel_chipset.h        |  118 +
 src/gallium/drivers/i965/intel_structs.h        |  132 +
 src/gallium/drivers/i965/intel_tex_format.c     |  225 ++
 src/gallium/drivers/i965/intel_tex_layout.c     |  140 ++
 68 files changed, 29208 insertions(+)
 create mode 100644 src/gallium/drivers/i965/Makefile
 create mode 100644 src/gallium/drivers/i965/brw_cc.c
 create mode 100644 src/gallium/drivers/i965/brw_clip.c
 create mode 100644 src/gallium/drivers/i965/brw_clip.h
 create mode 100644 src/gallium/drivers/i965/brw_clip_line.c
 create mode 100644 src/gallium/drivers/i965/brw_clip_point.c
 create mode 100644 src/gallium/drivers/i965/brw_clip_state.c
 create mode 100644 src/gallium/drivers/i965/brw_clip_tri.c
 create mode 100644 src/gallium/drivers/i965/brw_clip_unfilled.c
 create mode 100644 src/gallium/drivers/i965/brw_clip_util.c
 create mode 100644 src/gallium/drivers/i965/brw_context.c
 create mode 100644 src/gallium/drivers/i965/brw_context.h
 create mode 100644 src/gallium/drivers/i965/brw_curbe.c
 create mode 100644 src/gallium/drivers/i965/brw_defines.h
 create mode 100644 src/gallium/drivers/i965/brw_disasm.c
 create mode 100644 src/gallium/drivers/i965/brw_draw.c
 create mode 100644 src/gallium/drivers/i965/brw_draw.h
 create mode 100644 src/gallium/drivers/i965/brw_draw_upload.c
 create mode 100644 src/gallium/drivers/i965/brw_eu.c
 create mode 100644 src/gallium/drivers/i965/brw_eu.h
 create mode 100644 src/gallium/drivers/i965/brw_eu_debug.c
 create mode 100644 src/gallium/drivers/i965/brw_eu_emit.c
 create mode 100644 src/gallium/drivers/i965/brw_eu_util.c
 create mode 100644 src/gallium/drivers/i965/brw_gs.c
 create mode 100644 src/gallium/drivers/i965/brw_gs.h
 create mode 100644 src/gallium/drivers/i965/brw_gs_emit.c
 create mode 100644 src/gallium/drivers/i965/brw_gs_state.c
 create mode 100644 src/gallium/drivers/i965/brw_misc_state.c
 create mode 100644 src/gallium/drivers/i965/brw_program.c
 create mode 100644 src/gallium/drivers/i965/brw_queryobj.c
 create mode 100644 src/gallium/drivers/i965/brw_sf.c
 create mode 100644 src/gallium/drivers/i965/brw_sf.h
 create mode 100644 src/gallium/drivers/i965/brw_sf_emit.c
 create mode 100644 src/gallium/drivers/i965/brw_sf_state.c
 create mode 100644 src/gallium/drivers/i965/brw_state.h
 create mode 100644 src/gallium/drivers/i965/brw_state_batch.c
 create mode 100644 src/gallium/drivers/i965/brw_state_cache.c
 create mode 100644 src/gallium/drivers/i965/brw_state_dump.c
 create mode 100644 src/gallium/drivers/i965/brw_state_upload.c
 create mode 100644 src/gallium/drivers/i965/brw_structs.h
 create mode 100644 src/gallium/drivers/i965/brw_tex.c
 create mode 100644 src/gallium/drivers/i965/brw_tex_layout.c
 create mode 100644 src/gallium/drivers/i965/brw_urb.c
 create mode 100644 src/gallium/drivers/i965/brw_util.c
 create mode 100644 src/gallium/drivers/i965/brw_util.h
 create mode 100644 src/gallium/drivers/i965/brw_vs.c
 create mode 100644 src/gallium/drivers/i965/brw_vs.h
 create mode 100644 src/gallium/drivers/i965/brw_vs_emit.c
 create mode 100644 src/gallium/drivers/i965/brw_vs_state.c
 create mode 100644 src/gallium/drivers/i965/brw_vs_surface_state.c
 create mode 100644 src/gallium/drivers/i965/brw_wm.c
 create mode 100644 src/gallium/drivers/i965/brw_wm.h
 create mode 100644 src/gallium/drivers/i965/brw_wm_debug.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_emit.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_fp.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_glsl.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_iz.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_pass0.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_pass1.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_pass2.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_sampler_state.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_state.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_surface_state.c
 create mode 100644 src/gallium/drivers/i965/intel_batchbuffer.h
 create mode 100644 src/gallium/drivers/i965/intel_chipset.h
 create mode 100644 src/gallium/drivers/i965/intel_structs.h
 create mode 100644 src/gallium/drivers/i965/intel_tex_format.c
 create mode 100644 src/gallium/drivers/i965/intel_tex_layout.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
new file mode 100644
index 0000000000..7a55333e89
--- /dev/null
+++ b/src/gallium/drivers/i965/Makefile
@@ -0,0 +1,104 @@
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = i965_dri.so
+
+DRIVER_SOURCES = \
+	intel_batchbuffer.c \
+	intel_blit.c \
+	intel_buffer_objects.c \
+	intel_buffers.c \
+	intel_clear.c \
+	intel_context.c \
+	intel_decode.c \
+	intel_extensions.c \
+	intel_fbo.c \
+	intel_mipmap_tree.c \
+	intel_regions.c \
+	intel_screen.c \
+	intel_span.c \
+	intel_pixel.c \
+	intel_pixel_bitmap.c \
+	intel_pixel_copy.c \
+	intel_pixel_draw.c \
+	intel_pixel_read.c \
+	intel_state.c \
+	intel_swapbuffers.c \
+	intel_syncobj.c \
+	intel_tex.c \
+	intel_tex_copy.c \
+	intel_tex_format.c \
+	intel_tex_image.c \
+	intel_tex_layout.c \
+	intel_tex_subimage.c \
+	intel_tex_validate.c \
+	brw_cc.c \
+	brw_clip.c \
+	brw_clip_line.c \
+	brw_clip_point.c \
+	brw_clip_state.c \
+	brw_clip_tri.c \
+	brw_clip_unfilled.c \
+	brw_clip_util.c \
+	brw_context.c \
+	brw_curbe.c \
+	brw_disasm.c \
+	brw_draw.c \
+	brw_draw_upload.c \
+	brw_eu.c \
+	brw_eu_debug.c \
+	brw_eu_emit.c \
+	brw_eu_util.c \
+	brw_fallback.c \
+	brw_gs.c \
+	brw_gs_emit.c \
+	brw_gs_state.c \
+	brw_misc_state.c \
+	brw_program.c \
+	brw_queryobj.c \
+	brw_sf.c \
+	brw_sf_emit.c \
+	brw_sf_state.c \
+	brw_state_batch.c \
+	brw_state_cache.c \
+	brw_state_dump.c \
+	brw_state_upload.c \
+	brw_tex.c \
+	brw_tex_layout.c \
+	brw_urb.c \
+	brw_util.c \
+	brw_vs.c \
+	brw_vs_constval.c \
+	brw_vs_emit.c \
+	brw_vs_state.c \
+	brw_vs_surface_state.c \
+	brw_vtbl.c \
+	brw_wm.c \
+	brw_wm_debug.c \
+	brw_wm_emit.c \
+	brw_wm_fp.c \
+	brw_wm_iz.c \
+	brw_wm_glsl.c \
+	brw_wm_pass0.c \
+	brw_wm_pass1.c \
+	brw_wm_pass2.c \
+	brw_wm_sampler_state.c \
+	brw_wm_state.c \
+	brw_wm_surface_state.c 
+
+C_SOURCES = \
+	$(COMMON_SOURCES) \
+	$(MINIGLX_SOURCES) \
+	$(DRIVER_SOURCES)
+
+ASM_SOURCES = 
+
+DRIVER_DEFINES = -I../intel -I../intel/server
+
+DRI_LIB_DEPS += -ldrm_intel
+
+include ../Makefile.template
+
+intel_decode.o: ../intel/intel_decode.c
+intel_tex_layout.o: ../intel/intel_tex_layout.c
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
new file mode 100644
index 0000000000..1088a7a607
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -0,0 +1,297 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_util.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+static void prepare_cc_vp( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_cc_viewport ccv;
+   /* Build the CC viewport (only the depth range is set) and keep its buffer object in brw->cc.vp_bo. */
+   memset(&ccv, 0, sizeof(ccv));
+
+   /* _NEW_VIEWPORT: the depth range is copied from the GL viewport state */
+   ccv.min_depth = ctx->Viewport.Near;
+   ccv.max_depth = ctx->Viewport.Far;
+   /* Release the reference to the previous BO, then store the one brw_cache_data() returns. */
+   dri_bo_unreference(brw->cc.vp_bo);
+   brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+}
+
+const struct brw_tracked_state brw_cc_vp = {   /* tracked-state entry for the CC viewport */
+   .dirty = {
+      .mesa = _NEW_VIEWPORT,   /* prepare_cc_vp() reads ctx->Viewport.Near/Far */
+      .brw = BRW_NEW_CONTEXT,
+      .cache = 0
+   },
+   .prepare = prepare_cc_vp   /* rebuilds brw->cc.vp_bo */
+};
+
+struct brw_cc_unit_key {   /* GL state snapshot used as the BRW_CC_UNIT cache key; filled by cc_unit_populate_key() */
+   GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
+   /* Per-face stencil state: [0] is copied from GL index 0, [1] from index ctx->Stencil._BackFace. */
+   GLenum stencil_func[2], stencil_fail_op[2];
+   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
+   GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
+   GLenum logic_op;   /* GL_COPY when the logic op is disabled */
+
+   GLenum blend_eq_rgb, blend_eq_a;   /* the six blend fields are only filled when color_blend is set */
+   GLenum blend_src_rgb, blend_src_a;
+   GLenum blend_dst_rgb, blend_dst_a;
+
+   GLenum alpha_func;   /* alpha_func and alpha_ref are only filled when alpha_enabled is set */
+   GLclampf alpha_ref;
+
+   GLboolean dither;
+
+   GLboolean depth_test, depth_write;   /* depth_write and depth_func are only filled when depth_test is set */
+   GLenum depth_func;
+};
+
+static void
+cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const unsigned back = ctx->Stencil._BackFace;
+   /* Snapshot the GL stencil/color/alpha/dither/depth state into *key; fields of disabled features stay zero. */
+   memset(key, 0, sizeof(*key));
+
+   key->stencil = ctx->Stencil._Enabled;
+   key->stencil_two_side = ctx->Stencil._TestTwoSide;
+
+   if (key->stencil) {
+      key->stencil_func[0] = ctx->Stencil.Function[0];
+      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
+      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
+      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
+      key->stencil_ref[0] = ctx->Stencil.Ref[0];
+      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
+      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
+   }
+   if (key->stencil_two_side) {   /* second slot mirrors the first, read from GL index 'back' */
+      key->stencil_func[1] = ctx->Stencil.Function[back];
+      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
+      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
+      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
+      key->stencil_ref[1] = ctx->Stencil.Ref[back];
+      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
+      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
+   }
+   /* A disabled logic op is recorded as GL_COPY; cc_unit_create_from_key() tests for that value. */
+   if (ctx->Color._LogicOpEnabled)
+      key->logic_op = ctx->Color.LogicOp;
+   else
+      key->logic_op = GL_COPY;
+
+   key->color_blend = ctx->Color.BlendEnabled;
+   if (key->color_blend) {
+      key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
+      key->blend_eq_a = ctx->Color.BlendEquationA;
+      key->blend_src_rgb = ctx->Color.BlendSrcRGB;
+      key->blend_dst_rgb = ctx->Color.BlendDstRGB;
+      key->blend_src_a = ctx->Color.BlendSrcA;
+      key->blend_dst_a = ctx->Color.BlendDstA;
+   }
+
+   key->alpha_enabled = ctx->Color.AlphaEnabled;
+   if (key->alpha_enabled) {
+      key->alpha_func = ctx->Color.AlphaFunc;
+      key->alpha_ref = ctx->Color.AlphaRef;
+   }
+
+   key->dither = ctx->Color.DitherFlag;
+
+   key->depth_test = ctx->Depth.Test;
+   if (key->depth_test) {
+      key->depth_func = ctx->Depth.Func;
+      key->depth_write = ctx->Depth.Mask;
+   }
+}
+
+/**
+ * Translates \p key into a brw_cc_unit_state, uploads it with brw_upload_cache() under BRW_CC_UNIT, and returns the resulting buffer object.
+ */
+static dri_bo *
+cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+{
+   struct brw_cc_unit_state cc;
+   dri_bo *bo;
+
+   memset(&cc, 0, sizeof(cc));
+
+   /* _NEW_STENCIL */
+   if (key->stencil) {
+      cc.cc0.stencil_enable = 1;
+      cc.cc0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      cc.cc0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      cc.cc0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      cc.cc0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      cc.cc1.stencil_ref = key->stencil_ref[0];
+      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
+      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+
+      if (key->stencil_two_side) {   /* key slot [1] feeds the bf_* fields */
+	 cc.cc0.bf_stencil_enable = 1;
+	 cc.cc0.bf_stencil_func =
+	    intel_translate_compare_func(key->stencil_func[1]);
+	 cc.cc0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(key->stencil_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+	 cc.cc1.bf_stencil_ref = key->stencil_ref[1];
+	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
+	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+      }
+
+      /* Stencil writes are enabled when an active face has a non-zero write
+       * mask (original author's note: "Not really sure about this"). */
+      if (key->stencil_write_mask[0] ||
+	  (key->stencil_two_side && key->stencil_write_mask[1]))
+	 cc.cc0.stencil_write_enable = 1;
+   }
+
+   /* _NEW_COLOR: an enabled logic op takes precedence over blending */
+   if (key->logic_op != GL_COPY) {
+      cc.cc2.logicop_enable = 1;
+      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
+   } else if (key->color_blend) {
+      GLenum eqRGB = key->blend_eq_rgb;
+      GLenum eqA = key->blend_eq_a;
+      GLenum srcRGB = key->blend_src_rgb;
+      GLenum dstRGB = key->blend_dst_rgb;
+      GLenum srcA = key->blend_src_a;
+      GLenum dstA = key->blend_dst_a;
+      /* For GL_MIN/GL_MAX both blend factors are overridden with GL_ONE before translation. */
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+
+      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+
+      cc.cc3.blend_enable = 1;
+      cc.cc3.ia_blend_enable = (srcA != srcRGB ||   /* set only when the alpha setup differs from RGB */
+				dstA != dstRGB ||
+				eqA != eqRGB);
+   }
+
+   if (key->alpha_enabled) {
+      cc.cc3.alpha_test = 1;
+      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+      /* The float reference is converted to an unsigned byte, matching the UNORM8 format chosen above. */
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+   }
+
+   if (key->dither) {
+      cc.cc5.dither_enable = 1;
+      cc.cc6.y_dither_offset = 0;
+      cc.cc6.x_dither_offset = 0;
+   }
+
+   /* _NEW_DEPTH */
+   if (key->depth_test) {
+      cc.cc2.depth_test = 1;
+      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
+      cc.cc2.depth_write_enable = key->depth_write;
+   }
+
+   /* CACHE_NEW_CC_VP: stores vp_bo's offset shifted right by 5; a relocation for this field is emitted below */
+   cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      cc.cc5.statistics_enable = 1;
+   /* vp_bo is passed alongside the key, the same way prepare_cc_unit() passes it to brw_search_cache(). */
+   bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
+			 key, sizeof(*key),
+			 &brw->cc.vp_bo, 1,
+			 &cc, sizeof(cc),
+			 NULL, NULL);
+
+   /* Emit CC viewport relocation: at the cc4 field of the new BO, targeting vp_bo */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION,
+		     0,
+		     0,
+		     offsetof(struct brw_cc_unit_state, cc4),
+		     brw->cc.vp_bo);
+
+   return bo;
+}
+
+static void prepare_cc_unit( struct brw_context *brw )
+{
+   struct brw_cc_unit_key key;
+   /* Look the CC unit up in the state cache by key (plus vp_bo); build and upload it on a miss. */
+   cc_unit_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->cc.state_bo);
+   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
+				       &key, sizeof(key),
+				       &brw->cc.vp_bo, 1,
+				       NULL);
+   /* NULL from brw_search_cache() is treated as a cache miss. */
+   if (brw->cc.state_bo == NULL)
+      brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state brw_cc_unit = {   /* tracked-state entry for the CC unit */
+   .dirty = {
+      .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,   /* GL state read by cc_unit_populate_key() */
+      .brw = 0,
+      .cache = CACHE_NEW_CC_VP   /* the unit state embeds and relocates against brw->cc.vp_bo */
+   },
+   .prepare = prepare_cc_unit,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
new file mode 100644
index 0000000000..20a927cf38
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -0,0 +1,273 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_clip.h"
+
+
+#define FRONT_UNFILLED_BIT  0x1   /* NOTE(review): neither *_UNFILLED_BIT macro is referenced elsewhere in this file */
+#define BACK_UNFILLED_BIT   0x2
+
+
+static void compile_clip_prog( struct brw_context *brw,
+			     struct brw_clip_prog_key *key )
+{
+   struct brw_clip_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint delta;
+   GLuint i;
+   /* Compile the clip program selected by *key and publish the result in brw->clip.prog_bo. */
+   memset(&c, 0, sizeof(c));
+   
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+
+   c.key = *key;
+   c.need_ff_sync = BRW_IS_IGDNG(brw);
+
+   /* Need to locate the two positions present in vertex + header.
+    * These are currently hardcoded:
+    */
+   c.header_position_offset = ATTR_SIZE;
+   /* The first attribute sits 3 registers into the vertex on IGDNG, 1 register otherwise. */
+   if (BRW_IS_IGDNG(brw))
+       delta = 3 * REG_SIZE;
+   else
+       delta = REG_SIZE;
+   /* Unsigned constant: key.attrs is 32 bits wide and 1<<31 on a signed int is undefined. */
+   for (i = 0; i < VERT_RESULT_MAX; i++)
+      if (c.key.attrs & (1u<<i)) {
+	 c.offset[i] = delta;
+	 delta += ATTR_SIZE;
+      }
+
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   
+   if (BRW_IS_IGDNG(brw))
+       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
+   else
+       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   c.prog_data.clip_mode = c.key.clip_mode; /* XXX */
+
+   /* For some reason the thread is spawned with only 4 channels
+    * unmasked.  
+    */
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+
+   /* Would ideally have the option of producing a program which could
+    * do all three:
+    */
+   switch (key->primitive) {
+   case GL_TRIANGLES: 
+      if (key->do_unfilled)
+	 brw_emit_unfilled_clip( &c );
+      else
+	 brw_emit_tri_clip( &c );
+      break;
+   case GL_LINES:
+      brw_emit_line_clip( &c );
+      break;
+   case GL_POINTS:
+      brw_emit_point_clip( &c );
+      break;
+   default:
+      assert(0);   /* any other primitive is unexpected; nothing is uploaded in that case */
+      return;
+   }
+
+	 
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload: drop the reference to the old BO, then store the one returned for this key
+    */
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_upload_cache( &brw->cache,
+					 BRW_CLIP_PROG,
+					 &c.key, sizeof(c.key),
+					 NULL, 0,
+					 program, program_size,
+					 &c.prog_data,
+					 &brw->clip.prog_data );
+}
+
+/* Derive the clip program key from the current state, then fetch the matching
+ * program from the cache, compiling it on a miss. */
+static void upload_clip_prog(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_clip_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key:
+    */
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   key.primitive = brw->intel.reduced_primitive;
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written;
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   /* _NEW_TRANSFORM */
+   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+   /* Default clip mode by hardware generation; the triangle path below may override it. */
+   if (BRW_IS_IGDNG(brw))
+       key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+   else
+       key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+   /* _NEW_POLYGON */
+   if (key.primitive == GL_TRIANGLES) {
+      if (ctx->Polygon.CullFlag &&
+	  ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      else {
+	 GLuint fill_front = CLIP_CULL;
+	 GLuint fill_back = CLIP_CULL;
+	 GLuint offset_front = 0;
+	 GLuint offset_back = 0;
+	 /* Front faces: stay CLIP_CULL when culled, otherwise follow Polygon.FrontMode. */
+	 if (!ctx->Polygon.CullFlag ||
+	     ctx->Polygon.CullFaceMode != GL_FRONT) {
+	    switch (ctx->Polygon.FrontMode) {
+	    case GL_FILL: 
+	       fill_front = CLIP_FILL; 
+	       offset_front = 0;
+	       break;
+	    case GL_LINE:
+	       fill_front = CLIP_LINE;
+	       offset_front = ctx->Polygon.OffsetLine;
+	       break;
+	    case GL_POINT:
+	       fill_front = CLIP_POINT;
+	       offset_front = ctx->Polygon.OffsetPoint;
+	       break;
+	    }
+	 }
+	 /* Back faces: same scheme, using Polygon.BackMode. */
+	 if (!ctx->Polygon.CullFlag ||
+	     ctx->Polygon.CullFaceMode != GL_BACK) {
+	    switch (ctx->Polygon.BackMode) {
+	    case GL_FILL: 
+	       fill_back = CLIP_FILL; 
+	       offset_back = 0;
+	       break;
+	    case GL_LINE:
+	       fill_back = CLIP_LINE;
+	       offset_back = ctx->Polygon.OffsetLine;
+	       break;
+	    case GL_POINT:
+	       fill_back = CLIP_POINT;
+	       offset_back = ctx->Polygon.OffsetPoint;
+	       break;
+	    }
+	 }
+
+	 if (ctx->Polygon.BackMode != GL_FILL ||
+	     ctx->Polygon.FrontMode != GL_FILL) {
+	    key.do_unfilled = 1;
+
+	    /* Most cases the fixed function units will handle.  Cases where
+	     * one or more polygon faces are unfilled will require help:
+	     */
+	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+
+	    if (offset_back || offset_front) {
+	       /* _NEW_POLYGON, _NEW_BUFFERS */
+	       key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
+	       key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
+	    }
+	    /* Translate front/back settings into cw/ccw terms according to Polygon.FrontFace. */
+	    switch (ctx->Polygon.FrontFace) {
+	    case GL_CCW:
+	       key.fill_ccw = fill_front;
+	       key.fill_cw = fill_back;
+	       key.offset_ccw = offset_front;
+	       key.offset_cw = offset_back;
+	       if (ctx->Light.Model.TwoSide &&
+		   key.fill_cw != CLIP_CULL) 
+		  key.copy_bfc_cw = 1;
+	       break;
+	    case GL_CW:
+	       key.fill_cw = fill_front;
+	       key.fill_ccw = fill_back;
+	       key.offset_cw = offset_front;
+	       key.offset_ccw = offset_back;
+	       if (ctx->Light.Model.TwoSide &&
+		   key.fill_ccw != CLIP_CULL) 
+		  key.copy_bfc_ccw = 1;
+	       break;
+	    }
+	 }
+      }
+   }
+   /* Reuse a cached program for this key if there is one; otherwise compile and upload it. */
+   dri_bo_unreference(brw->clip.prog_bo);
+   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+					&key, sizeof(key),
+					NULL, 0,
+					&brw->clip.prog_data);
+   if (brw->clip.prog_bo == NULL)
+      compile_clip_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_clip_prog = {   /* tracked-state entry for the clip program */
+   .dirty = {
+      .mesa  = (_NEW_LIGHT | 		/* ShadeModel, Light.Model.TwoSide */
+		_NEW_TRANSFORM |	/* Transform.ClipPlanesEnabled */
+		_NEW_POLYGON | 		/* cull, fill-mode, front-face and offset state */
+		_NEW_BUFFERS),		/* ctx->DrawBuffer->_MRD */
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),	/* brw->intel.reduced_primitive */
+      .cache = CACHE_NEW_VS_PROG		/* brw->vs.prog_data->outputs_written */
+   },
+   .prepare = upload_clip_prog
+};
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
new file mode 100644
index 0000000000..957df441ab
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -0,0 +1,179 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#ifndef BRW_CLIP_H
+#define BRW_CLIP_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_VERTS (3+6+6)	/* number of entries in brw_clip_compile.reg.vertex[] */
+
+/* Note that if unfilled primitives are being emitted, we have to fix
+ * up polygon offset and flatshading at this point:
+ */
+struct brw_clip_prog_key {
+   GLuint attrs:32;		/* VS outputs_written bitmask, tested one bit per VERT_RESULT_* index */
+   GLuint primitive:4;	/* reduced primitive: GL_TRIANGLES, GL_LINES or GL_POINTS */
+   GLuint nr_userclip:3;	/* count of bits set in Transform.ClipPlanesEnabled */
+   GLuint do_flat_shading:1;
+   GLuint do_unfilled:1;
+   GLuint fill_cw:2;		/* includes cull information */
+   GLuint fill_ccw:2;		/* includes cull information */
+   GLuint offset_cw:1;
+   GLuint offset_ccw:1;
+   GLuint pad0:17;	/* pads the bit-fields above (15 bits) to 32 */
+
+   GLuint copy_bfc_cw:1;
+   GLuint copy_bfc_ccw:1;
+   GLuint clip_mode:3;	/* one of the BRW_CLIPMODE_* values */
+   GLuint pad1:27;	/* pads the bit-fields above (5 bits) to 32 */
+   
+   GLfloat offset_factor;
+   GLfloat offset_units;
+};
+
+
+#define CLIP_LINE   0	/* CLIP_*: values stored in brw_clip_prog_key.fill_cw / fill_ccw (2-bit fields) */
+#define CLIP_POINT  1
+#define CLIP_FILL   2
+#define CLIP_CULL   3
+
+
+#define PRIM_MASK  (0x1f)	/* NOTE(review): not referenced in the visible part of this patch */
+
+struct brw_clip_compile {   /* working state for compiling one clip program */
+   struct brw_compile func;
+   struct brw_clip_prog_key key;   /* copy of the key being compiled */
+   struct brw_clip_prog_data prog_data;
+   
+   struct {   /* register assignments, filled in by the *_alloc_regs() helpers */
+      struct brw_reg R0;
+      struct brw_reg vertex[MAX_VERTS];
+
+      struct brw_reg t;
+      struct brw_reg t0, t1;
+      struct brw_reg dp0, dp1;
+
+      struct brw_reg dpPrev;
+      struct brw_reg dp;
+      struct brw_reg loopcount;
+      struct brw_reg nr_verts;
+      struct brw_reg planemask;
+
+      struct brw_reg inlist;
+      struct brw_reg outlist;
+      struct brw_reg freelist;
+
+      struct brw_reg dir;
+      struct brw_reg tmp0, tmp1;
+      struct brw_reg offset;
+      
+      struct brw_reg fixed_planes;
+      struct brw_reg plane_equation;
+       
+      struct brw_reg ff_sync;   /* only assigned when need_ff_sync is set */
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;   /* number of bits set in key.attrs */
+   GLuint nr_regs;    /* (nr_attrs + 1) / 2, plus 1 (plus 3 on IGDNG) */
+   GLuint nr_bytes;   /* nr_regs * REG_SIZE */
+
+   GLuint first_tmp;
+   GLuint last_tmp;
+
+   GLboolean need_direction;
+
+   GLuint last_mrf;
+
+   GLuint header_position_offset;   /* set to ATTR_SIZE by compile_clip_prog() */
+   GLuint offset[VERT_ATTRIB_MAX];   /* NOTE(review): filled for i < VERT_RESULT_MAX in brw_clip.c -- confirm VERT_ATTRIB_MAX is not smaller */
+   GLboolean need_ff_sync;   /* BRW_IS_IGDNG(brw) */
+};
+
+#define ATTR_SIZE  (4*4)   /* per-attribute step used when laying out c.offset[]; presumably 4 floats of 4 bytes -- confirm */
+
+/* Points are only culled, so no need for a clip routine, however it
+ * works out easier to have a dummy one (see brw_emit_point_clip).
+ */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c );
+void brw_emit_tri_clip( struct brw_clip_compile *c );
+void brw_emit_line_clip( struct brw_clip_compile *c );   /* brw_clip_line.c */
+void brw_emit_point_clip( struct brw_clip_compile *c );   /* brw_clip_point.c */
+
+/* brw_clip_tri.c, for use by the unfilled clip routine:
+ */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c );
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c );
+void brw_clip_tri( struct brw_clip_compile *c );
+void brw_clip_tri_emit_polygon( struct brw_clip_compile *c );
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts );
+
+
+/* Utils:
+ */
+
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag );
+
+void brw_clip_init_planes( struct brw_clip_compile *c );
+
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header);
+
+void brw_clip_kill_thread(struct brw_clip_compile *c);
+
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c );
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c );
+
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from );   /* vertex indices: destination first, then source */
+
+void brw_clip_init_clipmask( struct brw_clip_compile *c );
+
+struct brw_reg get_tmp( struct brw_clip_compile *c );
+
+void brw_clip_project_position(struct brw_clip_compile *c,
+             struct brw_reg pos );
+void brw_clip_ff_sync(struct brw_clip_compile *c);
+void brw_clip_init_ff_sync(struct brw_clip_compile *c);
+#endif
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
new file mode 100644
index 0000000000..048ca620fa
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -0,0 +1,276 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here: i is the next free GRF
+    * number and ends up as prog_data.total_grf. */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;   /* 6 + nr_userclip planes, rounded up to two per register */
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices
+    * (four slots of nr_regs registers each): */
+   for (j = 0; j < 4; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+   /* t, t0, t1, planemask and plane_equation share one register (elements 0, 1, 2, 3 and 4..). */
+   c->reg.t           = brw_vec1_grf(i, 0);
+   c->reg.t0          = brw_vec1_grf(i, 1);
+   c->reg.t1          = brw_vec1_grf(i, 2);
+   c->reg.planemask   = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dp0         = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp1         = brw_vec1_grf(i, 4);
+   i++;
+   /* Without user clip planes, fixed_planes gets a register here instead of near the start. */
+   if (!c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+
+
+/* Line clipping; the emitted EU code roughly follows this algorithm:
+ *
+ *  for (p=0;p<MAX_PLANES;p++) {
+ *     if (clipmask & (1 << p)) {
+ *        GLfloat dp0 = DOTPROD( vtx0, plane[p] );
+ *        GLfloat dp1 = DOTPROD( vtx1, plane[p] );
+ *
+ *        if (IS_NEGATIVE(dp1)) {
+ *           GLfloat t = dp1 / (dp1 - dp0);
+ *           if (t > t1) t1 = t;
+ *        } else {
+ *           GLfloat t = dp0 / (dp0 - dp1);
+ *           if (t > t0) t0 = t;
+ *        }
+ *  
+ *        if (t0 + t1 >= 1.0)
+ *           return;
+ *     }
+ *  }
+ *
+ *  interp( ctx, newvtx0, vtx0, vtx1, t0 );
+ *  interp( ctx, newvtx1, vtx1, vtx0, t1 );
+ *
+ * (The emitted code performs the t0 + t1 test once, after the plane loop.)
+ */
+static void clip_and_emit_line( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx0     = brw_indirect(0, 0);
+   struct brw_indirect vtx1      = brw_indirect(1, 0);
+   struct brw_indirect newvtx0   = brw_indirect(2, 0);
+   struct brw_indirect newvtx1   = brw_indirect(3, 0);
+   struct brw_indirect plane_ptr = brw_indirect(4, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *is_negative;
+   struct brw_instruction *is_neg2 = NULL;
+   struct brw_instruction *not_culled;
+   struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+   /* Point the five address registers at the two input vertices, the two output slots and the first plane. */
+   brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
+   brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
+   brw_MOV(p, get_addr_reg(newvtx0),   brw_address(c->reg.vertex[2]));
+   brw_MOV(p, get_addr_reg(newvtx1),   brw_address(c->reg.vertex[3]));
+   brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c));
+
+   /* Note: init t0, t1 together: they are adjacent elements of one register
+    * (see brw_clip_line_alloc_regs), so the vec2 MOV covers both. */
+   brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0));
+
+   brw_clip_init_planes(c);
+   brw_clip_init_clipmask(c);
+
+   /* -ve rhw workaround: tests bit 20 of R0 dword 2, then ORs 0x3f into planemask (OR presumably predicated on that test) */
+   if (BRW_IS_965(p->brw)) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
+              brw_imm_ud(1<<20));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
+   }
+
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 if (c->key.nr_userclip)
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+
+	 /* dp = DP4(vtx->position, plane) 
+	  */
+	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+
+	 /* if (IS_NEGATIVE(dp1)) 
+	  */
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	 is_negative = brw_IF(p, BRW_EXECUTE_1);
+	 {
+             /*
+              * Both can be negative on GM965/G965 due to RHW workaround
+              * if so, this object should be rejected.
+              */
+             if (BRW_IS_965(p->brw)) {
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
+                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+                 {
+                     brw_clip_kill_thread(c);
+                 }
+                 brw_ENDIF(p, is_neg2);
+             }
+             /* t = dp1 / (dp1 - dp0) */
+             brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0));
+             brw_math_invert(p, c->reg.t, c->reg.t);
+             brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1);
+             /* if (t > t1) t1 = t;  -- the MOV is presumably predicated on the CMP */
+             brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 );
+             brw_MOV(p, c->reg.t1, c->reg.t);
+             brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+	 } 
+	 is_negative = brw_ELSE(p, is_negative);
+	 {
+             /* Coming back in.  We know that both cannot be negative
+              * because the line would have been culled in that case.
+              */
+
+             /* If both are positive, do nothing */
+             /* Only on GM965/G965 */
+             if (BRW_IS_965(p->brw)) {
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
+                 is_neg2 = brw_IF(p, BRW_EXECUTE_1);
+             }
+
+             {   /* t = dp0 / (dp0 - dp1) */
+                 brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1));
+                 brw_math_invert(p, c->reg.t, c->reg.t);
+                 brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0);
+                 /* if (t > t0) t0 = t;  -- the MOV is presumably predicated on the CMP */
+                 brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 );
+                 brw_MOV(p, c->reg.t0, c->reg.t);
+                 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+             }
+
+             if (BRW_IS_965(p->brw)) {
+                 brw_ENDIF(p, is_neg2);
+             }
+         }
+	 brw_ENDIF(p, is_negative);	 
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* while (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+   /* if (t0 + t1 < 1.0): interpolate both new endpoints and emit them as a two-vertex linestrip */
+   brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
+   not_culled = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+
+      brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);   /* allocate=1, eot=0 */
+      brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); /* allocate=0, eot=1 */
+   }
+   brw_ENDIF(p, not_culled);
+   brw_clip_kill_thread(c);   /* emitted unconditionally, after the IF block */
+}
+
+
+
+void brw_emit_line_clip( struct brw_clip_compile *c )
+{
+   brw_clip_line_alloc_regs(c);   /* entry point for GL_LINES keys (called from compile_clip_prog) */
+   brw_clip_init_ff_sync(c);
+   /* Flat shading: copy colors to vertex 0 from vertex 1 before clipping. */
+   if (c->key.do_flat_shading)
+      brw_clip_copy_colors(c, 0, 1);
+                
+   clip_and_emit_line(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
new file mode 100644
index 0000000000..8458f61c5a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -0,0 +1,56 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+/* Point clipping: this program emits no vertices.  It allocates the
+ * fixed registers, calls brw_clip_init_ff_sync() and kills the thread. */
+void brw_emit_point_clip( struct brw_clip_compile *c )
+{
+   /* nr_verts is 0 here; the empty message that kills the thread is
+    * sent by brw_clip_kill_thread() at the end. */
+   brw_clip_tri_alloc_regs(c, 0);
+   brw_clip_init_ff_sync(c);
+
+   brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
new file mode 100644
index 0000000000..234b3744bf
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -0,0 +1,184 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_clip_unit_key {   /* cache key for BRW_CLIP_UNIT state; filled by clip_unit_populate_key() */
+   unsigned int total_grf;              /* brw->clip.prog_data->total_grf */
+   unsigned int urb_entry_read_length;  /* brw->clip.prog_data->urb_read_length */
+   unsigned int curb_entry_read_length; /* brw->clip.prog_data->curb_read_length */
+   unsigned int clip_mode;              /* brw->clip.prog_data->clip_mode */
+
+   unsigned int curbe_offset;           /* brw->curbe.clip_start */
+
+   unsigned int nr_urb_entries, urb_size; /* brw->urb.nr_clip_entries, brw->urb.vsize */
+
+   GLboolean depth_clamp;               /* ctx->Transform.DepthClamp */
+};
+
+static void
+clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   memset(key, 0, sizeof(*key));   /* zero first: the key is later handed to the cache as (ptr, size) */
+   /* Gather the clip unit inputs; each group is tagged with a dirty flag name: */
+   /* CACHE_NEW_CLIP_PROG */
+   key->total_grf = brw->clip.prog_data->total_grf;
+   key->urb_entry_read_length = brw->clip.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->clip.prog_data->curb_read_length;
+   key->clip_mode = brw->clip.prog_data->clip_mode;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_clip_entries;
+   key->urb_size = brw->urb.vsize;
+
+   /* _NEW_TRANSFORM */
+   key->depth_clamp = ctx->Transform.DepthClamp;
+}
+
+static dri_bo *
+clip_unit_create_from_key(struct brw_context *brw,
+			  struct brw_clip_unit_key *key)
+{
+   struct brw_clip_unit_state clip;
+   dri_bo *bo;
+   /* Build a brw_clip_unit_state from *key, upload it via the cache, return the BO. */
+   memset(&clip, 0, sizeof(clip));
+
+   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   /* reloc: see the dri_bo_emit_reloc() call at the end of this function */
+   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+
+   clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   clip.thread1.single_program_flow = 1;
+
+   clip.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   clip.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   clip.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+   clip.thread3.dispatch_grf_start_reg = 1;
+   clip.thread3.urb_entry_read_offset = 0;
+
+   clip.thread4.nr_urb_entries = key->nr_urb_entries;
+   clip.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* If we have enough clip URB entries to run two threads, do so.
+    */
+   if (key->nr_urb_entries >= 10) {
+      /* Half of the URB entries go to each thread, and it has to be an
+       * even number.
+       */
+      assert(key->nr_urb_entries % 2 == 0);
+      
+      /* Although up to 16 concurrent Clip threads are allowed on IGDNG, 
+       * only 2 threads can output VUEs at a time.
+       */
+      if (BRW_IS_IGDNG(brw))
+         clip.thread4.max_threads = 16 - 1;        
+      else
+         clip.thread4.max_threads = 2 - 1;
+   } else {
+      assert(key->nr_urb_entries >= 5);
+      clip.thread4.max_threads = 1 - 1;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      clip.thread4.max_threads = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      clip.thread4.stats_enable = 1;
+
+   clip.clip5.userclip_enable_flags = 0x7f;
+   clip.clip5.userclip_must_clip = 1;
+   clip.clip5.guard_band_enable = 0;
+   if (!key->depth_clamp)
+      clip.clip5.viewport_z_clip_enable = 1;
+   clip.clip5.viewport_xy_clip_enable = 1;
+   clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE;
+   clip.clip5.api_mode = BRW_CLIP_API_OGL;
+   clip.clip5.clip_mode = key->clip_mode;
+
+   if (BRW_IS_G4X(brw))
+      clip.clip5.negative_w_clip_test = 1;
+
+   clip.clip6.clipper_viewport_state_ptr = 0;
+   clip.viewport_xmin = -1;
+   clip.viewport_xmax = 1;
+   clip.viewport_ymin = -1;
+   clip.viewport_ymax = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+			 key, sizeof(*key),
+			 &brw->clip.prog_bo, 1,
+			 &clip, sizeof(clip),
+			 NULL, NULL);
+
+   /* Emit clip program relocation */
+   assert(brw->clip.prog_bo);   /* NOTE(review): prog_bo was already dereferenced above for kernel_start_pointer */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION,
+		     0,
+		     clip.thread0.grf_reg_count << 1,   /* presumably keeps grf_reg_count in the relocated dword - TODO confirm */
+		     offsetof(struct brw_clip_unit_state, thread0),
+		     brw->clip.prog_bo);
+
+   return bo;
+}
+
+static void upload_clip_unit( struct brw_context *brw )
+{
+   struct brw_clip_unit_key key;
+
+   clip_unit_populate_key(brw, &key);
+   /* Unreference the old state BO, then look the key up in the cache; build on a miss. */
+   dri_bo_unreference(brw->clip.state_bo);
+   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+					 &key, sizeof(key),
+					 &brw->clip.prog_bo, 1,
+					 NULL);
+   if (brw->clip.state_bo == NULL) {
+      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
+   }
+}
+
+const struct brw_tracked_state brw_clip_unit = {   /* tracked-state entry for the clip unit */
+   .dirty = {   /* the flags noted beside the reads in clip_unit_populate_key() */
+      .mesa  = _NEW_TRANSFORM,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |
+		BRW_NEW_URB_FENCE),
+      .cache = CACHE_NEW_CLIP_PROG
+   },
+   .prepare = upload_clip_unit,   /* populates the key, then reuses or builds the state BO */
+};
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
new file mode 100644
index 0000000000..0efd77225e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -0,0 +1,603 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+static void release_tmps( struct brw_clip_compile *c )
+{
+   c->last_tmp = c->first_tmp;   /* rewind last_tmp to first_tmp; both start equal in brw_clip_tri_alloc_regs() */
+}
+
+/* Lay out the GRF registers used by the clip program, counting up from 0. */
+void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, 
+			      GLuint nr_verts )
+{
+   GLuint i = 0,j;
+
+   /* Register usage is static, precompute here:
+    */
+   c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++;
+
+   if (c->key.nr_userclip) {
+      c->reg.fixed_planes = brw_vec4_grf(i, 0);
+      i += (6 + c->key.nr_userclip + 1) / 2;   /* (6 + nr_userclip) / 2, rounded up */
+
+      c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2;
+   }
+   else
+      c->prog_data.curb_read_length = 0;
+
+
+   /* Payload vertices plus space for more generated vertices:
+    */
+   for (j = 0; j < nr_verts; j++) {
+      c->reg.vertex[j] = brw_vec4_grf(i, 0);
+      i += c->nr_regs;
+   }
+   /* NOTE(review): the loop below runs j < 3 regardless of nr_verts (point clip passes 0). */
+   if (c->nr_attrs & 1) {   /* odd attribute count: write 0.0 at byte offset delta of each vertex */
+      for (j = 0; j < 3; j++) {
+	 GLuint delta = c->nr_attrs*16 + 32;
+
+         if (BRW_IS_IGDNG(c->func.brw))
+             delta = c->nr_attrs * 16 + 32 * 3;
+
+	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
+      }
+   }
+
+   c->reg.t          = brw_vec1_grf(i, 0);
+   c->reg.loopcount  = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_D);
+   c->reg.nr_verts   = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD);
+   c->reg.planemask  = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD);
+   c->reg.plane_equation = brw_vec4_grf(i, 4);
+   i++;
+
+   c->reg.dpPrev     = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */
+   c->reg.dp         = brw_vec1_grf(i, 4);
+   i++;
+
+   c->reg.inlist     = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.outlist    = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   c->reg.freelist   = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0);
+   i++;
+
+   if (!c->key.nr_userclip) {   /* curb_read_length is 0 in this case: fixed_planes gets its own register */
+      c->reg.fixed_planes = brw_vec8_grf(i, 0); 
+      i++;
+   }
+
+   if (c->key.do_unfilled) {
+      c->reg.dir     = brw_vec4_grf(i, 0);
+      c->reg.offset  = brw_vec4_grf(i, 4);
+      i++;
+      c->reg.tmp0    = brw_vec4_grf(i, 0);
+      c->reg.tmp1    = brw_vec4_grf(i, 4);
+      i++;
+   }
+
+   if (c->need_ff_sync) {
+      c->reg.ff_sync = retype(brw_vec1_grf(i, 0), BRW_REGISTER_TYPE_UD);
+      i++;
+   }
+
+   c->first_tmp = i;   /* first register past the fixed allocations */
+   c->last_tmp = i;
+
+   c->prog_data.urb_read_length = c->nr_regs; /* ? */
+   c->prog_data.total_grf = i;
+}
+
+/* Seed inlist with the addresses of vertex[0..2] (0 and 1 swapped for
+ * _3DPRIM_TRISTRIP_REVERSE), zero the outlist register, set nr_verts = 3. */
+void brw_clip_tri_init_vertices( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+   struct brw_instruction *is_rev;
+
+   /* Initial list of indices for incoming vertexes:
+    */
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
+
+   /* XXX: Is there an easier way to do this?  Need to reverse every
+    * second tristrip element:  Can ignore sometimes?
+    */
+   is_rev = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[1]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[0]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(-1));
+   }
+   is_rev = brw_ELSE(p, is_rev);
+   {
+      brw_MOV(p, get_element(c->reg.inlist, 0),  brw_address(c->reg.vertex[0]) );
+      brw_MOV(p, get_element(c->reg.inlist, 1),  brw_address(c->reg.vertex[1]) );
+      if (c->need_direction)
+	 brw_MOV(p, c->reg.dir, brw_imm_f(1));
+   }
+   brw_ENDIF(p, is_rev);
+
+   brw_MOV(p, get_element(c->reg.inlist, 2),  brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0));
+   brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3));
+}
+
+/* Flat shading: compare R0 element 2 (masked with PRIM_MASK) against
+ * _3DPRIM_POLYGON and pick the brw_clip_copy_colors() index pairs from it. */
+void brw_clip_tri_flat_shade( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_clip_copy_colors(c, 1, 0);
+      brw_clip_copy_colors(c, 2, 0);
+   }
+   is_poly = brw_ELSE(p, is_poly);
+   {
+      brw_clip_copy_colors(c, 0, 2);
+      brw_clip_copy_colors(c, 1, 2);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+
+/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */
+void brw_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_indirect vtx = brw_indirect(0, 0);
+   struct brw_indirect vtxPrev = brw_indirect(1, 0);
+   struct brw_indirect vtxOut = brw_indirect(2, 0);
+   struct brw_indirect plane_ptr = brw_indirect(3, 0);
+   struct brw_indirect inlist_ptr = brw_indirect(4, 0);
+   struct brw_indirect outlist_ptr = brw_indirect(5, 0);
+   struct brw_indirect freelist_ptr = brw_indirect(6, 0);
+   struct brw_instruction *plane_loop;
+   struct brw_instruction *plane_active;
+   struct brw_instruction *vertex_loop;
+   struct brw_instruction *next_test;
+   struct brw_instruction *prev_test;
+   
+   brw_MOV(p, get_addr_reg(vtxPrev),     brw_address(c->reg.vertex[2]) );
+   brw_MOV(p, get_addr_reg(plane_ptr),   brw_clip_plane0_address(c));
+   brw_MOV(p, get_addr_reg(inlist_ptr),  brw_address(c->reg.inlist));
+   brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+
+   brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) );
+
+   plane_loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      /* if (planemask & 1)
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1));
+      
+      plane_active = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 /* vtxOut = freelist_ptr++ 
+	  */
+	 brw_MOV(p, get_addr_reg(vtxOut),       get_addr_reg(freelist_ptr) );
+	 brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE));
+
+	 if (c->key.nr_userclip)
+	    brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0));
+	 else
+	    brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0));
+	    
+	 brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+	 brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0));
+
+	 vertex_loop = brw_DO(p, BRW_EXECUTE_1);
+	 {
+	    /* vtx = *inlist_ptr;
+	     */
+	    brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0));
+
+	    /* IS_NEGATIVE(prev) */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	    prev_test = brw_IF(p, BRW_EXECUTE_1);
+	    {
+	       /* IS_POSITIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+
+		  /* Coming back in.
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev);
+
+		  /* If (vtxOut == 0) vtxOut = vtxPrev
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, GL_FALSE);
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       }
+	       brw_ENDIF(p, next_test);
+	       
+	    }
+	    prev_test = brw_ELSE(p, prev_test);
+	    {
+	       /* *outlist_ptr++ = vtxPrev;
+		* nr_verts++;
+		*/
+	       brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev));
+	       brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+	       brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+
+	       /* IS_NEGATIVE(next)
+		*/
+	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       next_test = brw_IF(p, BRW_EXECUTE_1);
+	       {
+		  /* Going out of bounds.  Avoid division by zero as we
+		   * know dp != dpPrev from DIFFERENT_SIGNS, above.
+		   */
+		  brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev));
+		  brw_math_invert(p, c->reg.t, c->reg.t);
+		  brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp);
+
+		  /* If (vtxOut == 0) vtxOut = vtx
+		   */
+		  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) );
+		  brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) );
+		  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+		  brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, GL_TRUE);		  
+
+		  /* *outlist_ptr++ = vtxOut;
+		   * nr_verts++; 
+		   * vtxOut = 0;
+		   */
+		  brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut));
+		  brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short)));
+		  brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1));
+		  brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) );
+	       } 	       
+	       brw_ENDIF(p, next_test);
+	    }
+	    brw_ENDIF(p, prev_test);
+	    
+	    /* vtxPrev = vtx;
+	     * inlist_ptr++;
+	     */
+	    brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx));
+	    brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short)));
+
+	    /* while (--loopcount != 0)
+	     */
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	    brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+	 } 
+	 brw_WHILE(p, vertex_loop);
+
+	 /* vtxPrev = *(outlist_ptr-1)  OR: outlist[nr_verts-1]
+	  * inlist = outlist
+	  * inlist_ptr = &inlist[0]
+	  * outlist_ptr = &outlist[0]
+	  */
+	 brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2));
+	 brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0));
+	 brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0));
+	 brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist));
+	 brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist));
+      }
+      brw_ENDIF(p, plane_active);
+      
+      /* plane_ptr++;
+       */
+      brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c));
+
+      /* nr_verts >= 3 
+       */
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      c->reg.nr_verts,
+	      brw_imm_ud(3));
+   
+      /* && (planemask>>=1) != 0
+       */
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1));
+   }
+   brw_WHILE(p, plane_loop);
+}
+
+/* Emit the vertices referenced by inlist as one _3DPRIM_TRIFAN, with
+ * PRIM_START on the first and PRIM_END on the last; needs nr_verts > 2. */
+void brw_clip_tri_emit_polygon(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop, *if_insn;
+
+   /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--)
+    */
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+   brw_ADD(p,
+	   c->reg.loopcount,
+	   c->reg.nr_verts,
+	   brw_imm_d(-2));
+
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      struct brw_indirect v0 = brw_indirect(0, 0);
+      struct brw_indirect vptr = brw_indirect(1, 0);
+
+      brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START));
+      /* Advance vptr by 2 bytes (one uw inlist entry) and load the next vertex address: */
+      brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2));
+  
+	 brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2));
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0));
+
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+
+      brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END));
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+static void do_clip_tri( struct brw_clip_compile *c )
+{
+   brw_clip_init_planes(c);
+   /* Clip against each plane whose bit is set in planemask: */
+   brw_clip_tri(c);
+}
+
+/* Emit do_clip_tri() inside an IF that tests planemask != 0. */
+static void maybe_do_clip_tri( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      do_clip_tri(c);
+   }
+   brw_ENDIF(p, do_clip);
+}
+
+static void brw_clip_test( struct brw_clip_compile *c )
+{
+    struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+    struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+
+    struct brw_reg v0 = get_tmp(c);
+    struct brw_reg v1 = get_tmp(c);
+    struct brw_reg v2 = get_tmp(c);
+
+    struct brw_indirect vt0 = brw_indirect(0, 0);
+    struct brw_indirect vt1 = brw_indirect(1, 0);
+    struct brw_indirect vt2 = brw_indirect(2, 0);
+
+    struct brw_compile *p = &c->func;
+    struct brw_instruction *is_outside;
+    struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */
+
+    brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
+    brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
+    brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
+    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+    brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
+
+    /* test nearz, xmin, ymin plane */
+    /* clip.xyz < -clip.w */
+    brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, negate(get_element(v0, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, negate(get_element(v1, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, negate(get_element(v2, 3))); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* All vertices are outside of a plane, rejected */
+    brw_AND(p, t, t1, t2);
+    brw_AND(p, t, t, t3);
+    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+    brw_OR(p, tmp0, tmp0, get_element(t, 2));
+    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    {
+        brw_clip_kill_thread(c);
+    }
+    brw_ENDIF(p, is_outside);
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* some vertices are inside a plane, some are outside: need to clip */
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1);
+    brw_AND(p, t, t, brw_imm_ud(0x1));
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* test farz, xmax, ymax plane */
+    /* clip.xyz > clip.w */
+    brw_CMP(p, t1, BRW_CONDITIONAL_G, v0, get_element(v0, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t2, BRW_CONDITIONAL_G, v1, get_element(v1, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, t3, BRW_CONDITIONAL_G, v2, get_element(v2, 3)); 
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* All vertices are outside of a plane, rejected */
+    brw_AND(p, t, t1, t2);
+    brw_AND(p, t, t, t3);
+    brw_OR(p, tmp0, get_element(t, 0), get_element(t, 1));
+    brw_OR(p, tmp0, tmp0, get_element(t, 2));
+    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+    brw_AND(p, brw_null_reg(), tmp0, brw_imm_ud(0x1));
+    is_outside = brw_IF(p, BRW_EXECUTE_1);
+    {
+        brw_clip_kill_thread(c);
+    }
+    brw_ENDIF(p, is_outside);
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    /* some vertices are inside a plane, some are outside: need to clip */
+    brw_XOR(p, t, t1, t2);
+    brw_XOR(p, t1, t2, t3);
+    brw_OR(p, t, t, t1);
+    brw_AND(p, t, t, brw_imm_ud(0x1));
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 0), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 1), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
+            get_element(t, 2), brw_imm_ud(0));
+    brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
+    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+    release_tmps(c);
+}
+
+/* Top-level generator for the triangle clip program. */
+void brw_emit_tri_clip( struct brw_clip_compile *c )
+{
+   struct brw_instruction *neg_rhw;
+   struct brw_compile *p = &c->func;
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);   /* presumably 3 payload verts + one per clip plane - TODO confirm */
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_clipmask(c);
+   brw_clip_init_ff_sync(c);
+
+   /* If the -ve rhw workaround bit (bit 20 of R0 element 2) is set,
+      run brw_clip_test(): */
+   if (BRW_IS_965(p->brw)) {
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
+              brw_imm_ud(1<<20));
+      neg_rhw = brw_IF(p, BRW_EXECUTE_1); 
+      {
+         brw_clip_test(c);
+      }
+      brw_ENDIF(p, neg_rhw);
+   }
+   /* Can't push into do_clip_tri because with polygon (or quad)
+    * flatshading, need to apply the flatshade here because we don't
+    * respect the PV when converting to trifan for emit:
+    */
+   if (c->key.do_flat_shading) 
+      brw_clip_tri_flat_shade(c); 
+      
+   if ((c->key.clip_mode == BRW_CLIPMODE_NORMAL) ||
+       (c->key.clip_mode == BRW_CLIPMODE_KERNEL_CLIP))
+      do_clip_tri(c);
+   else 
+      maybe_do_clip_tri(c);   /* other clip modes: clip only when planemask != 0 */
+
+   brw_clip_tri_emit_polygon(c);
+
+   /* Send an empty message to kill the thread:
+    */
+   brw_clip_kill_thread(c);
+}
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
new file mode 100644
index 0000000000..ad1bfa435f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -0,0 +1,505 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* Multiply c->reg.dir by the cross product of two triangle edges taken
+ * from projected copies of the HPOS of c->reg.vertex[0..2] (direct
+ * access, no inlist indirection).  NOTE(review): the original remark
+ * here was "BZZZT!", suggesting the no-indirection assumption is wrong. */
+static void compute_tri_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg e = c->reg.tmp0;
+   struct brw_reg f = c->reg.tmp1;
+   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 
+
+
+   struct brw_reg v0n = get_tmp(c);
+   struct brw_reg v1n = get_tmp(c);
+   struct brw_reg v2n = get_tmp(c);
+
+   /* Convert to NDC.
+    * NOTE: We can't modify the original vertex coordinates,
+    * as it may impact further operations.
+    * So, we have to keep normalized coordinates in temp registers.
+    *
+    * TBD-KC
+    * Try to optimize unnecessary MOV's.
+    */
+   brw_MOV(p, v0n, v0);
+   brw_MOV(p, v1n, v1);
+   brw_MOV(p, v2n, v2);
+
+   brw_clip_project_position(c, v0n);
+   brw_clip_project_position(c, v1n);
+   brw_clip_project_position(c, v2n);
+
+   /* Calculate the vectors of two edges of the triangle:
+    */
+   brw_ADD(p, e, v0n, negate(v2n)); 
+   brw_ADD(p, f, v1n, negate(v2n)); 
+
+   /* Take their crossproduct (the MAC writes the result to e):
+    */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3),  brw_swizzle(f,2,0,1,3));
+   brw_MAC(p, vec4(e),  negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+
+   brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e));
+}
+
+/* Emit a kill-thread guarded by a dir.z test: GE 0 when fill_ccw is CLIP_CULL, L 0 otherwise. */
+static void cull_direction( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   assert (!(c->key.fill_ccw == CLIP_CULL &&
+	     c->key.fill_cw == CLIP_CULL));
+
+   if (c->key.fill_ccw == CLIP_CULL)
+      conditional = BRW_CONDITIONAL_GE;
+   else
+      conditional = BRW_CONDITIONAL_L;
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+/* Copy BFC0/BFC1 over COL0/COL1 on all three vertices, guarded by a dir.z test chosen by key.copy_bfc_ccw. */
+static void copy_bfc( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+   GLuint conditional;
+
+   /* Do we have any colors to copy? 
+    */
+   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
+       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+      return;
+
+   /* In some weird degenerate cases we can end up testing the
+    * direction twice, once for culling and once for bfc copying.  Oh
+    * well, that's what you get for setting weird GL state.
+    */
+   if (c->key.copy_bfc_ccw)
+      conditional = BRW_CONDITIONAL_GE;
+   else
+      conditional = BRW_CONDITIONAL_L;
+
+   brw_CMP(p,
+	   vec1(brw_null_reg()),
+	   conditional,
+	   get_element(c->reg.dir, 2),
+	   brw_imm_f(0));
+   
+   ccw = brw_IF(p, BRW_EXECUTE_1);
+   {
+      GLuint i;
+
+      for (i = 0; i < 3; i++) {
+	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+
+	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+	    brw_MOV(p, 
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
+		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+      }
+   }
+   brw_ENDIF(p, ccw);
+}
+
+
+
+
+/* Polygon offset.  Reference computation:
+  GLfloat iz	= 1.0 / dir.z;
+  GLfloat ac	= dir.x * iz;
+  GLfloat bc	= dir.y * iz;
+  offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE;
+  offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor;
+  offset *= MRD;
+  The code below only applies key.offset_factor and key.offset_units; DEPTH_SCALE and MRD do not appear here. */
+static void compute_offset( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg off = c->reg.offset;
+   struct brw_reg dir = c->reg.dir;
+   
+   brw_math_invert(p, get_element(off, 2), get_element(dir, 2));
+   brw_MUL(p, vec2(off), dir, get_element(off, 2));
+
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_GE,
+	   brw_abs(get_element(off, 0)), 
+	   brw_abs(get_element(off, 1)));
+
+   brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1)));
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor));
+   brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units));
+}
+
+/* For _3DPRIM_POLYGON input, conditionally zero the EDGE attribute of vertex 0 / vertex 2 from R0.2 bits 8 / 9. */
+static void merge_edgeflags( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *is_poly;
+   struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0);
+
+   brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); 
+   brw_CMP(p, 
+	   vec1(brw_null_reg()), 
+	   BRW_CONDITIONAL_EQ, 
+	   tmp0,
+	   brw_imm_ud(_3DPRIM_POLYGON));
+
+   /* Get away with using reg.vertex because we know that this is not
+    * a _3DPRIM_TRISTRIP_REVERSE:
+    */
+   is_poly = brw_IF(p, BRW_EXECUTE_1);
+   {   
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
+      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); /* presumably predicated on the AND result -- confirm */
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
+      brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
+      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+   }
+   brw_ENDIF(p, is_poly);
+}
+
+
+/* Add c->reg.offset to the third float (z) of the header position of the vertex addressed by 'vert'. */
+static void apply_one_offset( struct brw_clip_compile *c,
+			  struct brw_indirect vert )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg z = deref_1f(vert, c->header_position_offset +
+			       2 * type_sz(BRW_REGISTER_TYPE_F));
+
+   brw_ADD(p, z, z, vec1(c->reg.offset));
+}
+
+
+
+/***********************************************************************
+ * Output clipped polygon as an unfilled primitive:
+ * emit_lines: one 2-vertex LINESTRIP per inlist edge whose first vertex has a non-zero EDGE attribute. */
+static void emit_lines(struct brw_clip_compile *c,
+		       GLboolean do_offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_edge;
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v1 = brw_indirect(1, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+   struct brw_indirect v1ptr = brw_indirect(3, 0);
+
+   /* Need a separate loop for offset:
+    */
+   if (do_offset) {
+      brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+      brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+      loop = brw_DO(p, BRW_EXECUTE_1);
+      {
+	 brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+	 brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+	    
+	 apply_one_offset(c, v0);
+	    
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_G);
+	 brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+      }
+      brw_WHILE(p, loop);
+   }
+
+   /* v1ptr = &inlist[nr_verts]   (entries are 2 bytes, hence nr_verts is added twice)
+    * *v1ptr = inlist[0]          (repeat the first entry after the last one)
+    */
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW));
+   brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+      brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2));
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw edge if edgeflag != 0 */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_edge = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
+	 brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_edge);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+
+/* Emit each inlist vertex with a non-zero EDGE attribute as a one-vertex POINTLIST, offsetting z first if do_offset. */
+static void emit_points(struct brw_clip_compile *c,
+			GLboolean do_offset )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *loop;
+   struct brw_instruction *draw_point;
+
+   struct brw_indirect v0 = brw_indirect(0, 0);
+   struct brw_indirect v0ptr = brw_indirect(2, 0);
+
+   brw_MOV(p, c->reg.loopcount, c->reg.nr_verts);
+   brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist));
+
+   loop = brw_DO(p, BRW_EXECUTE_1);
+   {
+      brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0));
+      brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2));
+
+      /* draw if edgeflag != 0 
+       */
+      brw_CMP(p, 
+	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
+	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      brw_imm_f(0));
+      draw_point = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 if (do_offset)
+	    apply_one_offset(c, v0);
+
+	 brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END);
+      }
+      brw_ENDIF(p, draw_point);
+
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+      brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1));
+   }
+   brw_WHILE(p, loop);
+}
+
+
+
+
+
+
+
+static void emit_primitives( struct brw_clip_compile *c,
+			     GLuint mode, 
+			     GLboolean do_offset )
+{
+   /* Dispatch on the fill mode selected for this facing.  CLIP_CULL
+    * is not expected here and trips the assert; any other unknown
+    * mode emits nothing.
+    */
+   if (mode == CLIP_FILL) {
+      brw_clip_tri_emit_polygon(c);
+   }
+   else if (mode == CLIP_LINE) {
+      emit_lines(c, do_offset);
+   }
+   else if (mode == CLIP_POINT) {
+      emit_points(c, do_offset);
+   }
+   else {
+      /* Only CLIP_CULL is treated as an error: */
+      assert(mode != CLIP_CULL);
+   }
+} 
+
+
+/* Emit primitives with the ccw or cw fill mode; a runtime dir.z test is emitted only if the modes differ and neither culls. */
+static void emit_unfilled_primitives( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *ccw;
+
+   /* Direction culling has already been done.
+    */
+   if (c->key.fill_ccw != c->key.fill_cw &&
+       c->key.fill_ccw != CLIP_CULL &&
+       c->key.fill_cw != CLIP_CULL)
+   {
+      brw_CMP(p,
+	      vec1(brw_null_reg()),
+	      BRW_CONDITIONAL_GE,
+	      get_element(c->reg.dir, 2),
+	      brw_imm_f(0));
+   
+      ccw = brw_IF(p, BRW_EXECUTE_1);
+      {
+	 emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+      }
+      ccw = brw_ELSE(p, ccw);
+      {
+	 emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+      }
+      brw_ENDIF(p, ccw);
+   }
+   else if (c->key.fill_cw != CLIP_CULL) {
+      emit_primitives(c, c->key.fill_cw, c->key.offset_cw);
+   }
+   else if (c->key.fill_ccw != CLIP_CULL) { 
+      emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw);
+   }
+}
+
+
+
+/* Emit a kill-thread guarded by the test c->reg.nr_verts < 3. */
+static void check_nr_verts( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3));      
+   if_insn = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_kill_thread(c);
+   }
+   brw_ENDIF(p, if_insn);
+}
+
+/* Emit the unfilled-polygon clip program: edgeflags, then direction/cull/offset/bfc as the key requires, clip, emit, kill. */
+void brw_emit_unfilled_clip( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *do_clip;
+   
+
+   c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) ||
+			(c->key.fill_ccw != c->key.fill_cw) ||
+			c->key.fill_ccw == CLIP_CULL ||
+			c->key.fill_cw == CLIP_CULL ||
+			c->key.copy_bfc_cw ||
+			c->key.copy_bfc_ccw);
+
+   brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
+   brw_clip_tri_init_vertices(c);
+   brw_clip_init_ff_sync(c);
+
+   assert(c->offset[VERT_RESULT_EDGE]);
+
+   if (c->key.fill_ccw == CLIP_CULL &&
+       c->key.fill_cw == CLIP_CULL) {
+      brw_clip_kill_thread(c);
+      return;
+   }
+
+   merge_edgeflags(c);
+
+   /* Need to use the inlist indirection here: 
+    */
+   if (c->need_direction) 
+      compute_tri_direction(c);
+   
+   if (c->key.fill_ccw == CLIP_CULL ||
+       c->key.fill_cw == CLIP_CULL)
+      cull_direction(c);
+
+   if (c->key.offset_ccw ||
+       c->key.offset_cw)
+      compute_offset(c);
+
+   if (c->key.copy_bfc_ccw ||
+       c->key.copy_bfc_cw)
+      copy_bfc(c);
+
+   /* Need to do this whether we clip or not:
+    */
+   if (c->key.do_flat_shading)
+      brw_clip_tri_flat_shade(c);
+   
+   brw_clip_init_clipmask(c);
+   brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0));
+   do_clip = brw_IF(p, BRW_EXECUTE_1);
+   {
+      brw_clip_init_planes(c);
+      brw_clip_tri(c);
+      check_nr_verts(c);
+   }
+   brw_ENDIF(p, do_clip);
+   
+   emit_unfilled_primitives(c);
+   brw_clip_kill_thread(c);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
new file mode 100644
index 0000000000..5a73abdfee
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -0,0 +1,396 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_clip.h"
+
+
+
+/* Allocate the next temporary register and raise prog_data.total_grf if needed. */
+struct brw_reg get_tmp( struct brw_clip_compile *c )
+{
+   struct brw_reg reg = brw_vec4_grf(c->last_tmp, 0);
+
+   c->last_tmp++;
+   if (c->prog_data.total_grf < c->last_tmp)
+      c->prog_data.total_grf = c->last_tmp;
+   return reg;
+}
+/* Give 'tmp' back only if it is the most recently allocated temporary. */
+static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp )
+{
+   if (c->last_tmp-1 != tmp.nr) return;   /* not the newest one: leave the counter alone */
+   c->last_tmp--;
+}
+/* Pack x, y, z, w into one UD immediate at bit offsets 0, 8, 16 and 24 (no masking is applied). */
+static struct brw_reg make_plane_ud(GLuint x, GLuint y, GLuint z, GLuint w)
+{
+   const GLuint packed = x | (y << 8) | (z << 16) | (w << 24);
+   return brw_imm_ud(packed);
+}
+
+/* With no user clip planes, load six packed planes into c->reg.fixed_planes (0xff presumably encodes -1 -- confirm). */
+void brw_clip_init_planes( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+
+   if (!c->key.nr_userclip) {
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0,    0, 0xff, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0,    0,    1, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0,    1,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff,  0,    0, 1));
+      brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1,    0,    0, 1));
+   }
+}
+
+
+
+#define W 3
+
+/* Project 'pos' to screen space (or back again), overwrite with results:
+ * pos.w = 1/pos.w, then pos.xyz *= pos.w (the new, inverted w). */
+void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
+{
+   struct brw_compile *p = &c->func;
+
+   /* calc rhw 
+    */
+   brw_math_invert(p, get_element(pos, W), get_element(pos, W));
+
+   /* value.xyz *= value.rhw
+    */
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+   brw_set_access_mode(p, BRW_ALIGN_1);
+}
+
+/* Rebuild the header position of the vertex at vert_addr: project a copy of its HPOS and store it at header_position_offset. */
+static void brw_clip_project_vertex( struct brw_clip_compile *c, 
+				     struct brw_indirect vert_addr )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+
+   /* Fixup position.  Extract from the original vertex and re-project
+    * to screen space:
+    */
+   brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+   brw_clip_project_position(c, tmp);
+   brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
+	 
+   release_tmp(c, tmp);
+}
+
+
+
+
+/* Interpolate between the vertices at v0_ptr and v1_ptr by t0 and write
+ * the result to dest_ptr, then re-project its position.  (The original
+ * note said the result goes to a0.0, which is then incremented -- TODO confirm.) */
+void brw_clip_interp_vertex( struct brw_clip_compile *c,
+			     struct brw_indirect dest_ptr,
+			     struct brw_indirect v0_ptr, /* from */
+			     struct brw_indirect v1_ptr, /* to */
+			     struct brw_reg t0,
+			     GLboolean force_edgeflag)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+   GLuint i;
+
+   /* Just copy the vertex header:
+    */
+   /*
+    * After CLIP stage, only first 256 bits of the VUE are read
+    * back on IGDNG, so needn't change it
+    */
+   brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
+      
+   /* Iterate over each attribute (could be done in pairs?)
+    */
+   for (i = 0; i < c->nr_attrs; i++) {
+      GLuint delta = i*16 + 32;
+
+      if (BRW_IS_IGDNG(p->brw))
+          delta = i * 16 + 32 * 3;
+
+      if (delta == c->offset[VERT_RESULT_EDGE]) {
+	 if (force_edgeflag) 
+	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
+	 else
+	    brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta));
+      }
+      else {
+	 /* Interpolate: 
+	  *
+	  *        New = attr0 + t*attr1 - t*attr0
+	  */
+	 brw_MUL(p, 
+		 vec4(brw_null_reg()),
+		 deref_4f(v1_ptr, delta),
+		 t0);
+
+	 brw_MAC(p, 
+		 tmp,	      
+		 negate(deref_4f(v0_ptr, delta)),
+		 t0); 
+	      
+	 brw_ADD(p,
+		 deref_4f(dest_ptr, delta), 
+		 deref_4f(v0_ptr, delta),
+		 tmp);
+      }
+   }
+
+   if (i & 1) {   /* odd attribute count: zero one more 16-byte slot after the last attribute */
+      GLuint delta = i*16 + 32;
+
+      if (BRW_IS_IGDNG(p->brw))
+          delta = i * 16 + 32 * 3;
+
+      brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
+   }
+
+   release_tmp(c, tmp);
+
+   /* Recreate the projected (NDC) coordinate in the new vertex
+    * header:
+    */
+   brw_clip_project_vertex(c, dest_ptr );
+}
+
+
+
+
+#define MAX_MRF 16
+/* Copy the vertex at 'vert' into message registers, store 'header' in R0.2 and emit one urb write for it. */
+void brw_clip_emit_vue(struct brw_clip_compile *c, 
+		       struct brw_indirect vert,
+		       GLboolean allocate,
+		       GLboolean eot,
+		       GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   GLuint start = c->last_mrf;
+
+   brw_clip_ff_sync(c);
+
+   assert(!(allocate && eot));
+   
+   /* Cycle through mrf regs - probably futile as we have to wait for
+    * the allocation response anyway.  Also, the order this function
+    * is invoked doesn't correspond to the order the instructions will
+    * be executed, so it won't have any effect in many cases.
+    */
+#if 0
+   if (start + c->nr_regs + 1 >= MAX_MRF)
+      start = 0;
+
+   c->last_mrf = start + c->nr_regs + 1;
+#endif
+	
+   /* Copy the vertex from vertn into m1..mN+1:
+    */
+   brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs);
+
+   /* Overwrite PrimType and PrimStart in the message header, for
+    * each vertex in turn:
+    */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+
+   /* Send each vertex as a separate write to the urb.  This
+    * is different to the concept in brw_sf_emit.c, where
+    * subsequent writes are used to build up a single urb
+    * entry.  Each of these writes instantiates a separate
+    * urb entry - (I think... what about 'allocate'?)
+    */
+   brw_urb_WRITE(p, 
+		 allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 start,
+		 c->reg.R0,
+		 allocate,
+		 1,		/* used */
+		 c->nr_regs + 1, /* msg length */
+		 allocate ? 1 : 0, /* response_length */ 
+		 eot,		/* eot */
+		 1,		/* writes_complete */
+		 0,		/* urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+/* Terminate the thread: brw_clip_ff_sync(), then an urb write of length 1 with eot set and nothing used. */
+void brw_clip_kill_thread(struct brw_clip_compile *c)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_clip_ff_sync(c);
+   /* Send an empty message to kill the thread and release any
+    * allocated urb entry:
+    */
+   brw_urb_WRITE(p, 
+		 retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+		 0,
+		 c->reg.R0,
+		 0,		/* allocate */
+		 0,		/* used */
+		 1, 		/* msg len */
+		 0, 		/* response len */
+		 1, 		/* eot */
+		 1,		/* writes complete */
+		 0,
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+
+
+/* Address of c->reg.fixed_planes, i.e. of the first clip plane. */
+struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
+{
+   struct brw_reg first_plane = brw_address(c->reg.fixed_planes); return first_plane;
+}
+
+/* Distance between consecutive clip planes, as a UW immediate. */
+struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c )
+{
+   /* 16 when user clip planes are present, otherwise 4 (presumably
+    * bytes: one packed UD per fixed plane -- confirm).
+    */
+   const GLuint stride = c->key.nr_userclip ? 16 : 4;
+
+   return brw_imm_uw(stride);
+}
+
+
+/* If flatshading, distribute color from provoking vertex prior to
+ * clipping.  Copies every color attribute with a non-zero offset from
+ * c->reg.vertex[from] to c->reg.vertex[to]: front colors first, then
+ * back-face colors.
+ */
+void brw_clip_copy_colors( struct brw_clip_compile *c,
+			   GLuint to, GLuint from )
+{
+   /* Keep the table in the original emission order:
+    * COL0, COL1, BFC0, BFC1.
+    */
+   static const GLuint color_attrs[] = {
+      VERT_RESULT_COL0,
+      VERT_RESULT_COL1,
+      VERT_RESULT_BFC0,
+      VERT_RESULT_BFC1,
+   };
+   struct brw_compile *p = &c->func;
+   GLuint n;
+   for (n = 0; n < sizeof(color_attrs) / sizeof(color_attrs[0]); n++) {
+      const GLuint attr_offset = c->offset[color_attrs[n]];
+      if (attr_offset)
+         brw_MOV(p,
+                 byte_offset(c->reg.vertex[to], attr_offset),
+                 byte_offset(c->reg.vertex[from], attr_offset));
+   }
+}
+
+
+
+/* Build c->reg.planemask from the outcode bits found in R0.2. */
+void brw_clip_init_clipmask( struct brw_clip_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg incoming = get_element_ud(c->reg.R0, 2);
+   
+   /* Shift so that lowest outcode bit is rightmost (bits 26-31 -> 0-5): 
+    */
+   brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
+
+   if (c->key.nr_userclip) {
+      struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
+
+      /* Rearrange userclip outcodes (bits 14-19 of R0.2) so that they
+       * come directly after the fixed plane bits, i.e. at bits 6-11.
+       */
+      brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14));
+      brw_SHR(p, tmp, tmp, brw_imm_ud(8));
+      brw_OR(p, c->reg.planemask, c->reg.planemask, tmp);
+      
+      release_tmp(c, tmp);
+   }
+}
+/* When c->need_ff_sync is set, emit an FF_SYNC guarded by bit 0 of c->reg.ff_sync, and set that bit inside the guard. */
+void brw_clip_ff_sync(struct brw_clip_compile *c)
+{
+    if (c->need_ff_sync) {
+        struct brw_compile *p = &c->func;
+        struct brw_instruction *need_ff_sync;
+
+        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+        brw_AND(p, brw_null_reg(), c->reg.ff_sync, brw_imm_ud(0x1));
+        need_ff_sync = brw_IF(p, BRW_EXECUTE_1);
+        {
+            brw_OR(p, c->reg.ff_sync, c->reg.ff_sync, brw_imm_ud(0x1));
+            brw_ff_sync(p, 
+                    c->reg.R0,
+                    0,
+                    c->reg.R0,
+                    1,	
+                    1,		/* used */
+                    1,  	/* msg length */
+                    1,		/* response length */
+                    0,		/* eot */
+                    1,		/* write complete */
+                    0,		/* urb offset */
+                    BRW_URB_SWIZZLE_NONE);
+        }
+        brw_ENDIF(p, need_ff_sync);
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    }
+}
+/* Zero c->reg.ff_sync when the program uses FF_SYNC; otherwise emit nothing. */
+void brw_clip_init_ff_sync(struct brw_clip_compile *c)
+{
+    struct brw_compile *p = &c->func;
+
+    if (!c->need_ff_sync)
+        return;
+    brw_MOV(p, c->reg.ff_sync, brw_imm_ud(0));
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
new file mode 100644
index 0000000000..c300c33adc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "main/imports.h"
+#include "main/api_noop.h"
+#include "main/macros.h"
+#include "main/vtxfmt.h"
+#include "main/simple_list.h"
+#include "shader/shader_api.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_draw.h"
+#include "brw_state.h"
+#include "brw_vs.h"
+#include "intel_tex.h"
+#include "intel_blit.h"
+#include "intel_batchbuffer.h"
+#include "intel_pixel.h"
+#include "intel_span.h"
+#include "tnl/t_pipeline.h"
+
+#include "utils.h"
+
+
+/***************************************
+ * Mesa's Driver Functions
+ ***************************************/
+/* UseProgram driver hook: forwards straight to _mesa_use_program(). */
+static void brwUseProgram(GLcontext *ctx, GLuint program)
+{
+   _mesa_use_program(ctx, program);
+}
+/* Install brwUseProgram as the UseProgram entry of 'functions'. */
+static void brwInitProgFuncs( struct dd_function_table *functions )
+{
+   functions->UseProgram = brwUseProgram;
+}
+static void brwInitDriverFunctions( struct dd_function_table *functions )
+{
+   intelInitDriverFunctions( functions );
+   /* brw-specific hooks are installed after the intel ones: */
+   brwInitFragProgFuncs( functions );
+   brwInitProgFuncs( functions );
+   brw_init_queryobj_functions(functions);
+
+   functions->Viewport = intel_viewport;
+}
+/* Allocate and initialize a brw context; returns GL_FALSE if allocation or intelInitContext() fails, GL_TRUE otherwise. */
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate)
+{
+   struct dd_function_table functions;
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+   struct intel_context *intel;
+   GLcontext *ctx;
+
+   if (!brw) {
+      _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+   intel = &brw->intel;   /* derived only once brw is known to be non-NULL */
+   brwInitVtbl( brw );
+   brwInitDriverFunctions( &functions );
+
+   if (!intelInitContext( intel, mesaVis, driContextPriv,
+			  sharedContextPrivate, &functions )) {
+      _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
+      FREE(brw);
+      return GL_FALSE;
+   }
+   ctx = &intel->ctx;
+   /* Initialize swrast, tnl driver tables: */
+   intelInitSpanFuncs(ctx);
+
+   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
+   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
+   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
+                                     ctx->Const.MaxTextureImageUnits);
+   ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
+
+   /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
+    */
+   ctx->Const.MaxTextureLevels = 13;
+   ctx->Const.Max3DTextureLevels = 9;
+   ctx->Const.MaxCubeTextureLevels = 12;
+   ctx->Const.MaxTextureRectSize = (1<<12);
+   
+   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+
+   /* if conformance mode is set, swrast can handle any size AA point */
+   ctx->Const.MaxPointSizeAA = 255.0;
+
+   /* We want the GLSL compiler to emit code that uses condition codes */
+   ctx->Shader.EmitCondCodes = GL_TRUE;
+   ctx->Shader.EmitNVTempInitialization = GL_TRUE;
+
+   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
+   ctx->Const.VertexProgram.MaxAluInstructions = 0;
+   ctx->Const.VertexProgram.MaxTexInstructions = 0;
+   ctx->Const.VertexProgram.MaxTexIndirections = 0;
+   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
+   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
+   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
+   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
+   ctx->Const.VertexProgram.MaxNativeTemps = 256;
+   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
+   ctx->Const.VertexProgram.MaxEnvParams =
+      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
+	   ctx->Const.VertexProgram.MaxEnvParams);
+
+   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
+   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
+   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
+   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
+   ctx->Const.FragmentProgram.MaxEnvParams =
+      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
+	   ctx->Const.FragmentProgram.MaxEnvParams);
+
+   brw_init_state( brw );
+
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+
+   brw->emit_state_always = 0;
+
+   ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+   make_empty_list(&brw->query.active_head);
+
+   brw_draw_init( brw );
+
+   return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
new file mode 100644
index 0000000000..fa3e32c7ff
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -0,0 +1,767 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#ifndef BRWCONTEXT_INC
+#define BRWCONTEXT_INC
+
+#include "intel_context.h"
+#include "brw_structs.h"
+#include "main/imports.h"
+
+
+/* Glossary:
+ *
+ * URB - uniform resource buffer.  A mid-sized buffer which is
+ * partitioned between the fixed function units and used for passing
+ * values (vertices, primitives, constants) between them.
+ *
+ * CURBE - constant URB entry.  An urb region (entry) used to hold
+ * constant values which the fixed function units can be instructed to
+ * preload into the GRF when spawning a thread.
+ *
+ * VUE - vertex URB entry.  An urb entry holding a vertex and usually
+ * a vertex header.  The header contains control information and
+ * things like primitive type, Begin/end flags and clip codes.  
+ *
+ * PUE - primitive URB entry.  An urb entry produced by the setup (SF)
+ * unit holding rasterization and interpolation parameters.
+ *
+ * GRF - general register file.  One of several register files
+ * addressable by programmed threads.  The inputs (r0, payload, curbe,
+ * urb) of the thread are preloaded to this area before the thread is
+ * spawned.  The registers are individually 8 dwords wide and suitable
+ * for general usage.  Registers holding thread input values are not
+ * special and may be overwritten.
+ *
+ * MRF - message register file.  Threads communicate (and terminate)
+ * by sending messages.  Message parameters are placed in contiguous
+ * MRF registers.  All program output is via these messages.  URB
+ * entries are populated by sending a message to the shared URB
+ * function containing the new data, together with a control word,
+ * often an unmodified copy of R0.
+ *
+ * R0 - GRF register 0.  Typically holds control information used when
+ * sending messages to other threads.
+ *
+ * EU or GEN4 EU: The name of the programmable subsystem of the
+ * i965 hardware.  Threads are executed by the EU, the registers
+ * described above are part of the EU architecture.
+ *
+ * Fixed function units:
+ *
+ * CS - Command streamer.  Notional first unit, little software
+ * interaction.  Holds the URB entries used for constant data, ie the
+ * CURBEs.
+ *
+ * VF/VS - Vertex Fetch / Vertex Shader.  The fixed function part of
+ * this unit is responsible for pulling vertices out of vertex buffers
+ * in vram and injecting them into the processing pipe as VUEs.  If
+ * enabled, it first passes them to a VS thread which is a good place
+ * for the driver to implement any active vertex shader.
+ *
+ * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If
+ * enabled, incoming strips etc are passed to GS threads in individual
+ * line/triangle/point units.  The GS thread may perform arbitrary
+ * computation and emit whatever primitives with whatever vertices it
+ * chooses.  This makes GS an excellent place to implement GL's
+ * unfilled polygon modes, though of course it is capable of much
+ * more.  Additionally, GS is used to translate away primitives not
+ * handled by later units, including Quads and Lineloops.
+ *
+ * CLIP - Clipper.  Mesa's clipping algorithms are imported to run on
+ * this unit.  The fixed function part performs cliptesting against
+ * the 6 fixed clipplanes and makes decisions on whether or not the
+ * incoming primitive needs to be passed to a thread for clipping.
+ * User clip planes are handled via cooperation with the VS thread.
+ *
+ * SF - Strips Fans or Setup: Triangles are prepared for
+ * rasterization.  Interpolation coefficients are calculated.
+ * Flatshading and two-side lighting usually performed here.
+ *
+ * WM - Windower.  Interpolation of vertex attributes performed here.
+ * Fragment shader implemented here.  SIMD aspects of EU taken full
+ * advantage of, as pixels are processed in blocks of 16.
+ *
+ * CC - Color Calculator.  No EU threads associated with this unit.
+ * Handles blending and (presumably) depth and stencil testing.
+ */
+
+#define BRW_FALLBACK_TEXTURE		 0x1
+#define BRW_MAX_CURBE                    (32*16)
+
+struct brw_context;
+
+#define BRW_NEW_URB_FENCE               0x1
+#define BRW_NEW_FRAGMENT_PROGRAM        0x2
+#define BRW_NEW_VERTEX_PROGRAM          0x4
+#define BRW_NEW_INPUT_DIMENSIONS        0x8
+#define BRW_NEW_CURBE_OFFSETS           0x10
+#define BRW_NEW_REDUCED_PRIMITIVE       0x20
+#define BRW_NEW_PRIMITIVE               0x40
+#define BRW_NEW_CONTEXT                 0x80
+#define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
+#define BRW_NEW_PSP                     0x800
+#define BRW_NEW_WM_SURFACES		0x1000
+#define BRW_NEW_FENCE                   0x2000
+#define BRW_NEW_INDICES			0x4000
+#define BRW_NEW_VERTICES		0x8000
+/**
+ * Used for any batch entry with a relocated pointer that will be used
+ * by any 3D rendering.
+ */
+#define BRW_NEW_BATCH			0x10000
+/** brw->depth_region updated */
+#define BRW_NEW_DEPTH_BUFFER		0x20000
+#define BRW_NEW_NR_WM_SURFACES		0x40000
+#define BRW_NEW_NR_VS_SURFACES		0x80000
+#define BRW_NEW_INDEX_BUFFER		0x100000
+
+struct brw_state_flags {
+   /** State update flags signalled by mesa internals */
+   GLuint mesa;
+   /**
+    * State update flags signalled as the result of brw_tracked_state updates
+    */
+   GLuint brw;
+   /** State update flags signalled by brw_state_cache.c searches */
+   GLuint cache;
+};
+
+
+/** Subclass of Mesa vertex program */
+struct brw_vertex_program {
+   struct gl_vertex_program program;   /**< base object; kept first so brw_vertex_program() can cast to this type */
+   GLuint id;
+   dri_bo *const_buffer;    /**< Program constant buffer/surface */
+   GLboolean use_const_buffer;
+};
+
+
+/** Subclass of Mesa fragment program */
+struct brw_fragment_program {
+   struct gl_fragment_program program;   /**< base object; kept first so brw_fragment_program() can cast to this type */
+   GLuint id;  /**< serial no. to identify frag progs, never re-used */
+   GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */
+
+   dri_bo *const_buffer;    /**< Program constant buffer/surface */
+   GLboolean use_const_buffer;
+
+   /** for debugging, which texture units are referenced */
+   GLbitfield tex_units_used;
+};
+
+
+/* Data about a particular attempt to compile a program.  Note that
+ * there can be many of these, each in a different GL state
+ * corresponding to a different brw_wm_prog_key struct, with different
+ * compiled programs:
+ */
+struct brw_wm_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+
+   GLuint first_curbe_grf;
+   GLuint total_grf;
+   GLuint total_scratch;
+
+   GLuint nr_params;       /**< number of float params/constants */
+   GLboolean error;
+
+   /* Pointer to tracked values (only valid once
+    * _mesa_load_state_parameters has been called at runtime).
+    */
+   const GLfloat *param[BRW_MAX_CURBE];
+};
+
+struct brw_sf_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+
+   /* Each vertex may have up to 12 attributes, 4 components each,
+    * except WPOS which requires only 2.  (11*4 + 2) == 46 ==> 12
+    * rows of 4 floats.
+    *
+    * We use 4 components for every attribute, so call it 12 rows.
+    */
+   GLuint urb_entry_size;
+};
+
+struct brw_clip_prog_data {
+   GLuint curb_read_length;	/* user planes? */
+   GLuint clip_mode;
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+struct brw_gs_prog_data {
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
+
+struct brw_vs_prog_data {
+   GLuint curb_read_length;
+   GLuint urb_read_length;
+   GLuint total_grf;
+   GLuint outputs_written;
+   GLuint nr_params;       /**< number of float params/constants */
+
+   GLuint inputs_read;
+
+   /* Used for calculating urb partitions:
+    */
+   GLuint urb_entry_size;
+};
+
+
+/* Size == 0 if output either not written, or always [0,0,0,1]
+ */
+struct brw_vs_ouput_sizes {
+   GLubyte output_size[VERT_RESULT_MAX];
+};
+
+
+/** Number of texture sampler units */
+#define BRW_MAX_TEX_UNIT 16
+
+/**
+ * Size of our surface binding table for the WM.
+ * This contains pointers to the drawing surfaces, the fragment shader
+ * constant buffer (+1) and the current texture objects.
+ */
+#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+
+/**
+ * Helpers to convert drawing buffers, textures and constant buffers
+ * to surface binding table indexes, for WM.
+ */
+#define SURF_INDEX_DRAW(d)           (d)
+#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) 
+#define SURF_INDEX_TEXTURE(t)        (MAX_DRAW_BUFFERS + 1 + (t))
+
+/**
+ * Size of surface binding table for the VS.
+ * Only one constant buffer for now.
+ */
+#define BRW_VS_MAX_SURF 1
+
+/**
+ * Only a VS constant buffer
+ */
+#define SURF_INDEX_VERT_CONST_BUFFER 0
+
+
+enum brw_cache_id {
+   BRW_CC_VP,
+   BRW_CC_UNIT,
+   BRW_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER,
+   BRW_WM_UNIT,
+   BRW_SF_PROG,
+   BRW_SF_VP,
+   BRW_SF_UNIT,
+   BRW_VS_UNIT,
+   BRW_VS_PROG,
+   BRW_GS_UNIT,
+   BRW_GS_PROG,
+   BRW_CLIP_VP,
+   BRW_CLIP_UNIT,
+   BRW_CLIP_PROG,
+   BRW_SS_SURFACE,
+   BRW_SS_SURF_BIND,
+
+   BRW_MAX_CACHE
+};
+
+struct brw_cache_item {
+   /**
+    * Effectively part of the key, cache_id identifies what kind of state
+    * buffer is involved, and also which brw->state.dirty.cache flag should
+    * be set when this cache item is chosen.
+    */
+   enum brw_cache_id cache_id;
+   /** 32-bit hash of the key data */
+   GLuint hash;
+   GLuint key_size;		/* for variable-sized keys */
+   const void *key;
+   dri_bo **reloc_bufs;
+   GLuint nr_reloc_bufs;
+
+   dri_bo *bo;
+   GLuint data_size;
+
+   struct brw_cache_item *next;
+};   
+
+
+
+struct brw_cache {
+   struct brw_context *brw;
+
+   struct brw_cache_item **items;
+   GLuint size, n_items;
+
+   GLuint key_size[BRW_MAX_CACHE];		/* for fixed-size keys */
+   GLuint aux_size[BRW_MAX_CACHE];
+   char *name[BRW_MAX_CACHE];
+
+   /* Record of the last BOs chosen for each cache_id.  Used to set
+    * brw->state.dirty.cache when a new cache item is chosen.
+    */
+   dri_bo *last_bo[BRW_MAX_CACHE];
+};
+
+
+/* Considered adding a member to this struct to document which flags
+ * an update might raise so that ordering of the state atoms can be
+ * checked or derived at runtime.  Dropped the idea in favor of having
+ * a debug mode where the state is monitored for flags which are
+ * raised that have already been tested against.
+ */
+struct brw_tracked_state {
+   struct brw_state_flags dirty;
+   void (*prepare)( struct brw_context *brw );
+   void (*emit)( struct brw_context *brw );
+};
+
+/* Flags for brw->state.dirty.cache.
+ */
+#define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
+#define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
+#define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
+#define CACHE_NEW_SAMPLER_DEFAULT_COLOR  (1<<BRW_SAMPLER_DEFAULT_COLOR)
+#define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
+#define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
+#define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
+#define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP)
+#define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
+#define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
+#define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
+#define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
+#define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
+#define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
+#define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
+#define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG)
+#define CACHE_NEW_SURFACE                (1<<BRW_SS_SURFACE)
+#define CACHE_NEW_SURF_BIND              (1<<BRW_SS_SURF_BIND)
+
+struct brw_cached_batch_item {
+   struct header *header;
+   GLuint sz;
+   struct brw_cached_batch_item *next;
+};
+   
+
+
+/* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
+ * be easier if C allowed arrays of packed elements?
+ */
+#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
+
+struct brw_vertex_element {
+   const struct gl_client_array *glarray;
+
+   /** The corresponding Mesa vertex attribute */
+   gl_vert_attrib attrib;
+   /** Size of a complete element */
+   GLuint element_size;
+   /** Number of uploaded elements for this input. */
+   GLuint count;
+   /** Byte stride between elements in the uploaded array */
+   GLuint stride;
+   /** Offset of the first element within the buffer object */
+   unsigned int offset;
+   /** Buffer object containing the uploaded vertex data */
+   dri_bo *bo;
+};
+
+
+
+struct brw_vertex_info {
+   GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
+};
+
+
+
+
+/* Cache for TNL programs.
+ */
+struct brw_tnl_cache_item {
+   GLuint hash;
+   void *key;
+   void *data;
+   struct brw_tnl_cache_item *next;
+};
+
+struct brw_tnl_cache {
+   struct brw_tnl_cache_item **items;
+   GLuint size, n_items;
+};
+
+struct brw_query_object {
+   struct gl_query_object Base;
+
+   /** Doubly linked list of active query objects in the context. */
+   struct brw_query_object *prev, *next;
+
+   /** Last query BO associated with this query. */
+   dri_bo *bo;
+   /** First index in bo with query data for this object. */
+   int first_index;
+   /** Last index in bo with query data for this object. */
+   int last_index;
+
+   /* Total count of pixels from previous BOs */
+   unsigned int count;
+};
+
+
+/**
+ * brw_context is derived from intel_context.
+ */
+struct brw_context 
+{
+   struct intel_context intel;  /**< base class, must be first field */
+   GLuint primitive;
+
+   GLboolean emit_state_always;
+   GLboolean tmp_fallback;
+   GLboolean no_batch_wrap;
+
+   struct {
+      struct brw_state_flags dirty;
+
+      GLuint nr_color_regions;
+      struct intel_region *color_regions[MAX_DRAW_BUFFERS];
+      struct intel_region *depth_region;
+
+      /**
+       * List of buffers accumulated in brw_validate_state to receive
+       * dri_bo_check_aperture treatment before exec, so we can know if we
+       * should flush the batch and try again before emitting primitives.
+       *
+       * This can be a fixed number as we only have a limited number of
+       * objects referenced from the batchbuffer in a primitive emit,
+       * consisting of the vertex buffers, pipelined state pointers,
+       * the CURBE, the depth buffer, and a query BO.
+       */
+      dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+      int validated_bo_count;
+   } state;
+
+   struct brw_cache cache;  /**< non-surface items */
+   struct brw_cache surface_cache;  /**< surface items */
+   struct brw_cached_batch_item *cached_batch_items;
+
+   struct {
+      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+
+      struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
+      GLuint nr_enabled;
+
+#define BRW_NR_UPLOAD_BUFS 17
+#define BRW_UPLOAD_INIT_SIZE (128*1024)
+
+      struct {
+	 dri_bo *bo;
+	 GLuint offset;
+      } upload;
+
+      /* Summary of size and varying of active arrays, so we can check
+       * for changes to this state:
+       */
+      struct brw_vertex_info info;
+      unsigned int min_index, max_index;
+   } vb;
+
+   struct {
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by BRW_NEW_INDICES.
+       */
+      const struct _mesa_index_buffer *ib;
+
+      /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
+      dri_bo *bo;
+      unsigned int offset;
+      unsigned int size;
+      /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
+       * avoid re-uploading the IB packet over and over if we're actually
+       * referencing the same index buffer.
+       */
+      unsigned int start_vertex_offset;
+   } ib;
+
+   /* Active vertex and fragment programs:
+    */
+   const struct gl_vertex_program *vertex_program;
+   const struct gl_fragment_program *fragment_program;
+
+
+   /* For populating the gtt:
+    */
+   GLuint next_free_page;
+
+
+   /* URB partitioning.  NOTE(review): presumably signalled by
+    * BRW_NEW_URB_FENCE; BRW_NEW_URB_ALLOCATIONS is not a defined flag. */
+   struct {
+      GLuint vsize;		/* vertex size plus header in urb registers */
+      GLuint csize;		/* constant buffer size in urb registers */
+      GLuint sfsize;		/* setup data size in urb registers */
+
+      GLboolean constrained;
+
+      GLuint nr_vs_entries;
+      GLuint nr_gs_entries;
+      GLuint nr_clip_entries;
+      GLuint nr_sf_entries;
+      GLuint nr_cs_entries;
+
+/*       GLuint vs_size; */
+/*       GLuint gs_size; */
+/*       GLuint clip_size; */
+/*       GLuint sf_size; */
+/*       GLuint cs_size; */
+
+      GLuint vs_start;
+      GLuint gs_start;
+      GLuint clip_start;
+      GLuint sf_start;
+      GLuint cs_start;
+   } urb;
+
+   
+   /* BRW_NEW_CURBE_OFFSETS: 
+    */
+   struct {
+      GLuint wm_start;  /**< pos of first wm const in CURBE buffer, in units of 16 floats */
+      GLuint wm_size;   /**< size of the wm constants, in units of 16 floats */
+      GLuint clip_start;
+      GLuint clip_size;
+      GLuint vs_start;
+      GLuint vs_size;
+      GLuint total_size;
+
+      dri_bo *curbe_bo;
+      /** Offset within curbe_bo of space for current curbe entry */
+      GLuint curbe_offset;
+      /** Offset within curbe_bo of space for next curbe entry */
+      GLuint curbe_next_offset;
+
+      GLfloat *last_buf;
+      GLuint last_bufsz;
+      /**
+       * Whether we should create a new bo instead of reusing the old one
+       * (if we just dispatched the batch pointing at the old one).
+       */
+      GLboolean need_new_bo;
+   } curbe;
+
+   struct {
+      struct brw_vs_prog_data *prog_data;
+
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      dri_bo *bind_bo;
+      dri_bo *surf_bo[BRW_VS_MAX_SURF];
+      GLuint nr_surfaces;      
+   } vs;
+
+   struct {
+      struct brw_gs_prog_data *prog_data;
+
+      GLboolean prog_active;
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+   } gs;
+
+   struct {
+      struct brw_clip_prog_data *prog_data;
+
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
+   } clip;
+
+
+   struct {
+      struct brw_sf_prog_data *prog_data;
+
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
+   } sf;
+
+   struct {
+      struct brw_wm_prog_data *prog_data;
+      struct brw_wm_compile *compile_data;
+
+      /** Input sizes, calculated from active vertex program.
+       * One bit per fragment program input attribute.
+       */
+      GLbitfield input_size_masks[4];
+
+      /** Array of surface default colors (texture border color) */
+      dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+
+      GLuint render_surf;
+      GLuint nr_surfaces;      
+
+      GLuint max_threads;
+      dri_bo *scratch_bo;
+
+      GLuint sampler_count;
+      dri_bo *sampler_bo;
+
+      /** Binding table of pointers to surf_bo entries */
+      dri_bo *bind_bo;
+      dri_bo *surf_bo[BRW_WM_MAX_SURF];
+
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+   } wm;
+
+
+   struct {
+      dri_bo *prog_bo;
+      dri_bo *state_bo;
+      dri_bo *vp_bo;
+   } cc;
+
+   struct {
+      struct brw_query_object active_head;
+      dri_bo *bo;
+      int index;
+      GLboolean active;
+   } query;
+   /* Used to give every program string a unique id
+    */
+   GLuint program_id;
+};
+
+
+#define BRW_PACKCOLOR8888(r,g,b,a)  (((r)<<24) | ((g)<<16) | ((b)<<8) | (a))  /* packs r,g,b,a from the top byte down; args parenthesized so expression arguments expand safely */
+
+
+
+/*======================================================================
+ * brw_vtbl.c
+ */
+void brwInitVtbl( struct brw_context *brw );
+
+/*======================================================================
+ * brw_context.c
+ */
+GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
+			    __DRIcontextPrivate *driContextPriv,
+			    void *sharedContextPrivate);
+
+/*======================================================================
+ * brw_queryobj.c
+ */
+void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_prepare_query_begin(struct brw_context *brw);
+void brw_emit_query_begin(struct brw_context *brw);
+void brw_emit_query_end(struct brw_context *brw);
+
+/*======================================================================
+ * brw_state_dump.c
+ */
+void brw_debug_batch(struct intel_context *intel);
+
+/*======================================================================
+ * brw_tex.c
+ */
+void brw_validate_textures( struct brw_context *brw );
+
+
+/*======================================================================
+ * brw_program.c
+ */
+void brwInitFragProgFuncs( struct dd_function_table *functions );
+
+
+/* brw_urb.c
+ */
+void brw_upload_urb_fence(struct brw_context *brw);
+
+/* brw_curbe.c
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw);
+
+/* brw_disasm.c */
+int brw_disasm (FILE *file, struct brw_instruction *inst);
+
+/*======================================================================
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct brw_context *
+brw_context( GLcontext *ctx )
+{
+   return (struct brw_context *)ctx;   /* plain pointer cast, no check; assumes ctx sits at the start of a brw_context -- TODO confirm against intel_context layout */
+}
+
+static INLINE struct brw_vertex_program *
+brw_vertex_program(struct gl_vertex_program *p)
+{
+   return (struct brw_vertex_program *) p;   /* plain cast: only meaningful when p is the leading 'program' member of a brw_vertex_program */
+}
+
+static INLINE const struct brw_vertex_program *
+brw_vertex_program_const(const struct gl_vertex_program *p)
+{
+   return (const struct brw_vertex_program *) p;   /* const-preserving form of brw_vertex_program() */
+}
+
+static INLINE struct brw_fragment_program *
+brw_fragment_program(struct gl_fragment_program *p)
+{
+   return (struct brw_fragment_program *) p;   /* plain cast: only meaningful when p is the leading 'program' member of a brw_fragment_program */
+}
+
+static INLINE const struct brw_fragment_program *
+brw_fragment_program_const(const struct gl_fragment_program *p)
+{
+   return (const struct brw_fragment_program *) p;   /* const-preserving form of brw_fragment_program() */
+}
+
+
+
+#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
+
+#endif
+
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
new file mode 100644
index 0000000000..4be6c77aa1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -0,0 +1,376 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+#include "brw_util.h"
+
+
+/**
+ * Partition the CURBE between the various users of constant values:
+ * Note that vertex and fragment shaders can now fetch constants out
+ * of constant buffers.  We no longer allocate a block of the GRF for
+ * constants.  That greatly reduces the demand for space in the CURBE.
+ * Some of the comments within are dated...
+ */
+static void calculate_curbe_offsets( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   /* CACHE_NEW_WM_PROG */
+   const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;   /* float params rounded up to 16-float units */
+   
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;   /* same rounding for the vertex program */
+   GLuint nr_clip_regs = 0;
+   GLuint total_regs;
+
+   /* _NEW_TRANSFORM */
+   if (ctx->Transform.ClipPlanesEnabled) {
+      GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);   /* 6 fixed planes plus enabled user planes (assuming brw_count_bits counts set bits) */
+      nr_clip_regs = (nr_planes * 4 + 15) / 16;   /* 4 floats per plane, rounded up to 16-float units */
+   }
+
+
+   total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
+
+   /* This can happen - what to do?  Probably rather than falling
+    * back, the best thing to do is emit programs which code the
+    * constants as immediate values.  Could do this either as a static
+    * cap on WM and VS, or adaptively.
+    *
+    * Unfortunately, this is currently dependent on the results of the
+    * program generation process (in the case of wm), so this would
+    * introduce the need to re-generate programs in the event of a
+    * curbe allocation failure.
+    */
+   /* Max size is 32 - just large enough to
+    * hold the 128 parameters allowed by
+    * the fragment and vertex program
+    * api's.  It's not clear what happens
+    * when both VP and FP want to use 128
+    * parameters, though. 
+    */
+   assert(total_regs <= 32);
+
+   /* Lazy resize: re-layout only when a section must grow, the clip
+    * section changes size, or total usage has shrunk substantially. */
+   if (nr_fp_regs > brw->curbe.wm_size ||
+       nr_vp_regs > brw->curbe.vs_size ||
+       nr_clip_regs != brw->curbe.clip_size ||
+       (total_regs < brw->curbe.total_size / 4 &&   /* shrink only below a quarter of the current layout... */
+	brw->curbe.total_size > 16)) {              /* ...and only when that layout exceeds 16 units */
+
+      GLuint reg = 0;
+
+      /* Calculate a new layout, packed in the order wm, clip, vs:
+       */
+      reg = 0;
+      brw->curbe.wm_start = reg;
+      brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
+      brw->curbe.clip_start = reg;
+      brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
+      brw->curbe.vs_start = reg;
+      brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
+      brw->curbe.total_size = reg;
+
+      if (0)   /* debug print, compiled out */
+	 _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+		      brw->curbe.wm_start,
+		      brw->curbe.wm_size,
+		      brw->curbe.clip_start,
+		      brw->curbe.clip_size,
+		      brw->curbe.vs_start,
+		      brw->curbe.vs_size );
+
+      brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;   /* raised only when the layout actually changed */
+   }
+}
+
+
+const struct brw_tracked_state brw_curbe_offsets = {
+   .dirty = {   /* the same three flags named by the marker comments in calculate_curbe_offsets() */
+      .mesa = _NEW_TRANSFORM,
+      .brw  = BRW_NEW_VERTEX_PROGRAM,
+      .cache = CACHE_NEW_WM_PROG
+   },
+   .prepare = calculate_curbe_offsets   /* prepare hook only; .emit is left unset (NULL) */
+};
+
+
+
+
+/* Define the number of curbes within CS's urb allocation.  Multiple
+ * urb entries -> multiple curbes.  These will be used by
+ * fixed-function hardware in a double-buffering scheme to avoid a
+ * pipeline stall each time the contents of the curbe is changed.
+ */
+void brw_upload_cs_urb_state(struct brw_context *brw)
+{
+   struct brw_cs_urb_state cs_urb;
+   memset(&cs_urb, 0, sizeof(cs_urb));   /* start from an all-zero packet; only the fields below are set */
+
+   /* It appears that this is the state packet for the CS unit, ie. the
+    * urb entries detailed here are housed in the CS range from the
+    * URB_FENCE command.
+    */
+   cs_urb.header.opcode = CMD_CS_URB_STATE;
+   cs_urb.header.length = sizeof(cs_urb)/4 - 2;   /* packet size in 4-byte units, minus 2 */
+
+   /* BRW_NEW_URB_FENCE */
+   cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
+   cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;   /* written as size minus one */
+
+   assert(brw->urb.nr_cs_entries);   /* NOTE(review): checked only after the value was already stored above */
+   BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+}
+
+static GLfloat fixed_plane[6][4] = {   /* the six fixed clip planes; prepare_constant_buffer() copies them ahead of any user planes */
+   { 0,    0,   -1, 1 },
+   { 0,    0,    1, 1 },
+   { 0,   -1,    0, 1 },
+   { 0,    1,    0, 1 },
+   {-1,    0,    0, 1 },
+   { 1,    0,    0, 1 }
+};
+
+/* Upload a new set of constants.  Too much variability to go into the
+ * cache mechanism, but maybe would benefit from a comparison against
+ * the current uploaded set of constants.
+ */
+static void prepare_constant_buffer(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const struct brw_vertex_program *vp =
+      brw_vertex_program_const(brw->vertex_program);
+   const struct brw_fragment_program *fp =
+      brw_fragment_program_const(brw->fragment_program);
+   const GLuint sz = brw->curbe.total_size;
+   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+   GLfloat *buf;
+   GLuint i;
+
+   if (sz == 0) {
+      if (brw->curbe.last_buf) {
+	 free(brw->curbe.last_buf);
+	 brw->curbe.last_buf = NULL;
+	 brw->curbe.last_bufsz  = 0;
+      }
+      return;
+   }
+
+   buf = (GLfloat *) _mesa_calloc(bufsz);
+
+   /* fragment shader constants */
+   if (brw->curbe.wm_size) {
+      GLuint offset = brw->curbe.wm_start * 16;
+
+      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); 
+
+      /* copy float constants */
+      for (i = 0; i < brw->wm.prog_data->nr_params; i++) 
+	 buf[offset + i] = *brw->wm.prog_data->param[i];
+   }
+
+
+   /* The clipplanes are actually delivered to both CLIP and VS units.
+    * VS uses them to calculate the outcode bitmasks.
+    */
+   if (brw->curbe.clip_size) {
+      GLuint offset = brw->curbe.clip_start * 16;
+      GLuint j;
+
+      /* If any planes are going this way, send them all this way:
+       */
+      for (i = 0; i < 6; i++) {
+	 buf[offset + i * 4 + 0] = fixed_plane[i][0];
+	 buf[offset + i * 4 + 1] = fixed_plane[i][1];
+	 buf[offset + i * 4 + 2] = fixed_plane[i][2];
+	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
+      }
+
+      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
+       * clip-space:
+       */
+      assert(MAX_CLIP_PLANES == 6);
+      for (j = 0; j < MAX_CLIP_PLANES; j++) {
+	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
+	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
+	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
+	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
+	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
+	    i++;
+	 }
+      }
+   }
+
+   /* vertex shader constants */
+   if (brw->curbe.vs_size) {
+      GLuint offset = brw->curbe.vs_start * 16;
+      GLuint nr = brw->vs.prog_data->nr_params / 4;
+
+      if (brw->vertex_program->IsNVProgram)
+	 _mesa_load_tracked_matrices(ctx);
+
+      /* Updates the ParamaterValues[i] pointers for all parameters of the
+       * basic type of PROGRAM_STATE_VAR.
+       */
+      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); 
+
+      /* XXX just use a memcpy here */
+      for (i = 0; i < nr; i++) {
+         const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
+	 buf[offset + i * 4 + 0] = value[0];
+	 buf[offset + i * 4 + 1] = value[1];
+	 buf[offset + i * 4 + 2] = value[2];
+	 buf[offset + i * 4 + 3] = value[3];
+      }
+   }
+
+   if (0) {
+      for (i = 0; i < sz*16; i+=4) 
+	 _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
+
+      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+		   brw->curbe.last_buf, buf,
+		   bufsz, brw->curbe.last_bufsz,
+		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
+   }
+
+   if (brw->curbe.curbe_bo != NULL &&
+       brw->curbe.last_buf &&
+       bufsz == brw->curbe.last_bufsz &&
+       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
+      /* constants have not changed */
+      _mesa_free(buf);
+   } 
+   else {
+      /* constants have changed */
+      if (brw->curbe.last_buf)
+	 _mesa_free(brw->curbe.last_buf);
+
+      brw->curbe.last_buf = buf;
+      brw->curbe.last_bufsz = bufsz;
+
+      if (brw->curbe.curbe_bo != NULL &&
+	  (brw->curbe.need_new_bo ||
+	   brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
+      {
+	 dri_bo_unreference(brw->curbe.curbe_bo);
+	 brw->curbe.curbe_bo = NULL;
+      }
+
+      if (brw->curbe.curbe_bo == NULL) {
+	 /* Allocate a single page for CURBE entries for this batchbuffer.
+	  * They're generally around 64b.
+	  */
+	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
+					    4096, 1 << 6);
+	 brw->curbe.curbe_next_offset = 0;
+      }
+
+      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
+      brw->curbe.curbe_next_offset += bufsz;
+      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
+
+      /* Copy data to the buffer:
+       */
+      dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
+   }
+
+   brw_add_validated_bo(brw, brw->curbe.curbe_bo);
+
+   /* Because this provokes an action (ie copy the constants into the
+    * URB), it shouldn't be shortcircuited if identical to the
+    * previous time - because eg. the urb destination may have
+    * changed, or the urb contents different to last time.
+    *
+    * Note that the data referred to is actually copied internally,
+    * not just used in place according to passed pointer.
+    *
+    * It appears that the CS unit takes care of using each available
+    * URB entry (Const URB Entry == CURBE) in turn, and issuing
+    * flushes as necessary when doublebuffering of CURBEs isn't
+    * possible.
+    */
+}
+
+static void emit_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* never named below; presumably consumed by the batch macros -- TODO confirm */
+   GLuint sz = brw->curbe.total_size;
+   /* Emit one CMD_CONST_BUFFER packet (two batch entries) for the current CURBE. */
+   BEGIN_BATCH(2, IGNORE_CLIPRECTS);
+   if (sz == 0) { /* no constants: header without bit 8, then a zero entry */
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
+      OUT_BATCH(0);
+   } else { /* header with bit 8 set (presumably "buffer valid" -- TODO confirm) */
+      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
+      OUT_RELOC(brw->curbe.curbe_bo,
+		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		(sz - 1) + brw->curbe.curbe_offset); /* delta: offset of the data in curbe_bo plus (sz - 1) */
+   }
+   ADVANCE_BATCH();
+}
+
+/* This tracked state is unique in that the state it monitors varies
+ * dynamically depending on the parameters tracked by the fragment and
+ * vertex programs.  This is the template used as a starting point,
+ * each context will maintain a copy of this internally and update as
+ * required.
+ */
+const struct brw_tracked_state brw_constant_buffer = {
+   .emit = emit_constant_buffer, /* writes the CMD_CONST_BUFFER packet */
+   .prepare = prepare_constant_buffer, /* builds and uploads the CURBE data */
+   .dirty = {
+      .cache = CACHE_NEW_WM_PROG,
+      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_CURBE_OFFSETS |
+             BRW_NEW_FRAGMENT_PROGRAM |
+             BRW_NEW_VERTEX_PROGRAM |
+             BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
+             BRW_NEW_PSP, /* Implicit - hardware requires this, not used above */
+   },
+};
+
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
new file mode 100644
index 0000000000..78d457ad2b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -0,0 +1,851 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_DEFINES_H
+#define BRW_DEFINES_H
+
+/* 3D state:
+ */
+#define _3DOP_3DSTATE_PIPELINED       0x0
+#define _3DOP_3DSTATE_NONPIPELINED    0x1
+#define _3DOP_3DCONTROL               0x2
+#define _3DOP_3DPRIMITIVE             0x3
+
+#define _3DSTATE_PIPELINED_POINTERS       0x00
+#define _3DSTATE_BINDING_TABLE_POINTERS   0x01
+#define _3DSTATE_VERTEX_BUFFERS           0x08
+#define _3DSTATE_VERTEX_ELEMENTS          0x09
+#define _3DSTATE_INDEX_BUFFER             0x0A
+#define _3DSTATE_VF_STATISTICS            0x0B
+#define _3DSTATE_DRAWING_RECTANGLE            0x00
+#define _3DSTATE_CONSTANT_COLOR               0x01
+#define _3DSTATE_SAMPLER_PALETTE_LOAD         0x02
+#define _3DSTATE_CHROMA_KEY                   0x04
+#define _3DSTATE_DEPTH_BUFFER                 0x05
+#define _3DSTATE_POLY_STIPPLE_OFFSET          0x06
+#define _3DSTATE_POLY_STIPPLE_PATTERN         0x07
+#define _3DSTATE_LINE_STIPPLE                 0x08
+#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP    0x09
+#define _3DCONTROL    0x00
+
+#define PIPE_CONTROL_NOWRITE          0x00
+#define PIPE_CONTROL_WRITEIMMEDIATE   0x01
+#define PIPE_CONTROL_WRITEDEPTH       0x02
+#define PIPE_CONTROL_WRITETIMESTAMP   0x03
+
+#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00
+#define PIPE_CONTROL_GTTWRITE_GLOBAL        0x01
+
+#define _3DPRIM_POINTLIST         0x01
+#define _3DPRIM_LINELIST          0x02
+#define _3DPRIM_LINESTRIP         0x03
+#define _3DPRIM_TRILIST           0x04
+#define _3DPRIM_TRISTRIP          0x05
+#define _3DPRIM_TRIFAN            0x06
+#define _3DPRIM_QUADLIST          0x07
+#define _3DPRIM_QUADSTRIP         0x08
+#define _3DPRIM_LINELIST_ADJ      0x09
+#define _3DPRIM_LINESTRIP_ADJ     0x0A
+#define _3DPRIM_TRILIST_ADJ       0x0B
+#define _3DPRIM_TRISTRIP_ADJ      0x0C
+#define _3DPRIM_TRISTRIP_REVERSE  0x0D
+#define _3DPRIM_POLYGON           0x0E
+#define _3DPRIM_RECTLIST          0x0F
+#define _3DPRIM_LINELOOP          0x10
+#define _3DPRIM_POINTLIST_BF      0x11
+#define _3DPRIM_LINESTRIP_CONT    0x12
+#define _3DPRIM_LINESTRIP_BF      0x13
+#define _3DPRIM_LINESTRIP_CONT_BF 0x14
+#define _3DPRIM_TRIFAN_NOSTIPPLE  0x15
+
+#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM     1
+
+#define BRW_ANISORATIO_2     0 
+#define BRW_ANISORATIO_4     1 
+#define BRW_ANISORATIO_6     2 
+#define BRW_ANISORATIO_8     3 
+#define BRW_ANISORATIO_10    4 
+#define BRW_ANISORATIO_12    5 
+#define BRW_ANISORATIO_14    6 
+#define BRW_ANISORATIO_16    7
+
+#define BRW_BLENDFACTOR_ONE                 0x1
+#define BRW_BLENDFACTOR_SRC_COLOR           0x2
+#define BRW_BLENDFACTOR_SRC_ALPHA           0x3
+#define BRW_BLENDFACTOR_DST_ALPHA           0x4
+#define BRW_BLENDFACTOR_DST_COLOR           0x5
+#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE  0x6
+#define BRW_BLENDFACTOR_CONST_COLOR         0x7
+#define BRW_BLENDFACTOR_CONST_ALPHA         0x8
+#define BRW_BLENDFACTOR_SRC1_COLOR          0x9
+#define BRW_BLENDFACTOR_SRC1_ALPHA          0x0A
+#define BRW_BLENDFACTOR_ZERO                0x11
+#define BRW_BLENDFACTOR_INV_SRC_COLOR       0x12
+#define BRW_BLENDFACTOR_INV_SRC_ALPHA       0x13
+#define BRW_BLENDFACTOR_INV_DST_ALPHA       0x14
+#define BRW_BLENDFACTOR_INV_DST_COLOR       0x15
+#define BRW_BLENDFACTOR_INV_CONST_COLOR     0x17
+#define BRW_BLENDFACTOR_INV_CONST_ALPHA     0x18
+#define BRW_BLENDFACTOR_INV_SRC1_COLOR      0x19
+#define BRW_BLENDFACTOR_INV_SRC1_ALPHA      0x1A
+
+#define BRW_BLENDFUNCTION_ADD               0
+#define BRW_BLENDFUNCTION_SUBTRACT          1
+#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT  2
+#define BRW_BLENDFUNCTION_MIN               3
+#define BRW_BLENDFUNCTION_MAX               4
+
+#define BRW_ALPHATEST_FORMAT_UNORM8         0
+#define BRW_ALPHATEST_FORMAT_FLOAT32        1
+
+#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH  0
+#define BRW_CHROMAKEY_REPLACE_BLACK      1
+
+#define BRW_CLIP_API_OGL     0
+#define BRW_CLIP_API_DX      1
+
+#define BRW_CLIPMODE_NORMAL              0
+#define BRW_CLIPMODE_CLIP_ALL            1
+#define BRW_CLIPMODE_CLIP_NON_REJECTED   2
+#define BRW_CLIPMODE_REJECT_ALL          3
+#define BRW_CLIPMODE_ACCEPT_ALL          4
+#define BRW_CLIPMODE_KERNEL_CLIP         5
+
+#define BRW_CLIP_NDCSPACE     0
+#define BRW_CLIP_SCREENSPACE  1
+
+#define BRW_COMPAREFUNCTION_ALWAYS       0
+#define BRW_COMPAREFUNCTION_NEVER        1
+#define BRW_COMPAREFUNCTION_LESS         2
+#define BRW_COMPAREFUNCTION_EQUAL        3
+#define BRW_COMPAREFUNCTION_LEQUAL       4
+#define BRW_COMPAREFUNCTION_GREATER      5
+#define BRW_COMPAREFUNCTION_NOTEQUAL     6
+#define BRW_COMPAREFUNCTION_GEQUAL       7
+
+#define BRW_COVERAGE_PIXELS_HALF     0
+#define BRW_COVERAGE_PIXELS_1        1
+#define BRW_COVERAGE_PIXELS_2        2
+#define BRW_COVERAGE_PIXELS_4        3
+
+#define BRW_CULLMODE_BOTH        0
+#define BRW_CULLMODE_NONE        1
+#define BRW_CULLMODE_FRONT       2
+#define BRW_CULLMODE_BACK        3
+
+#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM      0
+#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT  1
+
+#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT     0
+#define BRW_DEPTHFORMAT_D32_FLOAT                1
+#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT        2
+#define BRW_DEPTHFORMAT_D16_UNORM                5
+
+#define BRW_FLOATING_POINT_IEEE_754        0
+#define BRW_FLOATING_POINT_NON_IEEE_754    1
+
+#define BRW_FRONTWINDING_CW      0
+#define BRW_FRONTWINDING_CCW     1
+
+#define BRW_SPRITE_POINT_ENABLE  16
+
+#define BRW_INDEX_BYTE     0
+#define BRW_INDEX_WORD     1
+#define BRW_INDEX_DWORD    2
+
+#define BRW_LOGICOPFUNCTION_CLEAR            0
+#define BRW_LOGICOPFUNCTION_NOR              1
+#define BRW_LOGICOPFUNCTION_AND_INVERTED     2
+#define BRW_LOGICOPFUNCTION_COPY_INVERTED    3
+#define BRW_LOGICOPFUNCTION_AND_REVERSE      4
+#define BRW_LOGICOPFUNCTION_INVERT           5
+#define BRW_LOGICOPFUNCTION_XOR              6
+#define BRW_LOGICOPFUNCTION_NAND             7
+#define BRW_LOGICOPFUNCTION_AND              8
+#define BRW_LOGICOPFUNCTION_EQUIV            9
+#define BRW_LOGICOPFUNCTION_NOOP             10
+#define BRW_LOGICOPFUNCTION_OR_INVERTED      11
+#define BRW_LOGICOPFUNCTION_COPY             12
+#define BRW_LOGICOPFUNCTION_OR_REVERSE       13
+#define BRW_LOGICOPFUNCTION_OR               14
+#define BRW_LOGICOPFUNCTION_SET              15  
+
+#define BRW_MAPFILTER_NEAREST        0x0 
+#define BRW_MAPFILTER_LINEAR         0x1 
+#define BRW_MAPFILTER_ANISOTROPIC    0x2
+
+#define BRW_MIPFILTER_NONE        0   
+#define BRW_MIPFILTER_NEAREST     1   
+#define BRW_MIPFILTER_LINEAR      3
+
+#define BRW_POLYGON_FRONT_FACING     0
+#define BRW_POLYGON_BACK_FACING      1
+
+#define BRW_PREFILTER_ALWAYS     0x0 
+#define BRW_PREFILTER_NEVER      0x1
+#define BRW_PREFILTER_LESS       0x2
+#define BRW_PREFILTER_EQUAL      0x3
+#define BRW_PREFILTER_LEQUAL     0x4
+#define BRW_PREFILTER_GREATER    0x5
+#define BRW_PREFILTER_NOTEQUAL   0x6
+#define BRW_PREFILTER_GEQUAL     0x7
+
+#define BRW_PROVOKING_VERTEX_0    0
+#define BRW_PROVOKING_VERTEX_1    1 
+#define BRW_PROVOKING_VERTEX_2    2
+
+#define BRW_RASTRULE_UPPER_LEFT  0    
+#define BRW_RASTRULE_UPPER_RIGHT 1
+/* These are listed as "Reserved, but not seen as useful"
+ * in Intel documentation (page 212, "Point Rasterization Rule",
+ * section 7.4 "SF Pipeline State Summary", of document
+ * "Intel® 965 Express Chipset Family and Intel® G35 Express
+ * Chipset Graphics Controller Programmer's Reference Manual,
+ * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+ * available at 
+ *     http://intellinuxgraphics.org/documentation.html
+ * at the time of this writing).
+ *
+ * These appear to be supported on at least some
+ * i965-family devices, and the BRW_RASTRULE_LOWER_RIGHT
+ * is useful when using OpenGL to render to a FBO
+ * (which has the pixel coordinate Y orientation inverted
+ * with respect to the normal OpenGL pixel coordinate system).
+ */
+#define BRW_RASTRULE_LOWER_LEFT  2
+#define BRW_RASTRULE_LOWER_RIGHT 3
+
+#define BRW_RENDERTARGET_CLAMPRANGE_UNORM    0
+#define BRW_RENDERTARGET_CLAMPRANGE_SNORM    1
+#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT   2
+
+#define BRW_STENCILOP_KEEP               0
+#define BRW_STENCILOP_ZERO               1
+#define BRW_STENCILOP_REPLACE            2
+#define BRW_STENCILOP_INCRSAT            3
+#define BRW_STENCILOP_DECRSAT            4
+#define BRW_STENCILOP_INCR               5
+#define BRW_STENCILOP_DECR               6
+#define BRW_STENCILOP_INVERT             7
+
+#define BRW_SURFACE_MIPMAPLAYOUT_BELOW   0
+#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT   1
+
+#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT             0x000 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SINT              0x001 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UINT              0x002 
+#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM             0x003 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM             0x004 
+#define BRW_SURFACEFORMAT_R64G64_FLOAT                   0x005 
+#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT             0x006 
+#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED           0x007
+#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED           0x008
+#define BRW_SURFACEFORMAT_R32G32B32_FLOAT                0x040 
+#define BRW_SURFACEFORMAT_R32G32B32_SINT                 0x041 
+#define BRW_SURFACEFORMAT_R32G32B32_UINT                 0x042 
+#define BRW_SURFACEFORMAT_R32G32B32_UNORM                0x043 
+#define BRW_SURFACEFORMAT_R32G32B32_SNORM                0x044 
+#define BRW_SURFACEFORMAT_R32G32B32_SSCALED              0x045 
+#define BRW_SURFACEFORMAT_R32G32B32_USCALED              0x046 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM             0x080 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM             0x081 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SINT              0x082 
+#define BRW_SURFACEFORMAT_R16G16B16A16_UINT              0x083 
+#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT             0x084 
+#define BRW_SURFACEFORMAT_R32G32_FLOAT                   0x085 
+#define BRW_SURFACEFORMAT_R32G32_SINT                    0x086 
+#define BRW_SURFACEFORMAT_R32G32_UINT                    0x087 
+#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS       0x088 
+#define BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT        0x089 
+#define BRW_SURFACEFORMAT_L32A32_FLOAT                   0x08A 
+#define BRW_SURFACEFORMAT_R32G32_UNORM                   0x08B 
+#define BRW_SURFACEFORMAT_R32G32_SNORM                   0x08C 
+#define BRW_SURFACEFORMAT_R64_FLOAT                      0x08D 
+#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM             0x08E 
+#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT             0x08F 
+#define BRW_SURFACEFORMAT_A32X32_FLOAT                   0x090 
+#define BRW_SURFACEFORMAT_L32X32_FLOAT                   0x091 
+#define BRW_SURFACEFORMAT_I32X32_FLOAT                   0x092 
+#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED           0x093
+#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED           0x094
+#define BRW_SURFACEFORMAT_R32G32_SSCALED                 0x095
+#define BRW_SURFACEFORMAT_R32G32_USCALED                 0x096
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM                 0x0C0 
+#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB            0x0C1 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM              0x0C2 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB         0x0C3 
+#define BRW_SURFACEFORMAT_R10G10B10A2_UINT               0x0C4 
+#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM       0x0C5 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM                 0x0C7 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB            0x0C8 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM                 0x0C9 
+#define BRW_SURFACEFORMAT_R8G8B8A8_SINT                  0x0CA 
+#define BRW_SURFACEFORMAT_R8G8B8A8_UINT                  0x0CB 
+#define BRW_SURFACEFORMAT_R16G16_UNORM                   0x0CC 
+#define BRW_SURFACEFORMAT_R16G16_SNORM                   0x0CD 
+#define BRW_SURFACEFORMAT_R16G16_SINT                    0x0CE 
+#define BRW_SURFACEFORMAT_R16G16_UINT                    0x0CF 
+#define BRW_SURFACEFORMAT_R16G16_FLOAT                   0x0D0 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM              0x0D1 
+#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB         0x0D2 
+#define BRW_SURFACEFORMAT_R11G11B10_FLOAT                0x0D3 
+#define BRW_SURFACEFORMAT_R32_SINT                       0x0D6 
+#define BRW_SURFACEFORMAT_R32_UINT                       0x0D7 
+#define BRW_SURFACEFORMAT_R32_FLOAT                      0x0D8 
+#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS          0x0D9 
+#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT           0x0DA 
+#define BRW_SURFACEFORMAT_L16A16_UNORM                   0x0DF 
+#define BRW_SURFACEFORMAT_I24X8_UNORM                    0x0E0 
+#define BRW_SURFACEFORMAT_L24X8_UNORM                    0x0E1 
+#define BRW_SURFACEFORMAT_A24X8_UNORM                    0x0E2 
+#define BRW_SURFACEFORMAT_I32_FLOAT                      0x0E3 
+#define BRW_SURFACEFORMAT_L32_FLOAT                      0x0E4 
+#define BRW_SURFACEFORMAT_A32_FLOAT                      0x0E5 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM                 0x0E9 
+#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB            0x0EA 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM                 0x0EB 
+#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB            0x0EC 
+#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP             0x0ED 
+#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM              0x0EE 
+#define BRW_SURFACEFORMAT_L16A16_FLOAT                   0x0F0 
+#define BRW_SURFACEFORMAT_R32_UNORM                      0x0F1 
+#define BRW_SURFACEFORMAT_R32_SNORM                      0x0F2 
+#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED            0x0F3
+#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED               0x0F4
+#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED               0x0F5
+#define BRW_SURFACEFORMAT_R16G16_SSCALED                 0x0F6
+#define BRW_SURFACEFORMAT_R16G16_USCALED                 0x0F7
+#define BRW_SURFACEFORMAT_R32_SSCALED                    0x0F8
+#define BRW_SURFACEFORMAT_R32_USCALED                    0x0F9
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM                   0x100 
+#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB              0x101 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM                 0x102 
+#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB            0x103 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM                 0x104 
+#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB            0x105 
+#define BRW_SURFACEFORMAT_R8G8_UNORM                     0x106 
+#define BRW_SURFACEFORMAT_R8G8_SNORM                     0x107 
+#define BRW_SURFACEFORMAT_R8G8_SINT                      0x108 
+#define BRW_SURFACEFORMAT_R8G8_UINT                      0x109 
+#define BRW_SURFACEFORMAT_R16_UNORM                      0x10A 
+#define BRW_SURFACEFORMAT_R16_SNORM                      0x10B 
+#define BRW_SURFACEFORMAT_R16_SINT                       0x10C 
+#define BRW_SURFACEFORMAT_R16_UINT                       0x10D 
+#define BRW_SURFACEFORMAT_R16_FLOAT                      0x10E 
+#define BRW_SURFACEFORMAT_I16_UNORM                      0x111 
+#define BRW_SURFACEFORMAT_L16_UNORM                      0x112 
+#define BRW_SURFACEFORMAT_A16_UNORM                      0x113 
+#define BRW_SURFACEFORMAT_L8A8_UNORM                     0x114 
+#define BRW_SURFACEFORMAT_I16_FLOAT                      0x115
+#define BRW_SURFACEFORMAT_L16_FLOAT                      0x116
+#define BRW_SURFACEFORMAT_A16_FLOAT                      0x117
+#define BRW_SURFACEFORMAT_L8A8_UNORM_SRGB                0x118
+#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM            0x119
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM                 0x11A
+#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB            0x11B
+#define BRW_SURFACEFORMAT_R8G8_SSCALED                   0x11C
+#define BRW_SURFACEFORMAT_R8G8_USCALED                   0x11D
+#define BRW_SURFACEFORMAT_R16_SSCALED                    0x11E
+#define BRW_SURFACEFORMAT_R16_USCALED                    0x11F
+#define BRW_SURFACEFORMAT_R8_UNORM                       0x140 
+#define BRW_SURFACEFORMAT_R8_SNORM                       0x141 
+#define BRW_SURFACEFORMAT_R8_SINT                        0x142 
+#define BRW_SURFACEFORMAT_R8_UINT                        0x143 
+#define BRW_SURFACEFORMAT_A8_UNORM                       0x144 
+#define BRW_SURFACEFORMAT_I8_UNORM                       0x145 
+#define BRW_SURFACEFORMAT_L8_UNORM                       0x146 
+#define BRW_SURFACEFORMAT_P4A4_UNORM                     0x147 
+#define BRW_SURFACEFORMAT_A4P4_UNORM                     0x148
+#define BRW_SURFACEFORMAT_R8_SSCALED                     0x149
+#define BRW_SURFACEFORMAT_R8_USCALED                     0x14A
+#define BRW_SURFACEFORMAT_L8_UNORM_SRGB                  0x14C
+#define BRW_SURFACEFORMAT_R1_UINT                        0x181 
+#define BRW_SURFACEFORMAT_YCRCB_NORMAL                   0x182 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY                  0x183 
+#define BRW_SURFACEFORMAT_BC1_UNORM                      0x186 
+#define BRW_SURFACEFORMAT_BC2_UNORM                      0x187 
+#define BRW_SURFACEFORMAT_BC3_UNORM                      0x188 
+#define BRW_SURFACEFORMAT_BC4_UNORM                      0x189 
+#define BRW_SURFACEFORMAT_BC5_UNORM                      0x18A 
+#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB                 0x18B 
+#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB                 0x18C 
+#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB                 0x18D 
+#define BRW_SURFACEFORMAT_MONO8                          0x18E 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPUV                   0x18F 
+#define BRW_SURFACEFORMAT_YCRCB_SWAPY                    0x190 
+#define BRW_SURFACEFORMAT_DXT1_RGB                       0x191 
+#define BRW_SURFACEFORMAT_FXT1                           0x192 
+#define BRW_SURFACEFORMAT_R8G8B8_UNORM                   0x193 
+#define BRW_SURFACEFORMAT_R8G8B8_SNORM                   0x194 
+#define BRW_SURFACEFORMAT_R8G8B8_SSCALED                 0x195 
+#define BRW_SURFACEFORMAT_R8G8B8_USCALED                 0x196 
+#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT             0x197 
+#define BRW_SURFACEFORMAT_R64G64B64_FLOAT                0x198 
+#define BRW_SURFACEFORMAT_BC4_SNORM                      0x199 
+#define BRW_SURFACEFORMAT_BC5_SNORM                      0x19A 
+#define BRW_SURFACEFORMAT_R16G16B16_UNORM                0x19C 
+#define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D 
+#define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E 
+#define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+
+#define BRW_SURFACERETURNFORMAT_FLOAT32  0
+#define BRW_SURFACERETURNFORMAT_S1       1
+
+#define BRW_SURFACE_1D      0
+#define BRW_SURFACE_2D      1
+#define BRW_SURFACE_3D      2
+#define BRW_SURFACE_CUBE    3
+#define BRW_SURFACE_BUFFER  4
+#define BRW_SURFACE_NULL    7
+
+#define BRW_TEXCOORDMODE_WRAP            0
+#define BRW_TEXCOORDMODE_MIRROR          1
+#define BRW_TEXCOORDMODE_CLAMP           2
+#define BRW_TEXCOORDMODE_CUBE            3
+#define BRW_TEXCOORDMODE_CLAMP_BORDER    4
+#define BRW_TEXCOORDMODE_MIRROR_ONCE     5
+
+#define BRW_THREAD_PRIORITY_NORMAL   0
+#define BRW_THREAD_PRIORITY_HIGH     1
+
+#define BRW_TILEWALK_XMAJOR                 0
+#define BRW_TILEWALK_YMAJOR                 1
+
+#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS  0
+#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS  1
+
+/* Execution Unit (EU) defines
+ */
+
+#define BRW_ALIGN_1   0
+#define BRW_ALIGN_16  1
+
+#define BRW_ADDRESS_DIRECT                        0
+#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER    1
+
+#define BRW_CHANNEL_X     0
+#define BRW_CHANNEL_Y     1
+#define BRW_CHANNEL_Z     2
+#define BRW_CHANNEL_W     3
+
+#define BRW_COMPRESSION_NONE          0
+#define BRW_COMPRESSION_2NDHALF       1
+#define BRW_COMPRESSION_COMPRESSED    2
+
+#define BRW_CONDITIONAL_NONE  0
+#define BRW_CONDITIONAL_Z     1
+#define BRW_CONDITIONAL_NZ    2
+#define BRW_CONDITIONAL_EQ    1	/* Z */
+#define BRW_CONDITIONAL_NEQ   2	/* NZ */
+#define BRW_CONDITIONAL_G     3
+#define BRW_CONDITIONAL_GE    4
+#define BRW_CONDITIONAL_L     5
+#define BRW_CONDITIONAL_LE    6
+#define BRW_CONDITIONAL_R     7
+#define BRW_CONDITIONAL_O     8
+#define BRW_CONDITIONAL_U     9
+
+#define BRW_DEBUG_NONE        0
+#define BRW_DEBUG_BREAKPOINT  1
+
+#define BRW_DEPENDENCY_NORMAL         0
+#define BRW_DEPENDENCY_NOTCLEARED     1
+#define BRW_DEPENDENCY_NOTCHECKED     2
+#define BRW_DEPENDENCY_DISABLE        3
+
+#define BRW_EXECUTE_1     0
+#define BRW_EXECUTE_2     1
+#define BRW_EXECUTE_4     2
+#define BRW_EXECUTE_8     3
+#define BRW_EXECUTE_16    4
+#define BRW_EXECUTE_32    5
+
+#define BRW_HORIZONTAL_STRIDE_0   0
+#define BRW_HORIZONTAL_STRIDE_1   1
+#define BRW_HORIZONTAL_STRIDE_2   2
+#define BRW_HORIZONTAL_STRIDE_4   3
+
+#define BRW_INSTRUCTION_NORMAL    0
+#define BRW_INSTRUCTION_SATURATE  1
+
+#define BRW_MASK_ENABLE   0
+#define BRW_MASK_DISABLE  1
+
+#define BRW_OPCODE_MOV        1
+#define BRW_OPCODE_SEL        2
+#define BRW_OPCODE_NOT        4
+#define BRW_OPCODE_AND        5
+#define BRW_OPCODE_OR         6
+#define BRW_OPCODE_XOR        7
+#define BRW_OPCODE_SHR        8
+#define BRW_OPCODE_SHL        9
+#define BRW_OPCODE_RSR        10
+#define BRW_OPCODE_RSL        11
+#define BRW_OPCODE_ASR        12
+#define BRW_OPCODE_CMP        16
+#define BRW_OPCODE_CMPN       17
+#define BRW_OPCODE_JMPI       32
+#define BRW_OPCODE_IF         34
+#define BRW_OPCODE_IFF        35
+#define BRW_OPCODE_ELSE       36
+#define BRW_OPCODE_ENDIF      37
+#define BRW_OPCODE_DO         38
+#define BRW_OPCODE_WHILE      39
+#define BRW_OPCODE_BREAK      40
+#define BRW_OPCODE_CONTINUE   41
+#define BRW_OPCODE_HALT       42
+#define BRW_OPCODE_MSAVE      44
+#define BRW_OPCODE_MRESTORE   45
+#define BRW_OPCODE_PUSH       46
+#define BRW_OPCODE_POP        47
+#define BRW_OPCODE_WAIT       48
+#define BRW_OPCODE_SEND       49
+#define BRW_OPCODE_ADD        64
+#define BRW_OPCODE_MUL        65
+#define BRW_OPCODE_AVG        66
+#define BRW_OPCODE_FRC        67
+#define BRW_OPCODE_RNDU       68
+#define BRW_OPCODE_RNDD       69
+#define BRW_OPCODE_RNDE       70
+#define BRW_OPCODE_RNDZ       71
+#define BRW_OPCODE_MAC        72
+#define BRW_OPCODE_MACH       73
+#define BRW_OPCODE_LZD        74
+#define BRW_OPCODE_SAD2       80
+#define BRW_OPCODE_SADA2      81
+#define BRW_OPCODE_DP4        84
+#define BRW_OPCODE_DPH        85
+#define BRW_OPCODE_DP3        86
+#define BRW_OPCODE_DP2        87
+#define BRW_OPCODE_DPA2       88
+#define BRW_OPCODE_LINE       89
+#define BRW_OPCODE_NOP        126
+
+#define BRW_PREDICATE_NONE             0
+#define BRW_PREDICATE_NORMAL           1
+#define BRW_PREDICATE_ALIGN1_ANYV             2
+#define BRW_PREDICATE_ALIGN1_ALLV             3
+#define BRW_PREDICATE_ALIGN1_ANY2H            4
+#define BRW_PREDICATE_ALIGN1_ALL2H            5
+#define BRW_PREDICATE_ALIGN1_ANY4H            6
+#define BRW_PREDICATE_ALIGN1_ALL4H            7
+#define BRW_PREDICATE_ALIGN1_ANY8H            8
+#define BRW_PREDICATE_ALIGN1_ALL8H            9
+#define BRW_PREDICATE_ALIGN1_ANY16H           10
+#define BRW_PREDICATE_ALIGN1_ALL16H           11
+#define BRW_PREDICATE_ALIGN16_REPLICATE_X     2
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Y     3
+#define BRW_PREDICATE_ALIGN16_REPLICATE_Z     4
+#define BRW_PREDICATE_ALIGN16_REPLICATE_W     5
+#define BRW_PREDICATE_ALIGN16_ANY4H           6
+#define BRW_PREDICATE_ALIGN16_ALL4H           7
+
+#define BRW_ARCHITECTURE_REGISTER_FILE    0
+#define BRW_GENERAL_REGISTER_FILE         1
+#define BRW_MESSAGE_REGISTER_FILE         2
+#define BRW_IMMEDIATE_VALUE               3
+
+#define BRW_REGISTER_TYPE_UD  0
+#define BRW_REGISTER_TYPE_D   1
+#define BRW_REGISTER_TYPE_UW  2
+#define BRW_REGISTER_TYPE_W   3
+#define BRW_REGISTER_TYPE_UB  4
+#define BRW_REGISTER_TYPE_B   5
+#define BRW_REGISTER_TYPE_VF  5	/* packed float vector, immediates only? */
+#define BRW_REGISTER_TYPE_HF  6
+#define BRW_REGISTER_TYPE_V   6	/* packed int vector, immediates only, uword dest only */
+#define BRW_REGISTER_TYPE_F   7
+
+#define BRW_ARF_NULL                  0x00
+#define BRW_ARF_ADDRESS               0x10
+#define BRW_ARF_ACCUMULATOR           0x20   
+#define BRW_ARF_FLAG                  0x30
+#define BRW_ARF_MASK                  0x40
+#define BRW_ARF_MASK_STACK            0x50
+#define BRW_ARF_MASK_STACK_DEPTH      0x60
+#define BRW_ARF_STATE                 0x70
+#define BRW_ARF_CONTROL               0x80
+#define BRW_ARF_NOTIFICATION_COUNT    0x90
+#define BRW_ARF_IP                    0xA0
+
+#define BRW_AMASK   0
+#define BRW_IMASK   1
+#define BRW_LMASK   2
+#define BRW_CMASK   3
+
+
+
+#define BRW_THREAD_NORMAL     0
+#define BRW_THREAD_ATOMIC     1
+#define BRW_THREAD_SWITCH     2
+
+#define BRW_VERTICAL_STRIDE_0                 0
+#define BRW_VERTICAL_STRIDE_1                 1
+#define BRW_VERTICAL_STRIDE_2                 2
+#define BRW_VERTICAL_STRIDE_4                 3
+#define BRW_VERTICAL_STRIDE_8                 4
+#define BRW_VERTICAL_STRIDE_16                5
+#define BRW_VERTICAL_STRIDE_32                6
+#define BRW_VERTICAL_STRIDE_64                7
+#define BRW_VERTICAL_STRIDE_128               8
+#define BRW_VERTICAL_STRIDE_256               9
+#define BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL   0xF
+
+#define BRW_WIDTH_1       0
+#define BRW_WIDTH_2       1
+#define BRW_WIDTH_4       2
+#define BRW_WIDTH_8       3
+#define BRW_WIDTH_16      4
+
+#define BRW_STATELESS_BUFFER_BOUNDARY_1K      0
+#define BRW_STATELESS_BUFFER_BOUNDARY_2K      1
+#define BRW_STATELESS_BUFFER_BOUNDARY_4K      2
+#define BRW_STATELESS_BUFFER_BOUNDARY_8K      3
+#define BRW_STATELESS_BUFFER_BOUNDARY_16K     4
+#define BRW_STATELESS_BUFFER_BOUNDARY_32K     5
+#define BRW_STATELESS_BUFFER_BOUNDARY_64K     6
+#define BRW_STATELESS_BUFFER_BOUNDARY_128K    7
+#define BRW_STATELESS_BUFFER_BOUNDARY_256K    8
+#define BRW_STATELESS_BUFFER_BOUNDARY_512K    9
+#define BRW_STATELESS_BUFFER_BOUNDARY_1M      10
+#define BRW_STATELESS_BUFFER_BOUNDARY_2M      11
+
+#define BRW_POLYGON_FACING_FRONT      0
+#define BRW_POLYGON_FACING_BACK       1
+
+#define BRW_MESSAGE_TARGET_NULL               0
+#define BRW_MESSAGE_TARGET_MATH               1
+#define BRW_MESSAGE_TARGET_SAMPLER            2
+#define BRW_MESSAGE_TARGET_GATEWAY            3
+#define BRW_MESSAGE_TARGET_DATAPORT_READ      4
+#define BRW_MESSAGE_TARGET_DATAPORT_WRITE     5
+#define BRW_MESSAGE_TARGET_URB                6
+#define BRW_MESSAGE_TARGET_THREAD_SPAWNER     7
+
+#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32     0
+#define BRW_SAMPLER_RETURN_FORMAT_UINT32      2
+#define BRW_SAMPLER_RETURN_FORMAT_SINT32      3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE              0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE             0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS        0
+#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX             1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD        1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD         1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS  2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS    2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE    0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE     2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO           2
+#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO             2
+#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO            2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD                3
+#define BRW_SAMPLER_MESSAGE_SIMD8_LD                  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_LD                 3
+
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG            0
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_IGDNG          0
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG           0
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG       1
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_BIAS_IGDNG     1
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG      1
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_IGDNG        2
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_IGDNG      2
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD_IGDNG       2
+#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG    3
+#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE_IGDNG  3
+#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG   3
+
+/* for IGDNG only */
+#define BRW_SAMPLER_SIMD_MODE_SIMD4X2                   0
+#define BRW_SAMPLER_SIMD_MODE_SIMD8                     1
+#define BRW_SAMPLER_SIMD_MODE_SIMD16                    2
+#define BRW_SAMPLER_SIMD_MODE_SIMD32_64                 3
+
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW   0
+#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH  1
+#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS     2
+#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS     3
+#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS     4
+
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD     0
+#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS    2
+
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS   2
+#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS  3
+
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ          0
+#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ     1
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ          2
+#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ      3
+
+#define BRW_DATAPORT_READ_TARGET_DATA_CACHE      0
+#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE    1
+#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE   2
+
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE                0
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED     1
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01         2
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23         3
+#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01       4
+
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE                0
+#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE           1
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE                2
+#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE            3
+#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE              4
+#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE     5
+#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE               7
+
+#define BRW_MATH_FUNCTION_INV                              1
+#define BRW_MATH_FUNCTION_LOG                              2
+#define BRW_MATH_FUNCTION_EXP                              3
+#define BRW_MATH_FUNCTION_SQRT                             4
+#define BRW_MATH_FUNCTION_RSQ                              5
+#define BRW_MATH_FUNCTION_SIN                              6 /* was 7 */
+#define BRW_MATH_FUNCTION_COS                              7 /* was 8 */
+#define BRW_MATH_FUNCTION_SINCOS                           8 /* was 6 */
+#define BRW_MATH_FUNCTION_TAN                              9
+#define BRW_MATH_FUNCTION_POW                              10
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER   11
+#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT                 12
+#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER                13
+
+#define BRW_MATH_INTEGER_UNSIGNED     0
+#define BRW_MATH_INTEGER_SIGNED       1
+
+#define BRW_MATH_PRECISION_FULL        0
+#define BRW_MATH_PRECISION_PARTIAL     1
+
+#define BRW_MATH_SATURATE_NONE         0
+#define BRW_MATH_SATURATE_SATURATE     1
+
+#define BRW_MATH_DATA_VECTOR  0
+#define BRW_MATH_DATA_SCALAR  1
+
+#define BRW_URB_OPCODE_WRITE  0
+
+#define BRW_URB_SWIZZLE_NONE          0
+#define BRW_URB_SWIZZLE_INTERLEAVE    1
+#define BRW_URB_SWIZZLE_TRANSPOSE     2
+
+#define BRW_SCRATCH_SPACE_SIZE_1K     0
+#define BRW_SCRATCH_SPACE_SIZE_2K     1
+#define BRW_SCRATCH_SPACE_SIZE_4K     2
+#define BRW_SCRATCH_SPACE_SIZE_8K     3
+#define BRW_SCRATCH_SPACE_SIZE_16K    4
+#define BRW_SCRATCH_SPACE_SIZE_32K    5
+#define BRW_SCRATCH_SPACE_SIZE_64K    6
+#define BRW_SCRATCH_SPACE_SIZE_128K   7
+#define BRW_SCRATCH_SPACE_SIZE_256K   8
+#define BRW_SCRATCH_SPACE_SIZE_512K   9
+#define BRW_SCRATCH_SPACE_SIZE_1M     10
+#define BRW_SCRATCH_SPACE_SIZE_2M     11
+
+
+
+
+#define CMD_URB_FENCE                 0x6000
+#define CMD_CS_URB_STATE              0x6001
+#define CMD_CONST_BUFFER              0x6002
+
+#define CMD_STATE_BASE_ADDRESS        0x6101
+#define CMD_STATE_INSN_POINTER        0x6102
+#define CMD_PIPELINE_SELECT_965       0x6104
+#define CMD_PIPELINE_SELECT_GM45      0x6904
+
+#define CMD_PIPELINED_STATE_POINTERS  0x7800
+#define CMD_BINDING_TABLE_PTRS        0x7801
+
+#define CMD_VERTEX_BUFFER             0x7808
+# define BRW_VB0_INDEX_SHIFT		27
+# define BRW_VB0_ACCESS_VERTEXDATA	(0 << 26)
+# define BRW_VB0_ACCESS_INSTANCEDATA	(1 << 26)
+# define BRW_VB0_PITCH_SHIFT		0
+
+#define CMD_VERTEX_ELEMENT            0x7809
+# define BRW_VE0_INDEX_SHIFT		27
+# define BRW_VE0_FORMAT_SHIFT		16
+# define BRW_VE0_VALID			(1 << 26)
+# define BRW_VE0_SRC_OFFSET_SHIFT	0
+# define BRW_VE1_COMPONENT_NOSTORE	0
+# define BRW_VE1_COMPONENT_STORE_SRC	1
+# define BRW_VE1_COMPONENT_STORE_0	2
+# define BRW_VE1_COMPONENT_STORE_1_FLT	3
+# define BRW_VE1_COMPONENT_STORE_1_INT	4
+# define BRW_VE1_COMPONENT_STORE_VID	5
+# define BRW_VE1_COMPONENT_STORE_IID	6
+# define BRW_VE1_COMPONENT_STORE_PID	7
+# define BRW_VE1_COMPONENT_0_SHIFT	28
+# define BRW_VE1_COMPONENT_1_SHIFT	24
+# define BRW_VE1_COMPONENT_2_SHIFT	20
+# define BRW_VE1_COMPONENT_3_SHIFT	16
+# define BRW_VE1_DST_OFFSET_SHIFT	0
+
+#define CMD_INDEX_BUFFER              0x780a
+#define CMD_VF_STATISTICS_965         0x780b
+#define CMD_VF_STATISTICS_GM45        0x680b
+
+#define CMD_DRAW_RECT                 0x7900
+#define CMD_BLEND_CONSTANT_COLOR      0x7901
+#define CMD_CHROMA_KEY                0x7904
+#define CMD_DEPTH_BUFFER              0x7905
+#define CMD_POLY_STIPPLE_OFFSET       0x7906
+#define CMD_POLY_STIPPLE_PATTERN      0x7907
+#define CMD_LINE_STIPPLE_PATTERN      0x7908
+#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909
+#define CMD_AA_LINE_PARAMETERS        0x790a
+
+#define CMD_PIPE_CONTROL              0x7a00
+
+#define CMD_3D_PRIM                   0x7b00
+
+#define CMD_MI_FLUSH                  0x0200
+
+
+/* Various values from the R0 vertex header:
+ */
+#define R02_PRIM_END    0x1
+#define R02_PRIM_START  0x2
+
+#include "intel_chipset.h"
+/* Chipset tests on intelScreen->deviceID, and the command opcodes / URB size that depend on them. */
+#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
+#define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
+#define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
+#define URB_SIZES(brw)                  (BRW_IS_IGDNG(brw) ? 1024 : \
+                                         (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
new file mode 100644
index 0000000000..9fef230507
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -0,0 +1,903 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+/* Opcode table indexed by BRW_OPCODE_*: mnemonic plus how many dst/src operands brw_disasm prints. */
+struct {
+    char    *name;
+    int	    nsrc;
+    int	    ndst;
+} opcode[128] = {
+    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+
+    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 }, /* NOTE(review): 01 is octal 1, so a dest is printed; "if" uses 0 -- confirm */
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+
+char *conditional_modifier[16] = { /* suffix for header.destreg__conditionalmod (not used for SEND) */
+    [BRW_CONDITIONAL_NONE] = "",
+    [BRW_CONDITIONAL_Z] = ".e",
+    [BRW_CONDITIONAL_NZ] = ".ne",
+    [BRW_CONDITIONAL_G] = ".g",
+    [BRW_CONDITIONAL_GE] = ".ge",
+    [BRW_CONDITIONAL_L] = ".l",
+    [BRW_CONDITIONAL_LE] = ".le",
+    [BRW_CONDITIONAL_R] = ".r",
+    [BRW_CONDITIONAL_O] = ".o",
+    [BRW_CONDITIONAL_U] = ".u",
+};
+
+char *negate[2] = { /* source negate prefix */
+    [0] = "",
+    [1] = "-",
+};
+
+char *_abs[2] = { /* source absolute-value prefix */
+    [0] = "",
+    [1] = "(abs)",
+};
+
+char *vert_stride[16] = { /* region vertical stride; encodings 7..14 are unset, so control() reports them invalid */
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4",
+    [4] = "8",
+    [5] = "16",
+    [6] = "32",
+    [15] = "VxH",
+};
+
+char *width[8] = { /* region width; encodings 5..7 are unset */
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+};
+
+char *horiz_stride[4] = { /* region horizontal stride */
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4"
+};
+
+char *chan_sel[4] = { /* swizzle channel letters printed by src_da16() */
+    [0] = "x",
+    [1] = "y",
+    [2] = "z",
+    [3] = "w",
+};
+
+char *dest_condmod[16] = { /* all NULL; nothing in this file looks it up */
+};
+
+char *debug_ctrl[2] = { /* header.debug_control suffix */
+    [0] = "",
+    [1] = ".breakpoint"
+};
+
+char *saturate[2] = { /* header.saturate suffix */
+    [0] = "",
+    [1] = ".sat"
+};
+
+char *exec_size[8] = { /* header.execution_size, printed in parentheses after the opcode */
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+    [5] = "32"
+};
+
+char *pred_inv[2] = { /* header.predicate_inverse sign */
+    [0] = "+",
+    [1] = "-"
+};
+
+char *pred_ctrl_align16[16] = { /* predicate_control suffix in align16 mode; [0] unset, only looked up when nonzero */
+    [1] = "",
+    [2] = ".x",
+    [3] = ".y",
+    [4] = ".z",
+    [5] = ".w",
+    [6] = ".any4h",
+    [7] = ".all4h",
+};
+
+char *pred_ctrl_align1[16] = { /* predicate_control suffix in align1 mode; [0] unset, only looked up when nonzero */
+    [1] = "",
+    [2] = ".anyv",
+    [3] = ".allv",
+    [4] = ".any2h",
+    [5] = ".all2h",
+    [6] = ".any4h",
+    [7] = ".all4h",
+    [8] = ".any8h",
+    [9] = ".all8h",
+    [10] = ".any16h",
+    [11] = ".all16h",
+};
+
+char *thread_ctrl[4] = { /* header.thread_control; only 0 and 2 have entries */
+    [0] = "",
+    [2] = "switch"
+};
+
+char *compr_ctrl[4] = { /* header.compression_control */
+    [0] = "",
+    [1] = "sechalf",
+    [2] = "compr",
+};
+
+char *dep_ctrl[4] = { /* header.dependency_control */
+    [0] = "",
+    [1] = "NoDDClr",
+    [2] = "NoDDChk",
+    [3] = "NoDDClr,NoDDChk",
+};
+
+char *mask_ctrl[4] = { /* header.mask_control */
+    [0] = "",
+    [1] = "nomask",
+};
+
+char *access_mode[2] = { /* header.access_mode */
+    [0] = "align1",
+    [1] = "align16",
+};
+
+char *reg_encoding[8] = { /* register data type suffix for dest/src operands; [6] unset */
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [4] = "UB",
+    [5] = "B",
+    [7] = "F"
+};
+
+char *imm_encoding[8] = { /* immediate operand type names; [4] unset */
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [5] = "VF",
+    [6] = "V", /* own slot: a second [5] designator would overwrite "VF" and leave [6] NULL */
+    [7] = "F"
+};
+
+char *reg_file[4] = { /* register file prefix; reg() uses it for every file except the architecture file */
+    [0] = "A",
+    [1] = "g",
+    [2] = "m",
+    [3] = "imm",
+};
+
+char *writemask[16] = { /* align16 dest writemask, bit0=x .. bit3=w; the full mask prints nothing */
+    [0x0] = ".",
+    [0x1] = ".x",
+    [0x2] = ".y",
+    [0x3] = ".xy",
+    [0x4] = ".z",
+    [0x5] = ".xz",
+    [0x6] = ".yz",
+    [0x7] = ".xyz",
+    [0x8] = ".w",
+    [0x9] = ".xw",
+    [0xa] = ".yw",
+    [0xb] = ".xyw",
+    [0xc] = ".zw",
+    [0xd] = ".xzw",
+    [0xe] = ".yzw",
+    [0xf] = "",
+};
+
+char *end_of_thread[2] = { /* SEND bits3.generic.end_of_thread flag */
+    [0] = "",
+    [1] = "EOT"
+};
+
+char *target_function[16] = { /* SEND message target (bits3.generic.msg_target) */
+    [BRW_MESSAGE_TARGET_NULL] = "null",
+    [BRW_MESSAGE_TARGET_MATH] = "math",
+    [BRW_MESSAGE_TARGET_SAMPLER] = "sampler",
+    [BRW_MESSAGE_TARGET_GATEWAY] = "gateway",
+    [BRW_MESSAGE_TARGET_DATAPORT_READ] = "read",
+    [BRW_MESSAGE_TARGET_DATAPORT_WRITE] = "write",
+    [BRW_MESSAGE_TARGET_URB] = "urb",
+    [BRW_MESSAGE_TARGET_THREAD_SPAWNER] = "thread_spawner"
+};
+
+char *math_function[16] = { /* math message function names (bits3.math.function); [0] unset */
+    [BRW_MATH_FUNCTION_INV] = "inv",
+    [BRW_MATH_FUNCTION_LOG] = "log",
+    [BRW_MATH_FUNCTION_EXP] = "exp",
+    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+    [BRW_MATH_FUNCTION_RSQ] = "rsq",
+    [BRW_MATH_FUNCTION_SIN] = "sin",
+    [BRW_MATH_FUNCTION_COS] = "cos",
+    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+    [BRW_MATH_FUNCTION_TAN] = "tan",
+    [BRW_MATH_FUNCTION_POW] = "pow",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv", /* quotient is the "div" half */
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod", /* remainder is the "mod" half */
+};
+
+char *math_saturate[2] = { /* math message bits3.math.saturate */
+    [0] = "",
+    [1] = "sat"
+};
+
+char *math_signed[2] = { /* math message bits3.math.int_type */
+    [0] = "",
+    [1] = "signed"
+};
+
+char *math_scalar[2] = { /* math message bits3.math.data_type */
+    [0] = "",
+    [1] = "scalar"
+};
+
+char *math_precision[2] = { /* math message bits3.math.precision */
+    [0] = "",
+    [1] = "partial_precision"
+};
+
+char *urb_swizzle[4] = { /* URB message bits3.urb.swizzle_control; [3] unset */
+    [BRW_URB_SWIZZLE_NONE] = "",
+    [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+    [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+char *urb_allocate[2] = { /* URB message bits3.urb.allocate */
+    [0] = "",
+    [1] = "allocate"
+};
+
+char *urb_used[2] = { /* URB message bits3.urb.used */
+    [0] = "",
+    [1] = "used"
+};
+
+char *urb_complete[2] = { /* URB message bits3.urb.complete */
+    [0] = "",
+    [1] = "complete"
+};
+
+char *sampler_target_format[4] = { /* sampler message bits3.sampler.return_format; [1] unset */
+    [0] = "F",
+    [2] = "UD",
+    [3] = "D"
+};
+
+
+static int column; /* chars written on the current output line: advanced by string(), reset by newline() */
+/* Write a string to file and advance the column counter by its length; always returns 0. */
+static int string (FILE *file, char *string)
+{
+    fputs (string, file);
+    column += strlen (string);
+    return 0;
+}
+/* printf-style output (truncated to the 1024-byte buffer) sent through string() so column stays right; returns 0. */
+static int format (FILE *f, char *format, ...)
+{
+    char    buf[1024];
+    va_list	args;
+    va_start (args, format);
+    vsnprintf (buf, sizeof (buf) - 1, format, args);
+    va_end (args); /* each va_start must be paired with va_end before returning */
+    string (f, buf);
+    return 0;
+}
+/* End the current output line and reset the column counter; always returns 0. */
+static int newline (FILE *f)
+{
+    putc ('\n', f);
+    column = 0;
+    return 0;
+}
+/* Emit spaces until column reaches c; the do/while writes at least one even when already past c. Returns 0. */
+static int pad (FILE *f, int c)
+{
+    do
+	string (f, " ");
+    while (column < c);
+    return 0;
+}
+/* Print ctrl[id] if non-empty (after a space when *space is set); a NULL entry prints a diagnostic and returns 1. */
+static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+{
+    if (!ctrl[id]) { /* NOTE(review): id is not range-checked; assumes the table spans the field's range */
+	format (file, "*** invalid %s value %d ",
+		name, id); /* via format() so the column counter also covers the diagnostic */
+	return 1;
+    }
+    if (ctrl[id][0])
+    {
+	if (space && *space)
+	    string (file, " ");
+	string (file, ctrl[id]);
+	if (space)
+	    *space = 1; /* the next item printed with this tracker gets a separator */
+    }
+    return 0;
+}
+/* Print the mnemonic for opcode id; an entry with no name prints a diagnostic and returns 1, otherwise 0. */
+static int print_opcode (FILE *file, int id)
+{
+    if (!opcode[id].name) {
+	format (file, "*** invalid opcode value %d ", id);
+	return 1;
+    }
+    string (file, opcode[id].name);
+    return 0;
+}
+/* Print a register name. Returns -1 for null/ip (callers then skip region and type), 1 on a bad file, else 0. */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    int	err = 0;
+    if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
+	switch (_reg_nr & 0xf0) { /* high nibble picks the ARF kind, low nibble the register index */
+	case BRW_ARF_NULL:
+	    string (file, "null");
+	    return -1;
+	case BRW_ARF_ADDRESS:
+	    format (file, "a%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_ACCUMULATOR:
+	    format (file, "acc%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_MASK:
+	    format (file, "mask%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_MASK_STACK:
+	    format (file, "msd%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_STATE:
+	    format (file, "sr%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_CONTROL:
+	    format (file, "cr%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_NOTIFICATION_COUNT:
+	    format (file, "n%d", _reg_nr & 0x0f);
+	    break;
+	case BRW_ARF_IP:
+	    string (file, "ip");
+	    return -1;
+	    break; /* unreachable after the return above */
+	default:
+	    format (file, "ARF%d", _reg_nr);
+	    break;
+	}
+    } else {
+	err  |= control (file, "src reg file", reg_file, _reg_file, NULL);
+	format (file, "%d", _reg_nr);
+    }
+    return err;
+}
+/* Print the destination operand; returns nonzero when a field has no table entry or the mode is unsupported. */
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+    int	err = 0;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+	if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+	    if (err == -1) /* null/ip: nothing more to print, and not an error */
+		return 0;
+	    if (inst->bits1.da1.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+	    format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+	}
+	else
+	{
+	    string (file, "g[a0");
+	    if (inst->bits1.ia1.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+	    if (inst->bits1.ia1.dest_indirect_offset)
+		format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+	    string (file, "]");
+	    format (file, "<%d>", inst->bits1.ia1.dest_horiz_stride);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+	}
+    }
+    else
+    {
+	if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+	    if (err == -1) /* null/ip: nothing more to print, and not an error */
+		return 0;
+	    if (inst->bits1.da16.dest_subreg_nr)
+		format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+	    string (file, "<1>");
+	    err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+	    err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+	}
+	else
+	{
+	    err = 1;
+	    string (file, "Indirect align16 address mode not supported");
+	}
+    }
+
+    return err; /* hand the accumulated status to brw_disasm instead of a constant 0 */
+}
+/* Print an align1 source region as <vert,width,horiz>; returns nonzero if any encoding has no table entry. */
+static int src_align1_region (FILE *file,
+			      GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int err = 0;
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",");
+    err |= control (file, "width", width, _width, NULL);
+    string (file, ",");
+    err |= control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+    string (file, ">");
+    return err;
+}
+/* Print an align1 direct source: modifiers, register, optional subregister, region, type. Nonzero on error. */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+		    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+		    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1) /* null/ip: no region or type follows, and not an error */
+	return 0;
+    if (sub_reg_num)
+	format (file, ".%d", sub_reg_num);
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride); /* keep invalid-region status */
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+/* Print an align1 indirect source as g[a0.sub imm]<region>type; _reg_file and _addr_mode are not used. */
+static int src_ia1 (FILE *file,
+		    GLuint type,
+		    GLuint _reg_file,
+		    GLint _addr_imm,
+		    GLuint _addr_subreg_nr,
+		    GLuint _negate,
+		    GLuint __abs,
+		    GLuint _addr_mode,
+		    GLuint _horiz_stride,
+		    GLuint _width,
+		    GLuint _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+	format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+	format (file, " %d", _addr_imm);
+    string (file, "]");
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride); /* keep invalid-region status */
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+/* Print an align16 direct source: modifiers, register, <vstride,1,1>, type, then the swizzle unless it is xyzw. */
+static int src_da16 (FILE *file,
+		     GLuint _reg_type,
+		     GLuint _reg_file,
+		     GLuint _vert_stride,
+		     GLuint _reg_nr,
+		     GLuint _subreg_nr,
+		     GLuint __abs,
+		     GLuint _negate,
+		     GLuint swz_x,
+		     GLuint swz_y,
+		     GLuint swz_z,
+		     GLuint swz_w)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, _reg_nr);
+    if (err == -1) /* null/ip: nothing more to print, and not an error */
+	return 0;
+    if (_subreg_nr)
+	format (file, ".%d", _subreg_nr);
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all	 - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+	swz_y == BRW_CHANNEL_Y &&
+	swz_z == BRW_CHANNEL_Z &&
+	swz_w == BRW_CHANNEL_W)
+    {
+	;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+	string (file, ".");
+	err |= control (file, "channel select", chan_sel, swz_x, NULL);
+	err |= control (file, "channel select", chan_sel, swz_y, NULL);
+	err |= control (file, "channel select", chan_sel, swz_z, NULL);
+	err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    return err;
+}
+
+/* Print the immediate in bits3 per its register type; types without a case print nothing. Always returns 0. */
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+	format (file, "0x%08xUD", inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_D:
+	format (file, "%dD", inst->bits3.d);
+	break;
+    case BRW_REGISTER_TYPE_UW:
+	format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_W:
+	format (file, "%dW", (int16_t) inst->bits3.d);
+	break;
+    case BRW_REGISTER_TYPE_UB:
+	format (file, "0x%02xUB", (uint8_t) inst->bits3.ud); /* unsigned: a signed cast sign-extends bytes >= 0x80 */
+	break;
+    case BRW_REGISTER_TYPE_VF:
+	format (file, "Vector Float");
+	break;
+    case BRW_REGISTER_TYPE_V:
+	format (file, "0x%08xV", inst->bits3.ud);
+	break;
+    case BRW_REGISTER_TYPE_F:
+	format (file, "%-gF", inst->bits3.f);
+    }
+    return 0;
+}
+/* Print source 0: an immediate if its file says so, else dispatch on access and address mode. Nonzero on error. */
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+	return imm (file, inst->bits1.da1.src0_reg_type,
+		    inst);
+    else if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+	if (inst->bits2.da1.src0_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    return src_da1 (file,
+			    inst->bits1.da1.src0_reg_type,
+			    inst->bits1.da1.src0_reg_file,
+			    inst->bits2.da1.src0_vert_stride,
+			    inst->bits2.da1.src0_width,
+			    inst->bits2.da1.src0_horiz_stride,
+			    inst->bits2.da1.src0_reg_nr,
+			    inst->bits2.da1.src0_subreg_nr,
+			    inst->bits2.da1.src0_abs,
+			    inst->bits2.da1.src0_negate);
+	}
+	else
+	{
+	    return src_ia1 (file,
+			    inst->bits1.ia1.src0_reg_type,
+			    inst->bits1.ia1.src0_reg_file,
+			    inst->bits2.ia1.src0_indirect_offset,
+			    inst->bits2.ia1.src0_subreg_nr,
+			    inst->bits2.ia1.src0_negate,
+			    inst->bits2.ia1.src0_abs,
+			    inst->bits2.ia1.src0_address_mode,
+			    inst->bits2.ia1.src0_horiz_stride,
+			    inst->bits2.ia1.src0_width,
+			    inst->bits2.ia1.src0_vert_stride);
+	}
+    }
+    else
+    {
+	if (inst->bits2.da16.src0_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    return src_da16 (file,
+			     inst->bits1.da16.src0_reg_type,
+			     inst->bits1.da16.src0_reg_file,
+			     inst->bits2.da16.src0_vert_stride,
+			     inst->bits2.da16.src0_reg_nr,
+			     inst->bits2.da16.src0_subreg_nr,
+			     inst->bits2.da16.src0_abs,
+			     inst->bits2.da16.src0_negate,
+			     inst->bits2.da16.src0_swz_x,
+			     inst->bits2.da16.src0_swz_y,
+			     inst->bits2.da16.src0_swz_z,
+			     inst->bits2.da16.src0_swz_w);
+	}
+	else
+	{
+	    string (file, "Indirect align16 address mode not supported");
+	    return 1;
+	}
+    }
+}
+/* Print source 1 (fields in bits3): an immediate, or dispatch on access and address mode. Nonzero on error. */
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+	return imm (file, inst->bits1.da1.src1_reg_type,
+		    inst);
+    else if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+	if (inst->bits3.da1.src1_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    return src_da1 (file,
+			    inst->bits1.da1.src1_reg_type,
+			    inst->bits1.da1.src1_reg_file,
+			    inst->bits3.da1.src1_vert_stride,
+			    inst->bits3.da1.src1_width,
+			    inst->bits3.da1.src1_horiz_stride,
+			    inst->bits3.da1.src1_reg_nr,
+			    inst->bits3.da1.src1_subreg_nr,
+			    inst->bits3.da1.src1_abs,
+			    inst->bits3.da1.src1_negate);
+	}
+	else
+	{
+	    return src_ia1 (file,
+			    inst->bits1.ia1.src1_reg_type,
+			    inst->bits1.ia1.src1_reg_file,
+			    inst->bits3.ia1.src1_indirect_offset,
+			    inst->bits3.ia1.src1_subreg_nr,
+			    inst->bits3.ia1.src1_negate,
+			    inst->bits3.ia1.src1_abs,
+			    inst->bits3.ia1.src1_address_mode,
+			    inst->bits3.ia1.src1_horiz_stride,
+			    inst->bits3.ia1.src1_width,
+			    inst->bits3.ia1.src1_vert_stride);
+	}
+    }
+    else
+    {
+	if (inst->bits3.da16.src1_address_mode == BRW_ADDRESS_DIRECT)
+	{
+	    return src_da16 (file,
+			     inst->bits1.da16.src1_reg_type,
+			     inst->bits1.da16.src1_reg_file,
+			     inst->bits3.da16.src1_vert_stride,
+			     inst->bits3.da16.src1_reg_nr,
+			     inst->bits3.da16.src1_subreg_nr,
+			     inst->bits3.da16.src1_abs,
+			     inst->bits3.da16.src1_negate,
+			     inst->bits3.da16.src1_swz_x,
+			     inst->bits3.da16.src1_swz_y,
+			     inst->bits3.da16.src1_swz_z,
+			     inst->bits3.da16.src1_swz_w);
+	}
+	else
+	{
+	    string (file, "Indirect align16 address mode not supported");
+	    return 1;
+	}
+    }
+}
+/* Disassemble one instruction to file as text ending in ";\n"; returns the OR of all field-decoding errors. */
+int brw_disasm (FILE *file, struct brw_instruction *inst)
+{
+    int	err = 0;
+    int space = 0;
+    /* Optional predicate prefix: "(" sign "f0" [.flag_reg_nr] control-suffix ") ". */
+    if (inst->header.predicate_control) {
+	string (file, "(");
+	err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+	string (file, "f0");
+	if (inst->bits2.da1.flag_reg_nr)
+	    format (file, ".%d", inst->bits2.da1.flag_reg_nr);
+	if (inst->header.access_mode == BRW_ALIGN_1)
+	    err |= control (file, "predicate control align1", pred_ctrl_align1,
+			    inst->header.predicate_control, NULL);
+	else
+	    err |= control (file, "predicate control align16", pred_ctrl_align16,
+			    inst->header.predicate_control, NULL);
+	string (file, ") ");
+    }
+
+    err |= print_opcode (file, inst->header.opcode);
+    err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+    err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+    /* For SEND, destreg__conditionalmod is printed as a plain number after the exec size instead. */
+    if (inst->header.opcode != BRW_OPCODE_SEND)
+	err |= control (file, "conditional modifier", conditional_modifier,
+			inst->header.destreg__conditionalmod, NULL);
+
+    if (inst->header.opcode != BRW_OPCODE_NOP) {
+	string (file, "(");
+	err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+	string (file, ")");
+    }
+
+    if (inst->header.opcode == BRW_OPCODE_SEND)
+	format (file, " %d", inst->header.destreg__conditionalmod);
+    /* Operands start at columns 16, 32 and 48; the opcode table says how many of each to print. */
+    if (opcode[inst->header.opcode].ndst > 0) {
+	pad (file, 16);
+	err |= dest (file, inst);
+    }
+    if (opcode[inst->header.opcode].nsrc > 0) {
+	pad (file, 32);
+	err |= src0 (file, inst);
+    }
+    if (opcode[inst->header.opcode].nsrc > 1) {
+	pad (file, 48);
+	err |= src1 (file, inst);
+    }
+    /* SEND gets a second line describing the message: target, target-specific fields, mlen and rlen. */
+    if (inst->header.opcode == BRW_OPCODE_SEND) {
+	newline (file);
+	pad (file, 16);
+	space = 0;
+	err |= control (file, "target function", target_function,
+			inst->bits3.generic.msg_target, &space);
+	switch (inst->bits3.generic.msg_target) {
+	case BRW_MESSAGE_TARGET_MATH:
+	    err |= control (file, "math function", math_function,
+			    inst->bits3.math.function, &space);
+	    err |= control (file, "math saturate", math_saturate,
+			    inst->bits3.math.saturate, &space);
+	    err |= control (file, "math signed", math_signed,
+			    inst->bits3.math.int_type, &space);
+	    err |= control (file, "math scalar", math_scalar,
+			    inst->bits3.math.data_type, &space);
+	    err |= control (file, "math precision", math_precision,
+			    inst->bits3.math.precision, &space);
+	    break;
+	case BRW_MESSAGE_TARGET_SAMPLER:
+	    format (file, " (%d, %d, ",
+		    inst->bits3.sampler.binding_table_index,
+		    inst->bits3.sampler.sampler);
+	    err |= control (file, "sampler target format", sampler_target_format,
+			    inst->bits3.sampler.return_format, NULL);
+	    string (file, ")");
+	    break;
+	case BRW_MESSAGE_TARGET_DATAPORT_WRITE:
+	    format (file, " (%d, %d, %d, %d)",
+		    inst->bits3.dp_write.binding_table_index,
+		    (inst->bits3.dp_write.pixel_scoreboard_clear << 3) |
+		    inst->bits3.dp_write.msg_control,
+		    inst->bits3.dp_write.msg_type,
+		    inst->bits3.dp_write.send_commit_msg);
+	    break;
+	case BRW_MESSAGE_TARGET_URB:
+	    format (file, " %d", inst->bits3.urb.offset);
+	    space = 1;
+	    err |= control (file, "urb swizzle", urb_swizzle,
+			    inst->bits3.urb.swizzle_control, &space);
+	    err |= control (file, "urb allocate", urb_allocate,
+			    inst->bits3.urb.allocate, &space);
+	    err |= control (file, "urb used", urb_used,
+			    inst->bits3.urb.used, &space);
+	    err |= control (file, "urb complete", urb_complete,
+			    inst->bits3.urb.complete, &space);
+	    break;
+	case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
+	    break;
+	default:
+	    format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+	    break;
+	}
+	if (space)
+	    string (file, " ");
+	format (file, "mlen %d",
+		inst->bits3.generic.msg_length);
+	format (file, " rlen %d",
+		inst->bits3.generic.response_length);
+    }
+    pad (file, 64);
+    if (inst->header.opcode != BRW_OPCODE_NOP) { /* trailing "{ ... }" list of instruction options */
+	string (file, "{");
+	space = 1;
+	err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+	err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+	err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+	err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+	err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+	if (inst->header.opcode == BRW_OPCODE_SEND)
+	    err |= control (file, "end of thread", end_of_thread,
+			    inst->bits3.generic.end_of_thread, &space);
+	if (space)
+	    string (file, " ");
+	string (file, "}");
+    }
+    string (file, ";");
+    newline (file);
+    return err;
+}
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
new file mode 100644
index 0000000000..44bb7bd588
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -0,0 +1,493 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/enums.h"
+#include "tnl/tnl.h"
+#include "vbo/vbo_context.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BATCH
+
+/* Hardware 3DPRIM topology for each GL primitive mode (index = GL mode). */
+static const GLuint prim_to_hw_prim[GL_POLYGON+1] = {
+   _3DPRIM_POINTLIST,
+   _3DPRIM_LINELIST,
+   _3DPRIM_LINELOOP,
+   _3DPRIM_LINESTRIP,
+   _3DPRIM_TRILIST,
+   _3DPRIM_TRISTRIP,
+   _3DPRIM_TRIFAN,
+   _3DPRIM_QUADLIST,
+   _3DPRIM_QUADSTRIP,
+   _3DPRIM_POLYGON
+};
+/* GL_POINTS, GL_LINES or GL_TRIANGLES: what each GL mode reduces to. */
+static const GLenum reduced_prim[GL_POLYGON+1] = {
+   GL_POINTS,
+   GL_LINES,
+   GL_LINES,
+   GL_LINES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES,
+   GL_TRIANGLES
+};
+
+
+/* Record the primitive about to be drawn and return its hardware topology.
+ * A change of primitive raises BRW_NEW_PRIMITIVE (plus
+ * BRW_NEW_REDUCED_PRIMITIVE when the reduced primitive changes too) so the
+ * caller re-validates state.  Not the nicest scheme: ideally every program
+ * would cope with all primitives, but that may not be realistic.
+ */
+static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLenum reduced;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+
+   /* Slight optimization to avoid the GS program when not needed: */
+   if (prim == GL_QUAD_STRIP && ctx->Light.ShadeModel != GL_FLAT &&
+       ctx->Polygon.FrontMode == GL_FILL && ctx->Polygon.BackMode == GL_FILL)
+      prim = GL_TRIANGLE_STRIP;
+
+   if (prim == brw->primitive)
+      return prim_to_hw_prim[prim];
+
+   brw->primitive = prim;
+   brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
+
+   reduced = reduced_prim[prim];
+   if (reduced != brw->intel.reduced_primitive) {
+      brw->intel.reduced_primitive = reduced;
+      brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
+   }
+   return prim_to_hw_prim[prim];
+}
+
+
+/* Trim a vertex count to whole quads (GL_QUADS) or quad-strip pairs. */
+static GLuint trim(GLenum prim, GLuint length)
+{
+   if (prim == GL_QUADS)
+      return length & ~3u;
+   if (prim != GL_QUAD_STRIP)
+      return length;
+   return (length < 4) ? 0 : (length & ~1u);
+}
+
+
+/* Fill in a brw_3d_primitive packet for one _mesa_prim and write it to the
+ * batch.  The packet is skipped when trim() leaves no vertices to draw.
+ */
+static void brw_emit_prim(struct brw_context *brw,
+			  const struct _mesa_prim *prim,
+			  uint32_t hw_prim)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_3d_primitive pkt;
+
+   if (INTEL_DEBUG & DEBUG_PRIMS)
+      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+		   prim->start, prim->count);
+
+   pkt.header.opcode = CMD_3D_PRIM;
+   pkt.header.length = sizeof(pkt)/4 - 2;
+   pkt.header.pad = 0;
+   pkt.header.topology = hw_prim;
+   pkt.header.indexed = prim->indexed;
+
+   /* Vertex range; indexed draws add brw->ib.start_vertex_offset. */
+   pkt.verts_per_instance = trim(prim->mode, prim->count);
+   pkt.start_vert_location = prim->start;
+   if (prim->indexed)
+      pkt.start_vert_location += brw->ib.start_vertex_offset;
+   pkt.base_vert_location = prim->basevertex;
+   pkt.instance_count = 1;
+   pkt.start_instance_location = 0;
+
+   /* Can't wrap here, since we rely on the validated state. */
+   brw->no_batch_wrap = GL_TRUE;
+
+   /* With always_flush_cache set, flush both before and after the primitive
+    * so that missed instruction/state cache flushes and missed render cache
+    * flushes both show up. */
+   if (intel->always_flush_cache) {
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      OUT_BATCH(intel->vtbl.flush_cmd());
+      ADVANCE_BATCH();
+   }
+   if (pkt.verts_per_instance)
+      intel_batchbuffer_data(intel->batch, &pkt, sizeof(pkt), LOOP_CLIPRECTS);
+   if (intel->always_flush_cache) {
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      OUT_BATCH(intel->vtbl.flush_cmd());
+      ADVANCE_BATCH();
+   }
+
+   brw->no_batch_wrap = GL_FALSE;
+}
+
+/* Rebind every vertex input slot to the caller's arrays and repack their
+ * sizes; raises BRW_NEW_INPUT_DIMENSIONS when the packed sizes changed. */
+static void brw_merge_inputs( struct brw_context *brw,
+		       const struct gl_client_array *arrays[])
+{
+   const struct brw_vertex_info prev = brw->vb.info;
+   GLuint attr;
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++)
+      dri_bo_unreference(brw->vb.inputs[attr].bo);
+
+   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
+   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      struct brw_vertex_element *input = &brw->vb.inputs[attr];
+      input->glarray = arrays[attr];
+      input->attrib = (gl_vert_attrib) attr;
+      if (arrays[attr]->StrideB != 0)
+	 brw->vb.info.sizes[attr/16] |= (input->glarray->Size - 1) <<
+	    ((attr%16) * 2);
+   }
+   if (memcmp(brw->vb.info.sizes, prev.sizes, sizeof(prev.sizes)) != 0)
+      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
+}
+
+/* Decide whether this draw must be handed to the software fallback.
+ * XXX: could split the primitive list to fallback only on the
+ * non-conformant primitives. */
+static GLboolean check_fallbacks( struct brw_context *brw,
+				  const struct _mesa_prim *prim,
+				  GLuint nr_prims )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   /* If we don't require strict OpenGL conformance (mode 0), never
+    * use fallbacks.  If we're forcing fallbacks (mode 2), always
+    * use fallbacks.
+    */
+   if (brw->intel.conformance_mode == 0)
+      return GL_FALSE;
+
+   if (brw->intel.conformance_mode == 2)
+      return GL_TRUE;
+
+   if (ctx->Polygon.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
+	    return GL_TRUE;
+   }
+
+   /* BRW hardware will do AA lines, but they are non-conformant it
+    * seems.  TBD whether we keep this fallback:
+    */
+   if (ctx->Line.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (reduced_prim[prim[i].mode] == GL_LINES) 
+	    return GL_TRUE;
+   }
+
+   /* Stipple -- these fallbacks could be resolved with a little
+    * bit of work?
+    */
+   if (ctx->Line.StippleFlag) {
+      for (i = 0; i < nr_prims; i++) {
+	 /* GS doesn't get enough information to know when to reset
+	  * the stipple counter?!?
+	  */
+	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) 
+	    return GL_TRUE;
+	    
+	 if (prim[i].mode == GL_POLYGON &&
+	     (ctx->Polygon.FrontMode == GL_LINE ||
+	      ctx->Polygon.BackMode == GL_LINE))
+	    return GL_TRUE;
+      }
+   }
+
+   if (ctx->Point.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (prim[i].mode == GL_POINTS) 
+	    return GL_TRUE;
+   }
+
+   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
+    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
+    * we want strict conformance, force the fallback.
+    * This is checked for the enabled 1D, 2D and 3D textures of each unit.
+    */
+   {
+      int u;
+      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+         if (texUnit->Enabled) {
+            if (texUnit->Enabled & TEXTURE_1D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_2D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_3D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+         }
+      }
+   }
+      
+   /* Nothing stopping us from the fast path now */
+   return GL_FALSE;
+}
+
+/* Try to draw the primitives on hardware.  May fail (returning GL_FALSE)
+ * if out of video memory for texture or vbo upload, or on fallback
+ * conditions. */
+static GLboolean brw_try_draw_prims( GLcontext *ctx,
+				     const struct gl_client_array *arrays[],
+				     const struct _mesa_prim *prim,
+				     GLuint nr_prims,
+				     const struct _mesa_index_buffer *ib,
+				     GLuint min_index,
+				     GLuint max_index )
+{
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_context *brw = brw_context(ctx);
+   GLboolean retval = GL_FALSE;
+   GLboolean warn = GL_FALSE;
+   GLboolean first_time = GL_TRUE;
+   GLuint i;
+
+   if (ctx->NewState)
+      _mesa_update_state( ctx );
+
+   /* We have to validate the textures *before* checking for fallbacks;
+    * otherwise, the software fallback won't be able to rely on the
+    * texture state, the firstLevel and lastLevel fields won't be
+    * set in the intel texture object (they'll both be 0), and the 
+    * software fallback will segfault if it attempts to access any
+    * texture level other than level 0.
+    */
+   brw_validate_textures( brw );
+
+   if (check_fallbacks(brw, prim, nr_prims))
+      return GL_FALSE;
+
+   /* Bind all inputs, derive varying and size information:
+    */
+   brw_merge_inputs( brw, arrays );
+
+   brw->ib.ib = ib;
+   brw->state.dirty.brw |= BRW_NEW_INDICES;
+
+   brw->vb.min_index = min_index;
+   brw->vb.max_index = max_index;
+   brw->state.dirty.brw |= BRW_NEW_VERTICES;
+
+   /* Have to validate state quite late.  Will rebuild tnl_program,
+    * which depends on varying information.  
+    * 
+    * Note this is where brw->vs->prog_data.inputs_read is calculated,
+    * so can't access it earlier.
+    */
+
+   LOCK_HARDWARE(intel);
+   /* No cliprects to draw into: report success without emitting anything. */
+   if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
+      UNLOCK_HARDWARE(intel);
+      return GL_TRUE;
+   }
+
+   for (i = 0; i < nr_prims; i++) {
+      uint32_t hw_prim;
+
+      /* Flush the batch if it's approaching full, so that we don't wrap while
+       * we've got validated state that needs to be in the same batch as the
+       * primitives.  This fraction is just a guess (minimal full state plus
+       * a primitive is around 512 bytes), and would be better if we had
+       * an upper bound of how much we might emit in a single
+       * brw_try_draw_prims().
+       */
+      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
+				      LOOP_CLIPRECTS);
+
+      hw_prim = brw_set_prim(brw, prim[i].mode);
+
+      if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
+	 first_time = GL_FALSE;
+
+	 brw_validate_state(brw);
+
+	 /* Various fallback checks:  */
+	 if (brw->intel.Fallback)
+	    goto out;
+
+	 /* Check that we can fit our state in with our existing batchbuffer, or
+	  * flush otherwise.
+	  */
+	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+					     brw->state.validated_bo_count)) {
+	    static GLboolean warned;
+	    intel_batchbuffer_flush(intel->batch);
+
+	    /* Validate the state after we flushed the batch (which would have
+	     * changed the set of dirty state).  If we still fail to
+	     * check_aperture, warn of what's happening, but attempt to continue
+	     * on since it may succeed anyway, and the user would probably rather
+	     * see a failure and a warning than a fallback.
+	     */
+	    brw_validate_state(brw);
+	    if (!warned &&
+		dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+						brw->state.validated_bo_count)) {
+	       warn = GL_TRUE;
+	       warned = GL_TRUE;
+	    }
+	 }
+
+	 brw_upload_state(brw);
+      }
+
+      brw_emit_prim(brw, &prim[i], hw_prim);
+
+      retval = GL_TRUE;
+   }
+
+   if (intel->always_flush_batch)
+      intel_batchbuffer_flush(intel->batch);
+ out:
+   UNLOCK_HARDWARE(intel);
+
+   brw_state_cache_check_size(brw);
+
+   if (warn)
+      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
+	      "available aperture space\n");
+
+   if (!retval)
+      DBG("%s failed\n", __FUNCTION__);
+
+   return retval;
+}
+
+/* Draw entry point registered with the vbo module (see brw_draw_init()).
+ * When not all varying arrays are in VBOs and the draw does not start at
+ * index zero it is rebased first; otherwise the hardware path is tried and
+ * the software tnl module takes over if that fails.
+ */
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prim,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLboolean index_bounds_valid,
+		     GLuint min_index,
+		     GLuint max_index )
+{
+   const GLboolean in_vbos = vbo_all_varyings_in_vbos(arrays);
+
+   if (!in_vbos && !index_bounds_valid)
+      vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+
+   /* Decide if we want to rebase.  If so we end up recursing once
+    * only into this function.
+    */
+   if (!in_vbos && min_index != 0) {
+      vbo_rebase_prims(ctx, arrays, prim, nr_prims,
+		       ib, min_index, max_index,
+		       brw_draw_prims);
+      return;
+   }
+
+   /* First attempt: the hardware path. */
+   if (brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib,
+			  min_index, max_index))
+      return;
+
+   /* That failed (out of memory or a fallback condition): pass the
+    * drawing command to the software tnl module, which will in turn
+    * call swrast to do the drawing.
+    */
+   _swsetup_Wakeup(ctx);
+   _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+/* Hook the draw module into Mesa: install brw_draw_prims() as the
+ * draw_prims callback of this context's vbo module.
+ */
+void brw_draw_init( struct brw_context *brw )
+{
+   struct vbo_context *vbo = vbo_context(&brw->intel.ctx);
+
+   vbo->draw_prims = brw_draw_prims;
+}
+
+/* Drop every buffer-object reference held by the draw code: the upload
+ * buffer, each vertex input's bo and brw->ib.bo. */
+void brw_draw_destroy( struct brw_context *brw )
+{
+   int attr;
+
+   if (brw->vb.upload.bo != NULL)
+      dri_bo_unreference(brw->vb.upload.bo);
+   brw->vb.upload.bo = NULL;
+
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      dri_bo_unreference(brw->vb.inputs[attr].bo);
+      brw->vb.inputs[attr].bo = NULL;
+   }
+   dri_bo_unreference(brw->ib.bo);
+   brw->ib.bo = NULL;
+}
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
new file mode 100644
index 0000000000..2a14db217f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -0,0 +1,54 @@
+ /**************************************************************************
+ * 
+ * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_DRAW_H
+#define BRW_DRAW_H
+
+#include "main/mtypes.h"		/* for GLcontext... */
+#include "vbo/vbo.h"
+
+struct brw_context;
+
+
+void brw_draw_prims( GLcontext *ctx,
+		     const struct gl_client_array *arrays[],
+		     const struct _mesa_prim *prims,
+		     GLuint nr_prims,
+		     const struct _mesa_index_buffer *ib,
+		     GLboolean index_bounds_valid,
+		     GLuint min_index,
+		     GLuint max_index );	/* vbo draw callback, see brw_draw_init() */
+
+void brw_draw_init( struct brw_context *brw );	/* registers brw_draw_prims() with vbo */
+void brw_draw_destroy( struct brw_context *brw );	/* unreferences the draw code's bos */
+
+/* brw_draw_current.c
+ */
+void brw_init_current_values(GLcontext *ctx,
+			     struct gl_client_array *arrays);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
new file mode 100644
index 0000000000..a3ff6c58d8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -0,0 +1,742 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "main/glheader.h"
+#include "main/bufferobj.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/api_validate.h"
+#include "main/enums.h"
+
+#include "brw_draw.h"
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_fallback.h"
+
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "intel_tex.h"
+
+/* Surface formats per component type, indexed by component count 1-4 (slot 0 unused). */
+static const GLuint double_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
+   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
+};
+static const GLuint float_types[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
+   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
+};
+
+static const GLuint uint_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_UNORM,
+   BRW_SURFACEFORMAT_R32G32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32_UNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
+};
+
+static const GLuint uint_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_USCALED,
+   BRW_SURFACEFORMAT_R32G32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32_USCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
+};
+
+static const GLuint int_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SNORM,
+   BRW_SURFACEFORMAT_R32G32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32_SNORM,
+   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
+};
+
+static const GLuint int_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
+   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
+};
+
+static const GLuint ushort_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_UNORM,
+   BRW_SURFACEFORMAT_R16G16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16_UNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
+};
+
+static const GLuint ushort_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_USCALED,
+   BRW_SURFACEFORMAT_R16G16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16_USCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
+};
+
+static const GLuint short_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SNORM,
+   BRW_SURFACEFORMAT_R16G16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16_SNORM,
+   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
+};
+
+static const GLuint short_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
+   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
+};
+
+static const GLuint ubyte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_UNORM,
+   BRW_SURFACEFORMAT_R8G8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8_UNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
+};
+
+static const GLuint ubyte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_USCALED,
+   BRW_SURFACEFORMAT_R8G8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8_USCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
+};
+
+static const GLuint byte_types_norm[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SNORM,
+   BRW_SURFACEFORMAT_R8G8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8_SNORM,
+   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
+};
+
+static const GLuint byte_types_scale[5] = {
+   0,
+   BRW_SURFACEFORMAT_R8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
+   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
+};
+
+
+/**
+ * Given vertex array type/size/format/normalized info, return
+ * the appropriate hardware surface format.  Integer types map to
+ * UNORM/SNORM formats when normalized and to USCALED/SSCALED formats
+ * otherwise; GL_FLOAT and GL_DOUBLE use the same table either way.
+ * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
+ */
+static GLuint get_surface_type( GLenum type, GLuint size,
+                                GLenum format, GLboolean normalized )
+{
+   const GLuint *formats;
+
+   if (INTEL_DEBUG & DEBUG_VERTS)
+      _mesa_printf("type %s size %d normalized %d\n",
+		   _mesa_lookup_enum_by_nr(type), size, normalized);
+
+   if (!normalized) {
+      assert(format == GL_RGBA); /* sanity check */
+   }
+   else if (type == GL_UNSIGNED_BYTE && format == GL_BGRA) {
+      /* See GL_EXT_vertex_array_bgra */
+      assert(size == 4);
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+   }
+
+   /* Select the table for this type; `size` then indexes into it. */
+   switch (type) {
+   case GL_DOUBLE: formats = double_types; break;
+   case GL_FLOAT: formats = float_types; break;
+   case GL_INT:
+      formats = normalized ? int_types_norm : int_types_scale; break;
+   case GL_SHORT:
+      formats = normalized ? short_types_norm : short_types_scale; break;
+   case GL_BYTE:
+      formats = normalized ? byte_types_norm : byte_types_scale; break;
+   case GL_UNSIGNED_INT:
+      formats = normalized ? uint_types_norm : uint_types_scale; break;
+   case GL_UNSIGNED_SHORT:
+      formats = normalized ? ushort_types_norm : ushort_types_scale; break;
+   case GL_UNSIGNED_BYTE:
+      formats = normalized ? ubyte_types_norm : ubyte_types_scale; break;
+   default:
+      assert(0);
+      return 0;
+   }
+
+   return formats[size];
+}
+
+
+/* Size in bytes of one component of GL type `type`, or 0 if unknown. */
+static GLuint get_size( GLenum type )
+{
+   if (type == GL_DOUBLE)         return sizeof(GLdouble);
+   if (type == GL_FLOAT)          return sizeof(GLfloat);
+   if (type == GL_INT)            return sizeof(GLint);
+   if (type == GL_SHORT)          return sizeof(GLshort);
+   if (type == GL_BYTE)           return sizeof(GLbyte);
+   if (type == GL_UNSIGNED_INT)   return sizeof(GLuint);
+   if (type == GL_UNSIGNED_SHORT) return sizeof(GLushort);
+   if (type == GL_UNSIGNED_BYTE)  return sizeof(GLubyte);
+
+   return 0;
+}
+
+/* Translate a GL index type into the hardware BRW_INDEX_* format. */
+static GLuint get_index_type(GLenum type)
+{
+   if (type == GL_UNSIGNED_BYTE)  return BRW_INDEX_BYTE;
+   if (type == GL_UNSIGNED_SHORT) return BRW_INDEX_WORD;
+   if (type == GL_UNSIGNED_INT)   return BRW_INDEX_DWORD;
+   assert(0);
+   return 0;
+}
+
+/* Start a fresh vertex upload buffer of at least BRW_UPLOAD_INIT_SIZE bytes,
+ * dropping our reference to the old one and resetting the offset to zero. */
+static void wrap_buffers( struct brw_context *brw,
+			  GLuint size )
+{
+   dri_bo *old_bo = brw->vb.upload.bo;
+
+   if (size < BRW_UPLOAD_INIT_SIZE)
+      size = BRW_UPLOAD_INIT_SIZE;
+
+   brw->vb.upload.offset = 0;
+   if (old_bo != NULL)
+      dri_bo_unreference(old_bo);
+   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
+				    size, 1);
+
+   /* The upload bo is deliberately NOT switched to no-backing-store
+    * (dri_bo_fake_disable_backing_store when !brw->intel.ttm): if memory
+    * has to be re-organized, the fake bufmgr needs somewhere to push it.
+    */
+}
+
+/* Reserve `size` bytes (rounded up via ALIGN(size, 64)) in the upload buffer,
+ * wrapping to a new buffer if needed; *bo_return gets its own reference. */
+static void get_space( struct brw_context *brw,
+		       GLuint size,
+		       dri_bo **bo_return,
+		       GLuint *offset_return )
+{
+   const GLuint aligned = ALIGN(size, 64);
+
+   if (brw->vb.upload.bo == NULL ||
+       brw->vb.upload.offset + aligned > brw->vb.upload.bo->size)
+      wrap_buffers(brw, aligned);
+   assert(*bo_return == NULL);
+   dri_bo_reference(brw->vb.upload.bo);
+   *bo_return = brw->vb.upload.bo;
+   *offset_return = brw->vb.upload.offset;
+   brw->vb.upload.offset += aligned;
+}
+
+/* Copy a client-memory vertex array into newly reserved upload-buffer space,
+ * packing elements dst_stride bytes apart, and record the resulting
+ * bo/offset/stride in *element. */
+static void
+copy_array_to_vbo_array( struct brw_context *brw,
+			 struct brw_vertex_element *element,
+			 GLuint dst_stride)
+{
+   struct intel_context *intel = &brw->intel;
+   GLuint size = element->count * dst_stride;
+
+   get_space(brw, size, &element->bo, &element->offset);
+
+   if (element->glarray->StrideB == 0) {
+      assert(element->count == 1);
+      element->stride = 0;
+   } else {
+      element->stride = dst_stride;
+   }
+
+   if (dst_stride == element->glarray->StrideB) {
+      if (intel->intelScreen->kernel_exec_fencing) {
+	 drm_intel_gem_bo_map_gtt(element->bo);
+	 memcpy((char *)element->bo->virtual + element->offset,
+		element->glarray->Ptr, size);
+	 drm_intel_gem_bo_unmap_gtt(element->bo);
+      } else {
+	 dri_bo_subdata(element->bo, element->offset, size,
+			element->glarray->Ptr);
+      }
+   } else {
+      char *dest;
+      const unsigned char *src = element->glarray->Ptr;
+      int i;
+
+      if (intel->intelScreen->kernel_exec_fencing) {
+	 drm_intel_gem_bo_map_gtt(element->bo);
+	 dest = (char *)element->bo->virtual + element->offset;
+	 for (i = 0; i < element->count; i++) {
+	    memcpy(dest, src, dst_stride);
+	    src += element->glarray->StrideB;
+	    dest += dst_stride;
+	 }
+	 drm_intel_gem_bo_unmap_gtt(element->bo);
+      } else {
+	 void *data = _mesa_malloc(dst_stride * element->count);
+	 if (data == NULL) {
+	    /* No staging memory: upload per element rather than write to NULL. */
+	    for (i = 0; i < element->count; i++) {
+	       dri_bo_subdata(element->bo, element->offset + i * dst_stride,
+			      dst_stride, src);
+	       src += element->glarray->StrideB;
+	    }
+	    return;
+	 }
+	 dest = data;
+	 for (i = 0; i < element->count; i++) {
+	    memcpy(dest, src, dst_stride);
+	    src += element->glarray->StrideB;
+	    dest += dst_stride;
+	 }
+	 dri_bo_subdata(element->bo, element->offset, size, data);
+	 _mesa_free(data);
+      }
+   }
+}
+
+/* Resolve a bo/offset/stride for every vertex array the VS reads: named VBOs
+ * are referenced directly, client-memory arrays are uploaded (interleaved
+ * when possible), and every resulting bo is added to the validated list. */
+static void brw_prepare_vertices(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx);
+   GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; 
+   GLuint i;
+   const unsigned char *ptr = NULL;
+   GLuint interleave = 0;
+   unsigned int min_index = brw->vb.min_index;
+   unsigned int max_index = brw->vb.max_index;
+   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
+   GLuint nr_uploads = 0;
+
+   if (0)
+      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
+
+   /* Accumulate the list of enabled arrays (the ve's named in inputs_read). */
+   brw->vb.nr_enabled = 0;
+   while (vs_inputs) {
+      GLuint attr = _mesa_ffsll(vs_inputs) - 1;
+      struct brw_vertex_element *input = &brw->vb.inputs[attr];
+
+      vs_inputs &= ~(1u << attr);   /* unsigned 1: bit 31 would overflow int */
+      brw->vb.enabled[brw->vb.nr_enabled++] = input;
+   }
+
+   /* XXX: In the rare cases where this happens we fallback all
+    * the way to software rasterization, although a tnl fallback
+    * would be sufficient.  I don't know of *any* real world
+    * cases with > 17 vertex attributes enabled, so it probably
+    * isn't an issue at this point.
+    */
+   if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
+      intel->Fallback = 1;
+      return;
+   }
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+
+      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
+
+      if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
+	 struct intel_buffer_object *intel_buffer =
+	    intel_buffer_object(input->glarray->BufferObj);
+
+	 /* Named buffer object: Just reference its contents directly. */
+	 dri_bo_unreference(input->bo);
+	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+					    INTEL_READ);
+	 dri_bo_reference(input->bo);
+	 input->offset = (unsigned long)input->glarray->Ptr;
+	 input->stride = input->glarray->StrideB;
+	 input->count = input->glarray->_MaxElement;
+
+	 /* This is a common place to reach if the user mistakenly supplies
+	  * a pointer in place of a VBO offset.  If we just let it go through,
+	  * we may end up dereferencing a pointer beyond the bounds of the
+	  * GTT.  We would hope that the VBO's max_index would save us, but
+	  * Mesa appears to hand us min/max values not clipped to the
+	  * array object's _MaxElement, and _MaxElement frequently appears
+	  * to be wrong anyway.
+	  *
+	  * The VBO spec allows application termination in this case, and it's
+	  * probably a service to the poor programmer to do so rather than
+	  * trying to just not render.
+	  */
+	 assert(input->offset < input->bo->size);
+      } else {
+	 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
+	 if (input->bo != NULL) {
+	    /* Already-uploaded vertex data is present from a previous
+	     * prepare_vertices, but we had to re-validate state due to
+	     * check_aperture failing and a new batch being produced.
+	     */
+	    continue;
+	 }
+
+	 /* Queue the buffer object up to be uploaded in the next pass,
+	  * when we've decided if we're doing interleaved or not.
+	  */
+	 if (input->attrib == VERT_ATTRIB_POS) {
+	    /* Position array not properly enabled:
+	     */
+            if (input->glarray->StrideB == 0) {
+               intel->Fallback = 1;
+               return;
+            }
+
+	    interleave = input->glarray->StrideB;
+	    ptr = input->glarray->Ptr;
+	 }
+	 else if (interleave != input->glarray->StrideB ||
+		  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
+		  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
+	 {
+	    interleave = 0;
+	 }
+
+	 upload[nr_uploads++] = input;
+	 
+	 /* We rebase drawing to start at element zero only when
+	  * varyings are not in vbos, which means we can end up
+	  * uploading non-varying arrays (stride != 0) when min_index
+	  * is zero.  This doesn't matter as the amount to upload is
+	  * the same for these arrays whether the draw call is rebased
+	  * or not - we just have to upload the one element.
+	  */
+	 assert(min_index == 0 || input->glarray->StrideB == 0);
+      }
+   }
+
+   /* Handle any arrays to be uploaded. */
+   if (nr_uploads > 1 && interleave && interleave <= 256) {
+      /* All uploads are interleaved, so upload the arrays together as
+       * interleaved.  First, upload the contents and set up upload[0].
+       */
+      copy_array_to_vbo_array(brw, upload[0], interleave);
+
+      for (i = 1; i < nr_uploads; i++) {
+	 /* Then, just point upload[i] at upload[0]'s buffer. */
+	 upload[i]->stride = interleave;
+	 upload[i]->offset = upload[0]->offset +
+	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
+	 upload[i]->bo = upload[0]->bo;
+	 dri_bo_reference(upload[i]->bo);
+      }
+   }
+   else {
+      /* Upload non-interleaved arrays */
+      for (i = 0; i < nr_uploads; i++) {
+          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+      }
+   }
+
+   brw_prepare_query_begin(brw);
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+
+      brw_add_validated_bo(brw, input->bo);
+   }
+}
+
+static void brw_emit_vertices(struct brw_context *brw)
+{  /* Emits the CMD_VERTEX_BUFFER and CMD_VERTEX_ELEMENT packets for brw->vb.enabled[]. */
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = intel_context(ctx); /* NOTE(review): presumably used by the batch macros - confirm */
+   GLuint i;
+
+   brw_emit_query_begin(brw);
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), emit a single pad
+    * VERTEX_ELEMENT struct and bail.  The pad element stores the
+    * constants (0, 0, 0, 1.0) in its four components.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   if (brw->vb.nr_enabled == 0) {
+      BEGIN_BATCH(3, IGNORE_CLIPRECTS);
+      OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
+      OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
+		BRW_VE0_VALID |
+		(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
+		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
+      ADVANCE_BATCH();
+      return;
+   }
+
+   /* Now emit VB and VEP state packets.  Each input gets its own hardware
+    * VB, even if inputs are interleaved or from the same VBO (TBD if this
+    * makes a performance difference).  A VB entry is four dwords: index and
+    * pitch, start address, then the last-byte address on IGDNG or the
+    * element count elsewhere, and finally the instance data step rate.
+    */
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+	     ((1 + brw->vb.nr_enabled * 4) - 2));
+
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+
+      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+		BRW_VB0_ACCESS_VERTEXDATA |
+		(input->stride << BRW_VB0_PITCH_SHIFT));
+      OUT_RELOC(input->bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		input->offset);
+      if (BRW_IS_IGDNG(brw)) {
+          if (input->stride) {
+              OUT_RELOC(input->bo,
+                        I915_GEM_DOMAIN_VERTEX, 0,
+                        input->offset + input->stride * input->count - 1);
+          } else {
+              assert(input->count == 1); /* stride 0: exactly one element is addressed */
+              OUT_RELOC(input->bo,
+                        I915_GEM_DOMAIN_VERTEX, 0,
+                        input->offset + input->element_size - 1);
+          }
+      } else
+          OUT_BATCH(input->stride ? input->count : 0);
+      OUT_BATCH(0); /* Instance data step rate */
+   }
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS); /* header + two dwords per element */
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
+   for (i = 0; i < brw->vb.nr_enabled; i++) {
+      struct brw_vertex_element *input = brw->vb.enabled[i];
+      uint32_t format = get_surface_type(input->glarray->Type,
+					 input->glarray->Size,
+					 input->glarray->Format,
+					 input->glarray->Normalized);
+      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
+      uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
+
+      switch (input->glarray->Size) { /* components past Size get 0, 0, 0, 1.0 */
+      case 0: comp0 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 1: comp1 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 2: comp2 = BRW_VE1_COMPONENT_STORE_0; /* fallthrough */
+      case 3: comp3 = BRW_VE1_COMPONENT_STORE_1_FLT;
+	 break;
+      }
+
+      OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+		BRW_VE0_VALID |
+		(format << BRW_VE0_FORMAT_SHIFT) |
+		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+
+      if (BRW_IS_IGDNG(brw)) /* the IGDNG form carries no destination offset */
+          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
+      else
+          OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                    (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
+                    (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
+                    (comp3 << BRW_VE1_COMPONENT_3_SHIFT) |
+                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
+   }
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_vertices = {   /* vertex buffer/element atom */
+   .prepare = brw_prepare_vertices,
+   .emit = brw_emit_vertices,
+   .dirty = {
+      .cache = 0,
+      .brw = BRW_NEW_VERTICES | BRW_NEW_BATCH,
+      .mesa = 0,
+   },
+};
+
+/* Resolve the draw's index buffer (brw->ib.ib) to a buffer object, byte
+ * offset and size, store them in brw->ib and flag BRW_NEW_INDEX_BUFFER when
+ * any of the three changed.  Does nothing when there is no index buffer.
+ */
+static void brw_prepare_indices(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+   struct gl_buffer_object *bufferobj;
+   dri_bo *bo = NULL;
+   GLuint ib_size, ib_type_size, offset;
+
+   if (index_buffer == NULL)
+      return;
+
+   ib_type_size = get_size(index_buffer->type);
+   ib_size = ib_type_size * index_buffer->count;
+   bufferobj = index_buffer->obj;
+
+   if (!_mesa_is_bufferobj(bufferobj)) {
+      /* Indices are not in a buffer object: turn them into a proper VBO. */
+      brw->ib.start_vertex_offset = 0;
+
+      /* Get new bufferobj, offset: */
+      get_space(brw, ib_size, &bo, &offset);
+
+      /* Straight upload, through a GTT mapping if kernel_exec_fencing is set. */
+      if (intel->intelScreen->kernel_exec_fencing) {
+	 drm_intel_gem_bo_map_gtt(bo);
+	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+	 drm_intel_gem_bo_unmap_gtt(bo);
+      } else {
+	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+      }
+   } else {
+      /* For a buffer object, ptr carries the byte offset into the buffer. */
+      offset = (GLuint) (unsigned long) index_buffer->ptr;
+      brw->ib.start_vertex_offset = 0;
+
+      /* If the index buffer isn't aligned to its element size, we have to
+       * rebase it into a temporary.
+       */
+      if ((ib_type_size - 1) & offset) {
+         GLubyte *map = ctx->Driver.MapBuffer(ctx,
+                                              GL_ELEMENT_ARRAY_BUFFER_ARB,
+                                              GL_DYNAMIC_DRAW_ARB,
+                                              bufferobj);
+         map += offset;
+
+         get_space(brw, ib_size, &bo, &offset);
+
+         dri_bo_subdata(bo, offset, ib_size, map);
+
+         ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
+      } else {
+         bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
+                                     INTEL_READ);
+         dri_bo_reference(bo);
+
+         /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
+          * the index buffer state when we're just moving the start index
+          * of our drawing.
+          */
+         brw->ib.start_vertex_offset = offset / ib_type_size;
+         offset = 0;
+         ib_size = bo->size;
+      }
+   }
+
+   /* brw->ib keeps its own reference on the bo: adopt the new one on any
+    * change, otherwise drop ours (get_space() presumably returns a
+    * referenced bo - confirm).
+    */
+   if (brw->ib.bo != bo || brw->ib.offset != offset || brw->ib.size != ib_size) {
+      drm_intel_bo_unreference(brw->ib.bo);
+      brw->ib.bo = bo;
+      brw->ib.offset = offset;
+      brw->ib.size = ib_size;
+
+      brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
+   } else {
+      drm_intel_bo_unreference(bo);
+   }
+
+   brw_add_validated_bo(brw, brw->ib.bo);
+}
+
+const struct brw_tracked_state brw_indices = {   /* prepare hook only, no emit hook */
+   .prepare = brw_prepare_indices,
+   .dirty = {
+      .cache = 0,
+      .brw = BRW_NEW_INDICES,
+      .mesa = 0,
+   },
+};
+
+/* Emit the CMD_INDEX_BUFFER packet for the buffer recorded in
+ * brw->ib.bo/offset/size.  Nothing is emitted when the draw has no index
+ * buffer (brw->ib.ib is NULL).
+ *
+ * Packet layout, four dwords: header, relocated start address, relocated
+ * address of the last byte (offset + size - 1), and a zero dword.
+ */
+static void brw_emit_index_buffer(struct brw_context *brw)
+{
+   /* NOTE(review): intel is not named below; presumably the batch macros
+    * expand to code that uses it - confirm before removing. */
+   struct intel_context *intel = &brw->intel;
+   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
+   struct brw_indexbuffer ib;
+
+   if (!index_buffer)
+      return;
+
+   memset(&ib, 0, sizeof(ib));
+   ib.header.bits.cut_index_enable = 0;
+   ib.header.bits.index_format = get_index_type(index_buffer->type);
+   ib.header.bits.length = sizeof(ib)/4 - 2;
+   ib.header.bits.opcode = CMD_INDEX_BUFFER;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH( ib.header.dword );
+   OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, brw->ib.offset);
+   OUT_RELOC(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0,
+             brw->ib.offset + brw->ib.size - 1);
+   OUT_BATCH( 0 );
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_index_buffer = {   /* emit hook only, no prepare hook */
+   .emit = brw_emit_index_buffer,
+   .dirty = {
+      .cache = 0,
+      .brw = BRW_NEW_INDEX_BUFFER | BRW_NEW_BATCH,
+      .mesa = 0,
+   },
+};
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
new file mode 100644
index 0000000000..1df561386e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -0,0 +1,254 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+/* value == 0xff turns predication off; otherwise load value into the flag
+ * register (unless p->flag_value already caches it) and predicate normally.
+ * Open question: how does this work when execution_size != 8 - is a
+ * test/set for 0xffff needed when execution_size is 16?
+ */
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value )
+{
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   if (value == 0xff)
+      return;
+   if (p->flag_value != value) {
+      brw_push_insn_state(p);
+      brw_MOV(p, brw_flag_reg(), brw_imm_uw(value));
+      p->flag_value = value;
+      brw_pop_insn_state(p);
+   }
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+}
+
+/* Set the predicate control of the current instruction state to pc. */
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc ) {
+   p->current[0].header.predicate_control = pc;
+}
+
+/* Set the conditional modifier of the current instruction state. */
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional ) {
+   p->current[0].header.destreg__conditionalmod = conditional;
+}
+
+/* Set the access mode of the current instruction state. */
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode ) {
+   p->current[0].header.access_mode = access_mode;
+}
+
+/* Set the compression control of the current instruction state. */
+void brw_set_compression_control( struct brw_compile *p, GLboolean compression_control ) {
+   p->current[0].header.compression_control = compression_control;
+}
+
+/* Set the mask control of the current instruction state to value. */
+void brw_set_mask_control( struct brw_compile *p, GLuint value ) {
+   p->current[0].header.mask_control = value;
+}
+
+/* Set the saturate field of the current instruction state to value. */
+void brw_set_saturate( struct brw_compile *p, GLuint value ) {
+   p->current[0].header.saturate = value;
+}
+
+/* Copy the current instruction state into the next stack slot and use it. */
+void brw_push_insn_state( struct brw_compile *p ) {
+   struct brw_instruction *top = p->current;
+   assert(top != p->stack + (BRW_EU_MAX_INSN_STACK - 1));
+   p->current = memcpy(top + 1, top, sizeof *top);
+}
+
+/* Step back to the instruction state that was current before the last push. */
+void brw_pop_insn_state( struct brw_compile *p ) {
+   assert(p->stack != p->current);
+   --p->current;
+}
+
+
+/***********************************************************************
+ */
+/* Reset p for a new program: no instructions stored, and an instruction-state
+ * stack whose first entry is zeroed and then given the defaults below.
+ */
+void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
+{
+   p->current = &p->stack[0];
+   memset(&p->stack[0], 0, sizeof(p->stack[0]));
+   p->nr_insn = 0;
+   p->brw = brw;
+   brw_set_mask_control(p, BRW_MASK_ENABLE); /* original note: what does this do? */
+   brw_set_saturate(p, 0);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_predicate_control_flag_value(p, 0xff); /* 0xff selects BRW_PREDICATE_NONE */
+}
+
+
+/* Call brw_NOP() eight times, then store p->nr_insn times the instruction
+ * size in *sz and return p->store viewed as a GLuint array.
+ */
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz )
+{
+   GLuint pad = 8;
+   while (pad-- > 0)
+      brw_NOP(p);
+   *sz = p->nr_insn * sizeof(p->store[0]);
+   return (const GLuint *)p->store;
+}
+
+
+
+/**
+ * Subroutine calls require special attention.
+ * Mesa instructions may be expanded into multiple hardware instructions
+ * so the prog_instruction::BranchTarget field can't be used as an index
+ * into the hardware instructions.
+ *
+ * The BranchTarget field isn't needed, however.  Mesa's GLSL compiler
+ * emits CAL and BGNSUB instructions with labels that can be used to map
+ * subroutine calls to actual subroutine code blocks.
+ *
+ * The structures and functions here implement patching of CAL instructions
+ * so they jump to the right subroutine code...
+ */
+
+
+/**
+ * For each OPCODE_BGNSUB we create one of these (see brw_save_label()).
+ */
+struct brw_glsl_label
+{
+   const char *name; /**< the label string; the pointer is stored, not a copy */
+   GLuint position;  /**< the position of the brw instruction for this label */
+   struct brw_glsl_label *next;  /**< next in linked list */
+};
+
+
+/**
+ * For each OPCODE_CAL we create one of these (see brw_save_call()).
+ */
+struct brw_glsl_call
+{
+   GLuint call_inst_pos;  /**< location of the CAL instruction (index into store[]) */
+   const char *sub_name;  /**< name of subroutine to call; pointer stored, not a copy */
+   struct brw_glsl_call *next;  /**< next in linked list */
+};
+
+
+/**
+ * Called for each OPCODE_BGNSUB: remember that label "name" is at "position".
+ */
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+{
+   struct brw_glsl_label *node = CALLOC_STRUCT(brw_glsl_label);
+   node->position = position;
+   node->name = name;
+   node->next = c->first_label;   /* new node becomes the list head */
+   c->first_label = node;
+}
+
+
+/**
+ * Called for each OPCODE_CAL: remember that the CAL at "call_pos" calls "name".
+ */
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+{
+   struct brw_glsl_call *node = CALLOC_STRUCT(brw_glsl_call);
+   node->sub_name = name;
+   node->call_inst_pos = call_pos;
+   node->next = c->first_call;   /* new node becomes the list head */
+   c->first_call = node;
+}
+
+
+/**
+ * Return the position saved for the label called "name"; abort() if none.
+ */
+static GLuint
+brw_lookup_label(struct brw_compile *c, const char *name)
+{
+   const struct brw_glsl_label *it = c->first_label;
+
+   while (it != NULL && strcmp(name, it->name) != 0)
+      it = it->next;
+   if (it != NULL)
+      return it->position;
+   abort();  /* should never happen */
+   return ~0;
+}
+
+
+/**
+ * Resolve subroutine calls once code generation is done: patch src1 of
+ * every saved CAL with the distance to its subroutine's instruction, then
+ * free the saved calls and labels and leave both lists empty.
+ */
+void
+brw_resolve_cals(struct brw_compile *c)
+{
+    struct brw_glsl_call *call;
+    struct brw_glsl_label *label;
+
+    /* Patch every CAL before anything is freed: the label list is still
+     * needed for the lookups.
+     */
+    for (call = c->first_call; call != NULL; call = call->next) {
+        struct brw_instruction *cal_inst = &c->store[call->call_inst_pos];
+        struct brw_instruction *sub_inst =
+            &c->store[brw_lookup_label(c, call->sub_name)];
+        GLint offset = sub_inst - cal_inst;
+
+        /* The immediate is the instruction distance times 16; presumably
+         * 16 is the size of one instruction in bytes - confirm.
+         */
+        brw_set_src1(cal_inst, brw_imm_d(offset * 16));
+    }
+
+    /* free linked list of calls, popping from the head */
+    while ((call = c->first_call) != NULL) {
+        c->first_call = call->next;
+        _mesa_free(call);
+    }
+
+    /* free linked list of labels, popping from the head */
+    while ((label = c->first_label) != NULL) {
+        c->first_label = label->next;
+        _mesa_free(label);
+    }
+
+}
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
new file mode 100644
index 0000000000..30603bdd0e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -0,0 +1,968 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "shader/prog_instruction.h"
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) /* pack four 2-bit selectors */
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) /* extract the 2-bit selector number idx */
+
+#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3) /* same value as BRW_SWIZZLE_XYZW */
+#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
+
+
+#define REG_SIZE (8*4) /* bytes per register; byte_offset() splits byte addresses by it */
+
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges.  Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg
+{
+   GLuint type:4;		/* one of BRW_REGISTER_TYPE_x */
+   GLuint file:2;		/* one of the BRW_x_REGISTER_FILE values */
+   GLuint nr:8;			/* register number/index */
+   GLuint subnr:5;		/* byte offset within the register; :1 in align16 */
+   GLuint negate:1;		/* source only */
+   GLuint abs:1;		/* source only */
+   GLuint vstride:4;		/* source only */
+   GLuint width:3;		/* src only, align1 only */
+   GLuint hstride:2;   		/* align1 only */
+   GLuint address_mode:1;	/* relative addressing, hopefully! */
+   GLuint pad0:1;		/* completes the first 32 bits of bit-fields */
+
+   union {      
+      struct {
+	 GLuint swizzle:8;		/* src only, align16 only */
+	 GLuint writemask:4;		/* dest only, align16 only */
+	 GLint  indirect_offset:10;	/* relative addressing offset */
+	 GLuint pad1:10;		/* two dwords total */
+      } bits;
+
+      GLfloat f;	/* immediate value, written by brw_imm_f() */
+      GLint   d;	/* immediate value, written by brw_imm_d() and brw_imm_w() */
+      GLuint ud;	/* immediate value, written by brw_imm_ud() and the other brw_imm_*() helpers */
+   } dw1;      
+};
+
+
+struct brw_indirect {	/* NOTE(review): no user of this struct is visible in this part of the file */
+   GLuint addr_subnr:4;	/* presumably a sub-register of the address register - confirm */
+   GLint addr_offset:10;	/* signed, same width as brw_reg's indirect_offset */
+   GLuint pad:18;	/* brings the bit-fields to 32 bits (4 + 10 + 18) */
+};
+
+
+struct brw_glsl_label;	/* defined in brw_eu.c */
+struct brw_glsl_call;	/* defined in brw_eu.c */
+
+
+
+#define BRW_EU_MAX_INSN_STACK 5	/* entries in brw_compile::stack */
+#define BRW_EU_MAX_INSN 10000	/* entries in brw_compile::store */
+
+struct brw_compile {
+   struct brw_instruction store[BRW_EU_MAX_INSN];	/* the program's instructions */
+   GLuint nr_insn;	/* instruction count; brw_get_program() sizes the program from it */
+
+   /* Allow clients to push/pop instruction state:
+    */
+   struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+   struct brw_instruction *current;	/* points into stack[]; moved by brw_push/pop_insn_state() */
+
+   GLuint flag_value;	/* value last moved to the flag reg by brw_set_predicate_control_flag_value() */
+   GLboolean single_program_flow;
+   struct brw_context *brw;	/* set by brw_init_compile() */
+
+   struct brw_glsl_label *first_label;  /**< linked list of labels */
+   struct brw_glsl_call *first_call;    /**< linked list of CALs */
+};
+
+
+void
+brw_save_label(struct brw_compile *c, const char *name, GLuint position); /* called for each OPCODE_BGNSUB */
+
+void
+brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos); /* called for each OPCODE_CAL */
+
+void
+brw_resolve_cals(struct brw_compile *c); /* patches the saved CALs, then frees both lists */
+
+
+
+/* Element size in bytes for a BRW_REGISTER_TYPE_x value; types not listed
+ * below yield 0.
+ */
+static INLINE int type_sz( GLuint type )
+{
+   if (type == BRW_REGISTER_TYPE_UD ||
+       type == BRW_REGISTER_TYPE_D ||
+       type == BRW_REGISTER_TYPE_F)
+      return 4;
+   if (type == BRW_REGISTER_TYPE_HF ||
+       type == BRW_REGISTER_TYPE_UW ||
+       type == BRW_REGISTER_TYPE_W)
+      return 2;
+   if (type == BRW_REGISTER_TYPE_UB ||
+       type == BRW_REGISTER_TYPE_B)
+      return 1;
+   return 0;
+}
+
+/**
+ * Construct a brw_reg; asserts that nr is in range for its register file.
+ * \param file  one of the BRW_x_REGISTER_FILE values
+ * \param nr  register number/index
+ * \param subnr  register sub number, in units of \p type (stored in bytes)
+ * \param type  one of BRW_REGISTER_TYPE_x
+ * \param vstride  one of BRW_VERTICAL_STRIDE_x
+ * \param width  one of BRW_WIDTH_x
+ * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle  one of BRW_SWIZZLE_x
+ * \param writemask  WRITEMASK_X/Y/Z/W bitfield
+ * \return the register with negate/abs cleared and direct addressing
+ */
+static INLINE struct brw_reg brw_reg( GLuint file,
+                                      GLuint nr,
+                                      GLuint subnr,
+                                      GLuint type,
+                                      GLuint vstride, GLuint width, GLuint hstride,
+                                      GLuint swizzle, GLuint writemask )
+{
+   struct brw_reg reg;
+   /* The valid range of nr depends on the register file, so the checks
+    * must look at file; type holds a BRW_REGISTER_TYPE_x value. */
+   if (file == BRW_GENERAL_REGISTER_FILE)
+      assert(nr < BRW_MAX_GRF);
+   else if (file == BRW_MESSAGE_REGISTER_FILE)
+      assert(nr < BRW_MAX_MRF);
+   else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(nr <= BRW_ARF_IP);
+
+   reg.type = type;
+   reg.file = file;
+   reg.nr = nr;
+   reg.subnr = subnr * type_sz(type);
+   reg.negate = 0;
+   reg.abs = 0;
+   reg.vstride = vstride;
+   reg.width = width;
+   reg.hstride = hstride;
+   reg.address_mode = BRW_ADDRESS_DIRECT;
+   reg.pad0 = 0;
+
+   /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+    * set swizzle and writemask to W, as the lower bits of subnr will
+    * be lost when converted to align16.  This is probably too much to
+    * keep track of as you'd want it adjusted by suboffset(), etc.
+    * Perhaps fix up when converting to align16?
+    */
+   reg.dw1.bits.swizzle = swizzle;
+   reg.dw1.bits.writemask = writemask;
+   reg.dw1.bits.indirect_offset = 0;
+   reg.dw1.bits.pad1 = 0;
+   return reg;
+}
+
+/**
+ * Construct float[16] register: a BRW_REGISTER_TYPE_F register in the given
+ * file at nr.subnr, using the 16-wide stride/width constants and the XYZW
+ * swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec16_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_16;
+   const GLuint width = BRW_WIDTH_16;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+}
+
+/**
+ * Construct float[8] register: a BRW_REGISTER_TYPE_F register in the given
+ * file at nr.subnr, using the 8-wide stride/width constants and the XYZW
+ * swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec8_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_8;
+   const GLuint width = BRW_WIDTH_8;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+}
+
+/**
+ * Construct float[4] register: a BRW_REGISTER_TYPE_F register in the given
+ * file at nr.subnr, using the 4-wide stride/width constants and the XYZW
+ * swizzle and writemask.
+ */
+static INLINE struct brw_reg brw_vec4_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_4;
+   const GLuint width = BRW_WIDTH_4;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+}
+
+/**
+ * Construct float[2] register: a BRW_REGISTER_TYPE_F register in the given
+ * file at nr.subnr, using the 2-wide stride/width constants, the XYXY
+ * swizzle and the XY writemask.
+ */
+static INLINE struct brw_reg brw_vec2_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_2;
+   const GLuint width = BRW_WIDTH_2;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_1;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XYXY, WRITEMASK_XY);
+}
+
+/**
+ * Construct float[1] register: a BRW_REGISTER_TYPE_F register in the given
+ * file at nr.subnr, using stride 0 / width 1 constants, the XXXX swizzle
+ * and the X writemask.
+ */
+static INLINE struct brw_reg brw_vec1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_0;
+   const GLuint width = BRW_WIDTH_1;
+   const GLuint hstride = BRW_HORIZONTAL_STRIDE_0;
+
+   return brw_reg(file, nr, subnr, BRW_REGISTER_TYPE_F,
+                  vstride, width, hstride,
+                  BRW_SWIZZLE_XXXX, WRITEMASK_X);
+}
+
+
+/* Return a copy of reg whose type field is set to type. */
+static INLINE struct brw_reg retype( struct brw_reg reg, GLuint type ) {
+   struct brw_reg out = reg;
+   out.type = type;
+   return out;
+}
+
+/* Advance reg.subnr by delta elements of reg's own type (type_sz bytes each). */
+static INLINE struct brw_reg suboffset( struct brw_reg reg, GLuint delta ) {
+   const GLuint bytes = delta * type_sz(reg.type);
+   reg.subnr = reg.subnr + bytes;
+   return reg;
+}
+
+
+/* Return a copy of reg moved forward by delta whole registers. */
+static INLINE struct brw_reg offset( struct brw_reg reg, GLuint delta ) {
+   struct brw_reg out = reg;
+   out.nr = out.nr + delta;
+   return out;
+}
+
+
+/* Move reg forward by a byte count, carrying past REG_SIZE from subnr into nr. */
+static INLINE struct brw_reg byte_offset( struct brw_reg reg, GLuint bytes )
+{
+   const GLuint linear = reg.nr * REG_SIZE + reg.subnr + bytes;
+   reg.subnr = linear % REG_SIZE;
+   reg.nr = linear / REG_SIZE;
+   return reg;
+}
+   
+
+/** Construct unsigned word[16] register; subnr counts UW elements */
+static INLINE struct brw_reg brw_uw16_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg reg = brw_vec16_reg(file, nr, 0);
+   reg = retype(reg, BRW_REGISTER_TYPE_UW);
+   return suboffset(reg, subnr);
+}
+
+/** Construct unsigned word[8] register; subnr counts UW elements */
+static INLINE struct brw_reg brw_uw8_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg reg = brw_vec8_reg(file, nr, 0);
+   reg = retype(reg, BRW_REGISTER_TYPE_UW);
+   return suboffset(reg, subnr);
+}
+
+/** Construct unsigned word[1] register; subnr counts UW elements */
+static INLINE struct brw_reg brw_uw1_reg( GLuint file, GLuint nr, GLuint subnr )
+{
+   struct brw_reg reg = brw_vec1_reg(file, nr, 0);
+   reg = retype(reg, BRW_REGISTER_TYPE_UW);
+   return suboffset(reg, subnr);
+}
+
+/* Common base of the brw_imm_*() constructors: a BRW_IMMEDIATE_VALUE
+ * register of the given type with nr and subnr 0, the stride-0 / width-1
+ * constants, and zero swizzle and writemask.  Callers then fill in dw1.
+ */
+static INLINE struct brw_reg brw_imm_reg( GLuint type )
+{
+   const GLuint file = BRW_IMMEDIATE_VALUE;
+
+   return brw_reg(file, 0, 0, type,
+                  BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, BRW_HORIZONTAL_STRIDE_0,
+                  0, 0);
+}
+
+/** Construct float immediate register: type F with f stored in dw1.f */
+static INLINE struct brw_reg brw_imm_f( GLfloat f ) {
+   struct brw_reg reg;
+   reg = brw_imm_reg(BRW_REGISTER_TYPE_F);
+   reg.dw1.f = f;
+   return reg;
+}
+
+/** Construct integer immediate register: type D with d stored in dw1.d */
+static INLINE struct brw_reg brw_imm_d( GLint d ) {
+   struct brw_reg reg;
+   reg = brw_imm_reg(BRW_REGISTER_TYPE_D);
+   reg.dw1.d = d;
+   return reg;
+}
+
+/** Construct uint immediate register: type UD with ud stored in dw1.ud */
+static INLINE struct brw_reg brw_imm_ud( GLuint ud ) {
+   struct brw_reg reg;
+   reg = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+   reg.dw1.ud = ud;
+   return reg;
+}
+
+/** Construct ushort immediate register: uw replicated into both halves of dw1 */
+static INLINE struct brw_reg brw_imm_uw( GLushort uw )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+   imm.dw1.ud = (GLuint)uw | ((GLuint)uw << 16); /* shift as unsigned: no int overflow */
+   return imm;
+}
+
+/** Construct short immediate register: the 16 bits of w in both halves of dw1 */
+static INLINE struct brw_reg brw_imm_w( GLshort w )
+{
+   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+   imm.dw1.ud = (GLuint)(GLushort)w | ((GLuint)(GLushort)w << 16); /* no sign extension into the top half */
+   return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values, passed packed in v */
+static INLINE struct brw_reg brw_imm_v( GLuint v )
+{
+   struct brw_reg reg = brw_imm_reg(BRW_REGISTER_TYPE_V);
+   reg.dw1.ud = v;
+   reg.hstride = BRW_HORIZONTAL_STRIDE_1;
+   reg.width = BRW_WIDTH_8;
+   reg.vstride = BRW_VERTICAL_STRIDE_0;
+   return reg;
+}
+
+/** Construct vector of four 8-bit float values, passed packed in v */
+static INLINE struct brw_reg brw_imm_vf( GLuint v )
+{
+   struct brw_reg reg = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   reg.dw1.ud = v;
+   reg.hstride = BRW_HORIZONTAL_STRIDE_1;
+   reg.width = BRW_WIDTH_4;
+   reg.vstride = BRW_VERTICAL_STRIDE_0;
+   return reg;
+}
+
+#define VF_ZERO 0x0      /* presumably the 8-bit VF encoding of 0.0 - confirm */
+#define VF_ONE  0x30     /* presumably the 8-bit VF encoding of 1.0 - confirm */
+#define VF_NEG  (1<<7)   /* top bit of the byte; presumably the sign bit - confirm */
+
+/* Construct a VF immediate from four values: v0 lands in the lowest byte
+ * and v3 in the highest.  The arguments are not masked to 8 bits. */
+static INLINE struct brw_reg brw_imm_vf4( GLuint v0, GLuint v1, GLuint v2, GLuint v3)
+{
+   struct brw_reg reg = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+   GLuint packed = v0 << 0;
+   packed |= v1 << 8;
+   packed |= v2 << 16;
+   packed |= v3 << 24;
+   reg.dw1.ud = packed;
+   reg.hstride = BRW_HORIZONTAL_STRIDE_1;
+   reg.width = BRW_WIDTH_4;
+   reg.vstride = BRW_VERTICAL_STRIDE_0;
+   return reg;
+}
+
+
+/* UW immediate holding reg's byte address: nr * REG_SIZE + subnr. */
+static INLINE struct brw_reg brw_address( struct brw_reg reg ) {
+   return brw_imm_uw(REG_SIZE * reg.nr + reg.subnr);
+}
+
+/** Construct float[1] general-purpose register (brw_vec1_reg in the GRF file) */
+static INLINE struct brw_reg brw_vec1_grf( GLuint nr, GLuint subnr ) {
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+   return brw_vec1_reg(file, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register (brw_vec2_reg in the GRF file) */
+static INLINE struct brw_reg brw_vec2_grf( GLuint nr, GLuint subnr ) {
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+   return brw_vec2_reg(file, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register (brw_vec4_reg in the GRF file) */
+static INLINE struct brw_reg brw_vec4_grf( GLuint nr, GLuint subnr ) {
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+   return brw_vec4_reg(file, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register (brw_vec8_reg in the GRF file) */
+static INLINE struct brw_reg brw_vec8_grf( GLuint nr, GLuint subnr ) {
+   const GLuint file = BRW_GENERAL_REGISTER_FILE;
+   return brw_vec8_reg(file, nr, subnr);
+}
+
+
+/* Unsigned word[8] general-purpose register (brw_uw8_reg in the GRF file). */
+static INLINE struct brw_reg brw_uw8_grf( GLuint nr, GLuint subnr ) {
+   return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/* Unsigned word[16] general-purpose register (brw_uw16_reg in the GRF file). */
+static INLINE struct brw_reg brw_uw16_grf( GLuint nr, GLuint subnr ) {
+   return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
+static INLINE struct brw_reg brw_null_reg( void )
+{
+   const GLuint file = BRW_ARCHITECTURE_REGISTER_FILE;
+
+   return brw_vec8_reg(file, BRW_ARF_NULL, 0);
+}
+
+/* UW[1] register BRW_ARF_ADDRESS in the architecture file, at element subnr. */
+static INLINE struct brw_reg brw_address_reg( GLuint subnr )
+{
+   const GLuint file = BRW_ARCHITECTURE_REGISTER_FILE;
+   return brw_uw1_reg(file, BRW_ARF_ADDRESS, subnr);
+}
+
+/* Construct the BRW_ARF_IP register as a UD.
+ *
+ * If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw.  This goes against the convention for other scalar
+ * regs:
+ */
+static INLINE struct brw_reg brw_ip_reg( void )
+{
+   const GLuint vstride = BRW_VERTICAL_STRIDE_4; /* ? */
+   const GLuint swizzle = BRW_SWIZZLE_XYZW;      /* NOTE! */
+   const GLuint writemask = WRITEMASK_XYZW;      /* NOTE! */
+
+   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_IP, 0,
+                  BRW_REGISTER_TYPE_UD, vstride, BRW_WIDTH_1,
+                  BRW_HORIZONTAL_STRIDE_0, swizzle, writemask);
+}
+
+/* float[8] register BRW_ARF_ACCUMULATOR in the architecture file. */
+static INLINE struct brw_reg brw_acc_reg( void )
+{
+   const GLuint file = BRW_ARCHITECTURE_REGISTER_FILE;
+   return brw_vec8_reg(file, BRW_ARF_ACCUMULATOR, 0);
+}
+
+
+/* UW[1] register BRW_ARF_FLAG in the architecture file, element 0. */
+static INLINE struct brw_reg brw_flag_reg( void )
+{
+   const GLuint file = BRW_ARCHITECTURE_REGISTER_FILE;
+   return brw_uw1_reg(file, BRW_ARF_FLAG, 0);
+}
+
+
+/* UW[1] register BRW_ARF_MASK in the architecture file, at element subnr. */
+static INLINE struct brw_reg brw_mask_reg( GLuint subnr )
+{
+   const GLuint file = BRW_ARCHITECTURE_REGISTER_FILE;
+   return brw_uw1_reg(file, BRW_ARF_MASK, subnr);
+}
+
+/* float[8] message register number nr; nr must be below BRW_MAX_MRF. */
+static INLINE struct brw_reg brw_message_reg( GLuint nr )
+{
+   const GLuint file = BRW_MESSAGE_REGISTER_FILE;
+   assert(nr < BRW_MAX_MRF);
+   return brw_vec8_reg(file, nr, 0);
+}
+
+
+
+
+/* Encode 1, 2, 4, 8, 16, 32 as 1..6; 0 and any other value encode as 0.
+ * Almost always called with a numeric constant argument, so it is kept
+ * easy to evaluate at compile time. */
+static INLINE GLuint cvt( GLuint val )
+{
+   GLuint code = 0;
+   switch (val) {
+   case 1:  code = 1; break;
+   case 2:  code = 2; break;
+   case 4:  code = 3; break;
+   case 8:  code = 4; break;
+   case 16: code = 5; break;
+   case 32: code = 6; break;
+   }
+   return code;
+}
+
+/* Return reg with a new region; the three arguments are plain counts
+ * (0, 1, 2, 4, ...) that cvt() encodes, not BRW_* constants. */
+static INLINE struct brw_reg stride( struct brw_reg reg,
+                                     GLuint vstride, GLuint width, GLuint hstride )
+{
+   reg.hstride = cvt(hstride);
+   reg.width = cvt(width) - 1;
+   reg.vstride = cvt(vstride);
+   return reg;
+}
+
+
+static INLINE struct brw_reg vec16( struct brw_reg reg )
+{
+   return stride(reg, 16,16,1);   /* vstride 16, width 16, hstride 1 */
+}
+
+static INLINE struct brw_reg vec8( struct brw_reg reg )
+{
+   return stride(reg, 8,8,1);   /* vstride 8, width 8, hstride 1 */
+}
+
+static INLINE struct brw_reg vec4( struct brw_reg reg )
+{
+   return stride(reg, 4,4,1);   /* vstride 4, width 4, hstride 1 */
+}
+
+static INLINE struct brw_reg vec2( struct brw_reg reg )
+{
+   return stride(reg, 2,2,1);   /* vstride 2, width 2, hstride 1 */
+}
+
+static INLINE struct brw_reg vec1( struct brw_reg reg )
+{
+   return stride(reg, 0,1,0);   /* vstride 0, width 1, hstride 0: a single element */
+}
+
+
+static INLINE struct brw_reg get_element( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(reg, elt));   /* single-element (vec1) region at suboffset(reg, elt) */
+}
+
+static INLINE struct brw_reg get_element_ud( struct brw_reg reg, GLuint elt )
+{
+   return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));   /* as get_element(), after retyping reg to UD */
+}
+
+
+static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
+                                          GLuint x, GLuint y,
+                                          GLuint z, GLuint w)
+{
+   const GLuint old = reg.dw1.bits.swizzle;   /* existing swizzle the new selectors index into */
+   const GLuint new_x = BRW_GET_SWZ(old, x);
+   const GLuint new_y = BRW_GET_SWZ(old, y);
+   const GLuint new_z = BRW_GET_SWZ(old, z);
+   const GLuint new_w = BRW_GET_SWZ(old, w);
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(new_x, new_y, new_z, new_w);
+   return reg;
+}
+
+
+static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg, GLuint x )
+{
+   const GLuint chan = x;   /* the same selector is used for all four channels */
+   return brw_swizzle(reg, chan, chan, chan, chan);
+}
+
+static INLINE struct brw_reg brw_writemask( struct brw_reg reg, GLuint mask )
+{
+   /* Intersect: only channels enabled in both reg and mask stay enabled. */
+   reg.dw1.bits.writemask = reg.dw1.bits.writemask & mask;
+   return reg;
+}
+
+static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg, GLuint mask )
+{
+   struct brw_reg masked = reg;
+   masked.dw1.bits.writemask = mask;   /* replaces the old mask; brw_writemask() intersects instead */
+   return masked;
+}
+
+static INLINE struct brw_reg negate( struct brw_reg reg )
+{
+   reg.negate = reg.negate ^ 1;   /* toggle rather than set: negating twice cancels out */
+   return reg;
+}
+
+static INLINE struct brw_reg brw_abs( struct brw_reg reg )
+{
+   reg.abs = 1;   /* sets the abs flag on the by-value copy; reg.negate is left as it was */
+   return reg;
+}
+
+/***********************************************************************
+ */
+static INLINE struct brw_reg brw_vec4_indirect( GLuint subnr, GLint offset )
+{
+   /* Start from brw_vec4_grf(0, 0) and switch it to register-indirect addressing. */
+   struct brw_reg ind = brw_vec4_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+static INLINE struct brw_reg brw_vec1_indirect( GLuint subnr, GLint offset )
+{
+   /* Start from brw_vec1_grf(0, 0) and switch it to register-indirect addressing. */
+   struct brw_reg ind = brw_vec1_grf(0, 0);
+   ind.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+   ind.subnr = subnr;
+   ind.dw1.bits.indirect_offset = offset;
+   return ind;
+}
+
+static INLINE struct brw_reg deref_4f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);   /* offset is added to ptr's own addr_offset */
+}
+
+static INLINE struct brw_reg deref_1f(struct brw_indirect ptr, GLint offset)
+{
+   return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);   /* offset is added to ptr's own addr_offset */
+}
+
+static INLINE struct brw_reg deref_4b(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);   /* deref_4f() result retyped to B */
+}
+
+static INLINE struct brw_reg deref_1uw(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);   /* deref_1f() result retyped to UW */
+}
+
+static INLINE struct brw_reg deref_1d(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);   /* deref_1f() result retyped to D */
+}
+
+static INLINE struct brw_reg deref_1ud(struct brw_indirect ptr, GLint offset)
+{
+   return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);   /* deref_1f() result retyped to UD */
+}
+
+static INLINE struct brw_reg get_addr_reg(struct brw_indirect ptr)
+{
+   return brw_address_reg(ptr.addr_subnr);   /* address register selected by ptr; ptr.addr_offset is not used */
+}
+
+static INLINE struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, GLint offset )
+{
+   ptr.addr_offset = ptr.addr_offset + offset;   /* ptr is a by-value copy; the caller's is unchanged */
+   return ptr;
+}
+
+static INLINE struct brw_indirect brw_indirect( GLuint addr_subnr, GLint offset )
+{
+   struct brw_indirect result;
+   result.pad = 0;                   /* padding field is zeroed explicitly */
+   result.addr_offset = offset;
+   result.addr_subnr = addr_subnr;
+   return result;
+}
+
+/** Do two brw_regs name the same register?  Only file and nr are compared. */
+static INLINE GLboolean
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+   return !(r1.file != r2.file || r1.nr != r2.nr);
+}
+
+static INLINE struct brw_instruction *current_insn( struct brw_compile *p)
+{
+   return p->store + p->nr_insn;   /* slot at index nr_insn in the instruction store */
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, GLuint value );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control( struct brw_compile *p, GLboolean control );
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+
+void brw_init_compile( struct brw_context *, struct brw_compile *p );
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+
+/* Helpers for regular instructions: ALU1(OP)/ALU2(OP) declare brw_OP()
+ * with a dest and one or two sources (both macros are #undef'd below). */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0);
+
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+#undef ALU1
+#undef ALU2
+
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle);
+
+void brw_fb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLuint binding_table_index,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot);
+
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index,
+		GLuint sampler,
+		GLuint writemask,
+		GLuint msg_type,
+		GLuint response_length,
+		GLuint msg_length,
+		GLboolean eot,
+		GLuint header_present,
+		GLuint simd_mode);
+
+void brw_math_16( struct brw_compile *p,
+		  struct brw_reg dest,
+		  GLuint function,
+		  GLuint saturate,
+		  GLuint msg_reg_nr,
+		  struct brw_reg src,
+		  GLuint precision );
+
+void brw_math( struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       GLuint saturate,
+	       GLuint msg_reg_nr,
+	       struct brw_reg src,
+	       GLuint data_type,
+	       GLuint precision );
+
+void brw_dp_READ_16( struct brw_compile *p,
+		     struct brw_reg dest,
+		     GLuint scratch_offset );
+
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index );
+
+void brw_dp_READ_4_vs( struct brw_compile *p,
+                       struct brw_reg dest,
+                       GLuint oword,
+                       GLboolean relAddr,
+                       struct brw_reg addrReg,
+                       GLuint location,
+                       GLuint bind_table_index );
+
+void brw_dp_WRITE_16( struct brw_compile *p,
+		      struct brw_reg src,
+		      GLuint scratch_offset );
+
+/* If/else/endif.  Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, 
+			       GLuint execute_size);
+
+struct brw_instruction *brw_ELSE(struct brw_compile *p, 
+				 struct brw_instruction *if_insn);
+
+void brw_ENDIF(struct brw_compile *p, 
+	       struct brw_instruction *if_or_else_insn);
+
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+			       GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
+	       struct brw_instruction *patch_insn);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p, 
+		       struct brw_instruction *jmp_insn);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+	     struct brw_reg dest,
+	     GLuint conditional,
+	     struct brw_reg src0,
+	     struct brw_reg src1);
+
+void brw_print_reg( struct brw_reg reg );
+
+
+/*********************************************************************** 
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr,
+				   GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+			    struct brw_reg dst,
+			    struct brw_indirect ptr,
+			    GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+	       struct brw_reg dst,
+	       struct brw_reg src,
+	       GLuint count);
+
+void brw_math_invert( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg src);
+
+void brw_set_src1( struct brw_instruction *insn,
+                          struct brw_reg reg );
+#endif
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
new file mode 100644
index 0000000000..29f3f6d02f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -0,0 +1,95 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "brw_eu.h"
+
+void brw_print_reg( struct brw_reg hwreg )
+{
+   /* Short names, indexed by hwreg.file and hwreg.type respectively. */
+   static const char *file[] = { "arf", "grf", "msg", "imm" };
+   static const char *type[] = {
+      "ud", "d", "uw", "w", "ub", "vf", "hf", "f"
+   };
+
+   const int is_grf = (hwreg.file == BRW_GENERAL_REGISTER_FILE);
+   const int is_float = (hwreg.type == BRW_REGISTER_TYPE_F);
+
+   /* Source modifiers come first, whichever form is printed after them. */
+   _mesa_printf("%s%s",
+                hwreg.abs ? "abs/" : "",
+                hwreg.negate ? "-" : "");
+
+   /* Even-numbered float GRF at sub-register 0 with stride 8, width 8,
+    * horizontal stride 1: printed as a vector register.
+    */
+   if (is_grf && is_float &&
+       hwreg.nr % 2 == 0 &&
+       hwreg.subnr == 0 &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_8 &&
+       hwreg.width == BRW_WIDTH_8 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_1) {
+      _mesa_printf("vec%d", hwreg.nr);
+      return;
+   }
+
+   /* Float GRF with stride 0, width 1, horizontal stride 0: printed as
+    * a "scalar" register, sub-register number divided by 4.
+    */
+   if (is_grf && is_float &&
+       hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
+       hwreg.width == BRW_WIDTH_1 &&
+       hwreg.hstride == BRW_HORIZONTAL_STRIDE_0) {
+      _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      return;
+   }
+
+   /* Immediates are always printed through the float member of dw1. */
+   if (hwreg.file == BRW_IMMEDIATE_VALUE) {
+      _mesa_printf("imm %f", hwreg.dw1.f);
+      return;
+   }
+
+   /* Anything else: file, nr.subnr (in type-sized units), <v;w,h>, type. */
+   _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+                file[hwreg.file],
+                hwreg.nr,
+                hwreg.subnr / type_sz(hwreg.type),
+                hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0,
+                1<<hwreg.width,
+                hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0,
+                type[hwreg.type]);
+}
+
+
+
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
new file mode 100644
index 0000000000..241cdc33f8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -0,0 +1,1425 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+
+
+/***********************************************************************
+ * Internal helper for constructing instructions
+ */
+
+static void guess_execution_size( struct brw_instruction *insn, struct brw_reg reg )
+{
+   /* A width-8 register in a compressed instruction executes 16 wide;
+    * otherwise the width encoding is reused directly as the execution
+    * size (note - definitions are compatible). */
+   const GLboolean widen = (reg.width == BRW_WIDTH_8 &&
+                            insn->header.compression_control == BRW_COMPRESSION_COMPRESSED);
+   insn->header.execution_size = widen ? BRW_EXECUTE_16 : reg.width;
+}
+
+
+/* Encode dest as insn's destination operand (file, type, address mode,
+ * register or indirect offset) using the align1/align16 layout that
+ * insn's access mode selects, then guess the execution size. */
+static void brw_set_dest( struct brw_instruction *insn, struct brw_reg dest )
+{
+   /* The bound applies to every register file except ARF, so this must
+    * test dest.file; dest.type holds a BRW_REGISTER_TYPE_* value. */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(dest.nr < 128);
+
+   insn->bits1.da1.dest_reg_file = dest.file;
+   insn->bits1.da1.dest_reg_type = dest.type;
+   insn->bits1.da1.dest_address_mode = dest.address_mode;
+
+   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
+      insn->bits1.da1.dest_reg_nr = dest.nr;
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.da1.dest_subreg_nr = dest.subnr;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;   /* stride 0 is replaced by 1 for dests */
+         insn->bits1.da1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
+         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+      }
+   }
+   else {
+      insn->bits1.ia1.dest_subreg_nr = dest.subnr;
+      /* These are different sizes in align1 vs align16: */
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+         if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
+            dest.hstride = BRW_HORIZONTAL_STRIDE_1;   /* stride 0 is replaced by 1 for dests */
+         insn->bits1.ia1.dest_horiz_stride = dest.hstride;
+      }
+      else {
+         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+      }
+   }
+
+   /* Set the execution size based on dest.width and compression control: */
+   guess_execution_size(insn, dest);
+}
+
+/* Encode reg as insn's first source operand.  Immediates also fill in
+ * src1's file/type; otherwise the register, addressing and region or
+ * swizzle fields are set for the align1/align16 layout in use. */
+static void brw_set_src0( struct brw_instruction *insn, struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+
+   /* Test reg.file, not reg.type: the exemption is for the ARF file. */
+   if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
+      assert(reg.nr < 128);
+
+   insn->bits1.da1.src0_reg_file = reg.file;
+   insn->bits1.da1.src0_reg_type = reg.type;
+   insn->bits2.da1.src0_abs = reg.abs;
+   insn->bits2.da1.src0_negate = reg.negate;
+   insn->bits2.da1.src0_address_mode = reg.address_mode;
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;
+      /* Required to set some fields in src1 as well: */
+      insn->bits1.da1.src1_reg_file = 0; /* arf */
+      insn->bits1.da1.src1_reg_type = reg.type;
+   }
+   else {
+      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.da1.src0_subreg_nr = reg.subnr;
+            insn->bits2.da1.src0_reg_nr = reg.nr;
+         }
+         else {
+            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
+            insn->bits2.da16.src0_reg_nr = reg.nr;
+         }
+      }
+      else {
+         insn->bits2.ia1.src0_subreg_nr = reg.subnr;
+         if (insn->header.access_mode == BRW_ALIGN_1) {
+            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+         else {
+            /* The offset has its own ia16 field; storing it in
+             * src0_subreg_nr overwrote the sub-register set above. */
+            insn->bits2.ia16.src0_indirect_offset = reg.dw1.bits.indirect_offset;
+         }
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+         if (reg.width == BRW_WIDTH_1 &&
+             insn->header.execution_size == BRW_EXECUTE_1) {
+            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+            insn->bits2.da1.src0_width = BRW_WIDTH_1;
+            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
+         }
+         else {
+            insn->bits2.da1.src0_horiz_stride = reg.hstride;
+            insn->bits2.da1.src0_width = reg.width;
+            insn->bits2.da1.src0_vert_stride = reg.vstride;
+         }
+      }
+      else {
+         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+         /* This is an oddity of the fact we're using the same
+          * descriptions for registers in align_16 as align_1: */
+         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
+         else
+            insn->bits2.da16.src0_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+void brw_set_src1( struct brw_instruction *insn,
+                   struct brw_reg reg )
+{
+   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
+   /* Unlike brw_set_src0(), the register-number bound is checked for every file. */
+   assert(reg.nr < 128);
+   /* Encode reg as insn's second source operand: file, type and modifiers first. */
+   insn->bits1.da1.src1_reg_file = reg.file;
+   insn->bits1.da1.src1_reg_type = reg.type;
+   insn->bits3.da1.src1_abs = reg.abs;
+   insn->bits3.da1.src1_negate = reg.negate;
+
+   /* Only src1 can be immediate in two-argument instructions.
+    */
+   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
+
+   if (reg.file == BRW_IMMEDIATE_VALUE) {
+      insn->bits3.ud = reg.dw1.ud;   /* the immediate takes the whole of bits3 */
+   }
+   else {
+      /* This is a hardware restriction, which may or may not be lifted
+       * in the future:
+       */
+      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
+      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
+	 insn->bits3.da1.src1_reg_nr = reg.nr;
+      }
+      else {
+	 insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
+	 insn->bits3.da16.src1_reg_nr = reg.nr;
+      }
+
+      if (insn->header.access_mode == BRW_ALIGN_1) {
+	 if (reg.width == BRW_WIDTH_1 && 
+	     insn->header.execution_size == BRW_EXECUTE_1) {
+	    insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
+	    insn->bits3.da1.src1_width = BRW_WIDTH_1;
+	    insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
+	 }
+	 else {
+	    insn->bits3.da1.src1_horiz_stride = reg.hstride;
+	    insn->bits3.da1.src1_width = reg.width;
+	    insn->bits3.da1.src1_vert_stride = reg.vstride;
+	 }
+      }
+      else {
+	 insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
+	 insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
+	 insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
+	 insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
+
+	 /* This is an oddity of the fact we're using the same
+	  * descriptions for registers in align_16 as align_1:
+	  */
+	 if (reg.vstride == BRW_VERTICAL_STRIDE_8)
+	    insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
+	 else
+	    insn->bits3.da16.src1_vert_stride = reg.vstride;
+      }
+   }
+}
+
+
+
+static void brw_set_math_message( struct brw_context *brw,
+				  struct brw_instruction *insn,
+				  GLuint msg_length,
+				  GLuint response_length,
+				  GLuint function,
+				  GLuint integer_type,
+				  GLboolean low_precision,
+				  GLboolean saturate,
+				  GLuint dataType )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+   /* src1 is set as an immediate first (stored in bits3.ud); the math message fields are written into bits3 afterwards. */
+   if (BRW_IS_IGDNG(brw)) {   /* IGDNG layout: the target goes in bits2.send_igdng.sfid */
+       insn->bits3.math_igdng.function = function;
+       insn->bits3.math_igdng.int_type = integer_type;
+       insn->bits3.math_igdng.precision = low_precision;
+       insn->bits3.math_igdng.saturate = saturate;
+       insn->bits3.math_igdng.data_type = dataType;
+       insn->bits3.math_igdng.snapshot = 0;
+       insn->bits3.math_igdng.header_present = 0;
+       insn->bits3.math_igdng.response_length = response_length;
+       insn->bits3.math_igdng.msg_length = msg_length;
+       insn->bits3.math_igdng.end_of_thread = 0;
+       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_MATH;
+       insn->bits2.send_igdng.end_of_thread = 0;
+   } else {   /* other chips: the target goes in bits3.math.msg_target */
+       insn->bits3.math.function = function;
+       insn->bits3.math.int_type = integer_type;
+       insn->bits3.math.precision = low_precision;
+       insn->bits3.math.saturate = saturate;
+       insn->bits3.math.data_type = dataType;
+       insn->bits3.math.response_length = response_length;
+       insn->bits3.math.msg_length = msg_length;
+       insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
+       insn->bits3.math.end_of_thread = 0;
+   }
+}
+
+
+static void brw_set_ff_sync_message( struct brw_context *brw,
+				 struct brw_instruction *insn,
+				 GLboolean allocate,
+				 GLboolean used,
+				 GLuint msg_length,
+				 GLuint response_length,
+				 GLboolean end_of_thread,
+				 GLboolean complete,
+				 GLuint offset,
+				 GLuint swizzle_control )
+{
+	brw_set_src1(insn, brw_imm_d(0));
+	/* Always uses the urb_igdng layout (brw is not consulted); opcode is 1 here, 0 in brw_set_urb_message(). */
+	insn->bits3.urb_igdng.opcode = 1;
+	insn->bits3.urb_igdng.offset = offset;
+	insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+	insn->bits3.urb_igdng.allocate = allocate;
+	insn->bits3.urb_igdng.used = used;
+	insn->bits3.urb_igdng.complete = complete;
+	insn->bits3.urb_igdng.header_present = 1;
+	insn->bits3.urb_igdng.response_length = response_length;
+	insn->bits3.urb_igdng.msg_length = msg_length;
+	insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+	insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+	insn->bits2.send_igdng.end_of_thread = end_of_thread;
+}
+
+static void brw_set_urb_message( struct brw_context *brw,
+				 struct brw_instruction *insn,
+				 GLboolean allocate,
+				 GLboolean used,
+				 GLuint msg_length,
+				 GLuint response_length,
+				 GLboolean end_of_thread,
+				 GLboolean complete,
+				 GLuint offset,
+				 GLuint swizzle_control )
+{
+    brw_set_src1(insn, brw_imm_d(0));
+    /* src1 is set as an immediate first (stored in bits3.ud); the URB message fields are written into bits3 afterwards. */
+    if (BRW_IS_IGDNG(brw)) {   /* IGDNG layout: header_present is set and the target goes in bits2.send_igdng.sfid */
+        insn->bits3.urb_igdng.opcode = 0;	/* ? */
+        insn->bits3.urb_igdng.offset = offset;
+        insn->bits3.urb_igdng.swizzle_control = swizzle_control;
+        insn->bits3.urb_igdng.allocate = allocate;
+        insn->bits3.urb_igdng.used = used;	/* ? */
+        insn->bits3.urb_igdng.complete = complete;
+        insn->bits3.urb_igdng.header_present = 1;
+        insn->bits3.urb_igdng.response_length = response_length;
+        insn->bits3.urb_igdng.msg_length = msg_length;
+        insn->bits3.urb_igdng.end_of_thread = end_of_thread;
+        insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_URB;
+        insn->bits2.send_igdng.end_of_thread = end_of_thread;
+    } else {   /* other chips: the target goes in bits3.urb.msg_target */
+        insn->bits3.urb.opcode = 0;	/* ? */
+        insn->bits3.urb.offset = offset;
+        insn->bits3.urb.swizzle_control = swizzle_control;
+        insn->bits3.urb.allocate = allocate;
+        insn->bits3.urb.used = used;	/* ? */
+        insn->bits3.urb.complete = complete;
+        insn->bits3.urb.response_length = response_length;
+        insn->bits3.urb.msg_length = msg_length;
+        insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
+        insn->bits3.urb.end_of_thread = end_of_thread;
+    }
+}
+
+static void brw_set_dp_write_message( struct brw_context *brw,
+				      struct brw_instruction *insn,
+				      GLuint binding_table_index,
+				      GLuint msg_control,
+				      GLuint msg_type,
+				      GLuint msg_length,
+				      GLuint pixel_scoreboard_clear,
+				      GLuint response_length,
+				      GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+   /* src1 is set as an immediate first (stored in bits3.ud); the dataport-write fields are written into bits3 afterwards. */
+   if (BRW_IS_IGDNG(brw)) {   /* IGDNG layout: header_present is set and the target goes in bits2.send_igdng.sfid */
+       insn->bits3.dp_write_igdng.binding_table_index = binding_table_index;
+       insn->bits3.dp_write_igdng.msg_control = msg_control;
+       insn->bits3.dp_write_igdng.pixel_scoreboard_clear = pixel_scoreboard_clear;
+       insn->bits3.dp_write_igdng.msg_type = msg_type;
+       insn->bits3.dp_write_igdng.send_commit_msg = 0;
+       insn->bits3.dp_write_igdng.header_present = 1;
+       insn->bits3.dp_write_igdng.response_length = response_length;
+       insn->bits3.dp_write_igdng.msg_length = msg_length;
+       insn->bits3.dp_write_igdng.end_of_thread = end_of_thread;
+       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+       insn->bits2.send_igdng.end_of_thread = end_of_thread;
+   } else {   /* other chips: the target goes in bits3.dp_write.msg_target */
+       insn->bits3.dp_write.binding_table_index = binding_table_index;
+       insn->bits3.dp_write.msg_control = msg_control;
+       insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
+       insn->bits3.dp_write.msg_type = msg_type;
+       insn->bits3.dp_write.send_commit_msg = 0;
+       insn->bits3.dp_write.response_length = response_length;
+       insn->bits3.dp_write.msg_length = msg_length;
+       insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+       insn->bits3.dp_write.end_of_thread = end_of_thread;
+   }
+}
+
+static void brw_set_dp_read_message( struct brw_context *brw,
+				      struct brw_instruction *insn,
+				      GLuint binding_table_index,
+				      GLuint msg_control,
+				      GLuint msg_type,
+				      GLuint target_cache,
+				      GLuint msg_length,
+				      GLuint response_length,
+				      GLuint end_of_thread )
+{
+   brw_set_src1(insn, brw_imm_d(0));
+   /* src1 is set as an immediate first (stored in bits3.ud); the dataport-read fields are written into bits3 afterwards. */
+   if (BRW_IS_IGDNG(brw)) {   /* IGDNG layout: header_present is set and the target goes in bits2.send_igdng.sfid */
+       insn->bits3.dp_read_igdng.binding_table_index = binding_table_index;
+       insn->bits3.dp_read_igdng.msg_control = msg_control;
+       insn->bits3.dp_read_igdng.msg_type = msg_type;
+       insn->bits3.dp_read_igdng.target_cache = target_cache;
+       insn->bits3.dp_read_igdng.header_present = 1;
+       insn->bits3.dp_read_igdng.response_length = response_length;
+       insn->bits3.dp_read_igdng.msg_length = msg_length;
+       insn->bits3.dp_read_igdng.pad1 = 0;
+       insn->bits3.dp_read_igdng.end_of_thread = end_of_thread;
+       insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+       insn->bits2.send_igdng.end_of_thread = end_of_thread;
+   } else {   /* other chips: the target goes in bits3.dp_read.msg_target */
+       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
+       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
+       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
+       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
+       insn->bits3.dp_read.response_length = response_length;  /*16:19*/
+       insn->bits3.dp_read.msg_length = msg_length;  /*20:23*/
+       insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+       insn->bits3.dp_read.pad1 = 0;  /*28:30*/
+       insn->bits3.dp_read.end_of_thread = end_of_thread;  /*31*/
+   }
+}
+
+static void brw_set_sampler_message(struct brw_context *brw,
+                                    struct brw_instruction *insn,
+                                    GLuint binding_table_index,
+                                    GLuint sampler,
+                                    GLuint msg_type,
+                                    GLuint response_length,
+                                    GLuint msg_length,
+                                    GLboolean eot,
+                                    GLuint header_present,
+                                    GLuint simd_mode)
+{
+   assert(eot == 0);   /* callers must pass eot == 0 */
+   brw_set_src1(insn, brw_imm_d(0));
+   /* Three layouts follow; header_present and simd_mode are only stored in the IGDNG one. */
+   if (BRW_IS_IGDNG(brw)) {
+      insn->bits3.sampler_igdng.binding_table_index = binding_table_index;
+      insn->bits3.sampler_igdng.sampler = sampler;
+      insn->bits3.sampler_igdng.msg_type = msg_type;
+      insn->bits3.sampler_igdng.simd_mode = simd_mode;
+      insn->bits3.sampler_igdng.header_present = header_present;
+      insn->bits3.sampler_igdng.response_length = response_length;
+      insn->bits3.sampler_igdng.msg_length = msg_length;
+      insn->bits3.sampler_igdng.end_of_thread = eot;
+      insn->bits2.send_igdng.sfid = BRW_MESSAGE_TARGET_SAMPLER;
+      insn->bits2.send_igdng.end_of_thread = eot;
+   } else if (BRW_IS_G4X(brw)) {
+      insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
+      insn->bits3.sampler_g4x.sampler = sampler;
+      insn->bits3.sampler_g4x.msg_type = msg_type;
+      insn->bits3.sampler_g4x.response_length = response_length;
+      insn->bits3.sampler_g4x.msg_length = msg_length;
+      insn->bits3.sampler_g4x.end_of_thread = eot;
+      insn->bits3.sampler_g4x.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+   } else {   /* only this layout sets return_format, fixed to FLOAT32 */
+      insn->bits3.sampler.binding_table_index = binding_table_index;
+      insn->bits3.sampler.sampler = sampler;
+      insn->bits3.sampler.msg_type = msg_type;
+      insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
+      insn->bits3.sampler.response_length = response_length;
+      insn->bits3.sampler.msg_length = msg_length;
+      insn->bits3.sampler.end_of_thread = eot;
+      insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
+   }
+}
+
+
+
+static struct brw_instruction *next_insn( struct brw_compile *p, GLuint opcode )
+{
+   /* Claim the next slot in p->store, seed it from the current default
+    * instruction state and stamp it with opcode. */
+   struct brw_instruction *defaults = p->current;
+   struct brw_instruction *slot;
+
+   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
+   slot = p->store + p->nr_insn;
+   p->nr_insn++;
+   memcpy(slot, defaults, sizeof(*slot));
+
+   /* The conditional modifier is a one-shot flag: once an instruction
+    * has copied it, clear it and reset predication to normal. */
+   if (defaults->header.destreg__conditionalmod != 0) {
+      defaults->header.destreg__conditionalmod = 0;
+      defaults->header.predicate_control = BRW_PREDICATE_NORMAL;
+   }
+   slot->header.opcode = opcode;
+   return slot;
+}
+
+
+/* Emit a one-source instruction and return it so callers can adjust it. */
+static struct brw_instruction *brw_alu1(struct brw_compile *p, GLuint opcode,
+                                        struct brw_reg dest, struct brw_reg src)
+{
+   struct brw_instruction *alu = next_insn(p, opcode);
+
+   brw_set_dest(alu, dest);
+   brw_set_src0(alu, src);
+   return alu;
+}
+
+/* Emit a two-source instruction and return it so callers can adjust it. */
+static struct brw_instruction *brw_alu2(struct brw_compile *p, GLuint opcode,
+                                        struct brw_reg dest, struct brw_reg src0,
+                                        struct brw_reg src1)
+{
+   struct brw_instruction *alu = next_insn(p, opcode);
+
+   brw_set_dest(alu, dest);
+   brw_set_src0(alu, src0);
+   brw_set_src1(alu, src1);
+   return alu;
+}
+
+
+/***********************************************************************
+ * Convenience routines: ALU1(OP)/ALU2(OP) define a public brw_<OP>() that
+ * forwards to brw_alu1()/brw_alu2() with opcode BRW_OPCODE_<OP>.  */
+#define ALU1(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0)   			\
+{							\
+   return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);    	\
+}
+
+#define ALU2(OP)					\
+struct brw_instruction *brw_##OP(struct brw_compile *p,	\
+	      struct brw_reg dest,			\
+	      struct brw_reg src0,			\
+	      struct brw_reg src1)   			\
+{							\
+   return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);	\
+}
+
+/* Instantiate the emitters: e.g. ALU1(MOV) defines brw_MOV(p, dest, src0). */
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(ADD)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU1(RNDZ)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+
+
+/* Emit a NOP whose dest/src0 are GRF 0 retyped to UD and whose src1 is 0. */
+void brw_NOP(struct brw_compile *p)
+{
+   struct brw_instruction *nop = next_insn(p, BRW_OPCODE_NOP);
+   const struct brw_reg r0_ud = retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD);
+   brw_set_dest(nop, r0_ud);
+   brw_set_src0(nop, r0_ud);
+   brw_set_src1(nop, brw_imm_ud(0x0));
+}
+
+
+
+
+
+/***********************************************************************
+ * Comparisons, if/else/endif
+ */
+
+/* Emit a JMPI with mask and compression disabled; returns the instruction. */
+struct brw_instruction *brw_JMPI(struct brw_compile *p, struct brw_reg dest,
+                                 struct brw_reg src0, struct brw_reg src1)
+{
+   struct brw_instruction *jmp;
+
+   jmp = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
+   jmp->header.execution_size = 1;
+   jmp->header.compression_control = BRW_COMPRESSION_NONE;
+   jmp->header.mask_control = BRW_MASK_DISABLE;
+
+   /* Reset the default predicate control for the instructions that follow. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+   return jmp;
+}
+
+/* EU takes the value from the flag register and pushes it onto some
+ * sort of a stack (presumably merging with any flag value already on
+ * the stack).  Within an if block, the flags at the top of the stack
+ * control execution on each channel of the unit, e.g. on each of the
+ * 16 pixel values in our wm programs.
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off.  If the stack is now empty, normal execution resumes.
+ *
+ * No attempt is made to deal with stack overflow (14 elements?).
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *insn;
+
+   if (p->single_program_flow) {
+      assert(execute_size == BRW_EXECUTE_1);
+      /* No IF here: emit an inverse-predicated ADD on the IP register. */
+      insn = next_insn(p, BRW_OPCODE_ADD);
+      insn->header.predicate_inverse = 1;
+   } else {
+      insn = next_insn(p, BRW_OPCODE_IF);
+   }
+
+   /* Override the defaults for this instruction.  The zero immediate is
+    * a placeholder that brw_ELSE()/brw_ENDIF() patch afterwards. */
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.execution_size = execute_size;
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.predicate_control = BRW_PREDICATE_NORMAL;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+   if (!p->single_program_flow)
+       insn->header.thread_control = BRW_THREAD_SWITCH;
+   /* Reset the default predicate control for the instructions that follow. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;
+
+   return insn;
+}
+
+/* Emit the ELSE matching if_insn and patch if_insn's jump distance to it. */
+struct brw_instruction *brw_ELSE(struct brw_compile *p, 
+				 struct brw_instruction *if_insn)
+{
+   struct brw_instruction *insn;
+   GLuint br = 1;   /* jump-count multiplier, doubled on IGDNG */
+
+   if (BRW_IS_IGDNG(p->brw))
+      br = 2;
+
+   if (p->single_program_flow) {
+      insn = next_insn(p, BRW_OPCODE_ADD);
+   } else {
+      insn = next_insn(p, BRW_OPCODE_ELSE);
+   }
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = if_insn->header.execution_size;
+   insn->header.mask_control = BRW_MASK_ENABLE;
+   if (!p->single_program_flow)
+       insn->header.thread_control = BRW_THREAD_SWITCH;
+
+   /* Patch the if instruction to point at this instruction.
+    */
+   if (p->single_program_flow) {
+      assert(if_insn->header.opcode == BRW_OPCODE_ADD);
+      /* Instruction distance scaled by 16 (presumably bytes per insn). */
+      if_insn->bits3.ud = (insn - if_insn + 1) * 16;
+   } else {
+      assert(if_insn->header.opcode == BRW_OPCODE_IF);
+
+      if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
+      if_insn->bits3.if_else.pop_count = 0;
+      if_insn->bits3.if_else.pad0 = 0;
+   }
+
+   return insn;
+}
+
+void brw_ENDIF(struct brw_compile *p, 
+	       struct brw_instruction *patch_insn)
+{
+   GLuint br = 1;   /* jump-count multiplier, doubled on IGDNG */
+
+   if (BRW_IS_IGDNG(p->brw))
+      br = 2; 
+ 
+   if (p->single_program_flow) {
+      /* In single program flow mode, there's no need to execute an ENDIF,
+       * since we don't need to do any stack operations, and if we're executing
+       * currently, we want to just continue executing.
+       */
+      struct brw_instruction *next = &p->store[p->nr_insn];
+
+      assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+      /* Retarget the pending ADD at the next instruction to be emitted. */
+      patch_insn->bits3.ud = (next - patch_insn) * 16;
+   } else {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
+
+      brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+      brw_set_src1(insn, brw_imm_d(0x0));
+
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.execution_size = patch_insn->header.execution_size;
+      insn->header.mask_control = BRW_MASK_ENABLE;
+      insn->header.thread_control = BRW_THREAD_SWITCH;
+
+      assert(patch_insn->bits3.if_else.jump_count == 0); /* not patched yet */
+
+      /* Patch the if or else instructions to point at this or the next
+       * instruction respectively.
+       */
+      if (patch_insn->header.opcode == BRW_OPCODE_IF) {
+	 /* Automagically turn it into an IFF:
+	  */
+	 patch_insn->header.opcode = BRW_OPCODE_IFF;
+	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+	 patch_insn->bits3.if_else.pop_count = 0;
+	 patch_insn->bits3.if_else.pad0 = 0;
+      } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
+	 patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
+	 patch_insn->bits3.if_else.pop_count = 1;
+	 patch_insn->bits3.if_else.pad0 = 0;
+      } else {
+	 assert(0);   /* patch_insn must be an IF or an ELSE */
+      }
+
+      /* Also pop item off the stack in the endif instruction:
+       */
+      insn->bits3.if_else.jump_count = 0;
+      insn->bits3.if_else.pop_count = 1;
+      insn->bits3.if_else.pad0 = 0;
+   }
+}
+
+/* Emit a BREAK (IP as dest/src0, zero immediate) and return it. */
+struct brw_instruction *brw_BREAK(struct brw_compile *p)
+{
+   struct brw_instruction *brk = next_insn(p, BRW_OPCODE_BREAK);
+
+   brw_set_dest(brk, brw_ip_reg());
+   brw_set_src0(brk, brw_ip_reg());
+   brw_set_src1(brk, brw_imm_d(0x0));
+   brk->header.compression_control = BRW_COMPRESSION_NONE;
+   brk->header.execution_size = BRW_EXECUTE_8;
+   brk->bits3.if_else.pad0 = 0;
+   return brk;
+}
+
+/* Emit a CONTINUE (IP as dest/src0, zero immediate) and return it. */
+struct brw_instruction *brw_CONT(struct brw_compile *p)
+{
+   struct brw_instruction *cont = next_insn(p, BRW_OPCODE_CONTINUE);
+
+   brw_set_dest(cont, brw_ip_reg());
+   brw_set_src0(cont, brw_ip_reg());
+   brw_set_src1(cont, brw_imm_d(0x0));
+   cont->header.compression_control = BRW_COMPRESSION_NONE;
+   cont->header.execution_size = BRW_EXECUTE_8;
+   cont->bits3.if_else.pad0 = 0;
+   return cont;
+}
+
+/* DO/WHILE loop: brw_DO() marks the loop head, brw_WHILE() closes it.
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
+{
+   struct brw_instruction *loop_head;
+
+   /* Single program flow emits no DO; the loop head is simply the slot
+    * the next instruction will occupy.
+    */
+   if (p->single_program_flow)
+      return &p->store[p->nr_insn];
+
+   loop_head = next_insn(p, BRW_OPCODE_DO);
+
+   /* Replace the inherited defaults with null operands. */
+   brw_set_dest(loop_head, brw_null_reg());
+   brw_set_src0(loop_head, brw_null_reg());
+   brw_set_src1(loop_head, brw_null_reg());
+
+   loop_head->header.compression_control = BRW_COMPRESSION_NONE;
+   loop_head->header.execution_size = execute_size;
+   loop_head->header.predicate_control = BRW_PREDICATE_NONE;
+   return loop_head;
+}
+
+
+/* Emit the WHILE (or IP-relative ADD) that branches relative to do_insn. */
+struct brw_instruction *brw_WHILE(struct brw_compile *p, 
+                                  struct brw_instruction *do_insn)
+{
+   struct brw_instruction *insn;
+   GLuint br = 1;   /* jump-count multiplier, doubled on IGDNG */
+
+   if (BRW_IS_IGDNG(p->brw))
+      br = 2;
+
+   if (p->single_program_flow)
+      insn = next_insn(p, BRW_OPCODE_ADD);
+   else
+      insn = next_insn(p, BRW_OPCODE_WHILE);
+
+   brw_set_dest(insn, brw_ip_reg());
+   brw_set_src0(insn, brw_ip_reg());
+   brw_set_src1(insn, brw_imm_d(0x0));
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+   if (p->single_program_flow) {
+      insn->header.execution_size = BRW_EXECUTE_1;
+
+      insn->bits3.d = (do_insn - insn) * 16;
+   } else {
+      insn->header.execution_size = do_insn->header.execution_size;
+
+      assert(do_insn->header.opcode == BRW_OPCODE_DO);
+      insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+      insn->bits3.if_else.pop_count = 0;
+      insn->bits3.if_else.pad0 = 0;
+   }
+
+   /* mask_control is left at the inherited default.
+    *
+    * Reset the default predicate control for the instructions that follow. */
+   p->current->header.predicate_control = BRW_PREDICATE_NONE;   
+   return insn;
+}
+
+
+/* FORWARD JUMPS: aim an earlier JMPI at the next instruction to be emitted.
+ */
+void brw_land_fwd_jump(struct brw_compile *p, 
+		       struct brw_instruction *jmp_insn)
+{
+   struct brw_instruction *landing = &p->store[p->nr_insn];
+   GLuint jmpi = 1;   /* distance multiplier, doubled on IGDNG */
+
+   if (BRW_IS_IGDNG(p->brw))
+       jmpi = 2;
+   assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
+   /* Compare, don't assign: '=' wrote the field instead of checking it. */
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
+
+   jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
+}
+
+
+
+/* To integrate with the above, it makes sense that the comparison
+ * instruction should populate the flag register.  It might be simpler
+ * just to use the flag reg for most WM tasks?
+ */
+void brw_CMP(struct brw_compile *p,
+	     struct brw_reg dest,
+	     GLuint conditional,
+	     struct brw_reg src0,
+	     struct brw_reg src1)
+{
+   struct brw_instruction *cmp;
+
+   cmp = next_insn(p, BRW_OPCODE_CMP);
+
+   /* 'conditional' goes into the conditional-modifier header field. */
+   cmp->header.destreg__conditionalmod = conditional;
+   brw_set_dest(cmp, dest);
+   brw_set_src0(cmp, src0);
+   brw_set_src1(cmp, src1);
+
+   /* Only a destination of architecture register 0 changes the defaults:
+    * future instructions then use the computed flag value until
+    * brw_set_predicate_control_flag_value() is called again.
+    */
+   if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE || dest.nr != 0)
+      return;
+
+   p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
+   p->flag_value = 0xff;
+}
+
+
+
+/***********************************************************************
+ * Helpers for the various SEND message types:
+ */
+
+/** Extended math function, float[8].
+ *
+ * Emits one SEND; msg_reg_nr is stored in its destreg__conditionalmod field.
+ */
+void brw_math( struct brw_compile *p,
+	       struct brw_reg dest,
+	       GLuint function,
+	       GLuint saturate,
+	       GLuint msg_reg_nr,
+	       struct brw_reg src,
+	       GLuint data_type,
+	       GLuint precision )
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+   GLuint msg_length = 1;
+   GLuint response_length = 1;
+
+   /* POW uses a message length of 2; SINCOS a response length of 2. */
+   if (function == BRW_MATH_FUNCTION_POW)
+      msg_length = 2;
+   if (function == BRW_MATH_FUNCTION_SINCOS)
+      response_length = 2;
+
+   /* Example code doesn't set predicate_control for send instructions. */
+   send->header.predicate_control = 0;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src);
+   brw_set_math_message(p->brw, send, msg_length, response_length, function,
+			BRW_MATH_INTEGER_UNSIGNED, precision, saturate, data_type);
+}
+
+/**
+ * Extended math function, float[16].
+ * Use 2 send instructions, both with BRW_MATH_DATA_VECTOR.
+ */
+void brw_math_16( struct brw_compile *p,
+		  struct brw_reg dest,
+		  GLuint function,
+		  GLuint saturate,
+		  GLuint msg_reg_nr,
+		  struct brw_reg src,
+		  GLuint precision )
+{
+   struct brw_instruction *insn;
+   GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; 
+   GLuint response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; 
+
+   /* First instruction: emitted under pushed state, with the predicate
+    * flag value set to 0xff and compression disabled. */
+   brw_push_insn_state(p);
+   brw_set_predicate_control_flag_value(p, 0xff);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(insn, dest);
+   brw_set_src0(insn, src);
+   brw_set_math_message(p->brw,
+			insn, 
+			msg_length, response_length, 
+			function,
+			BRW_MATH_INTEGER_UNSIGNED,
+			precision,
+			saturate,
+			BRW_MATH_DATA_VECTOR);
+
+   /* Second instruction: marked 2NDHALF, uses msg_reg_nr+1 and writes
+    * offset(dest,1); src0 is the same 'src' as the first. */
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
+   insn->header.destreg__conditionalmod = msg_reg_nr+1;
+
+   brw_set_dest(insn, offset(dest,1));
+   brw_set_src0(insn, src);
+   brw_set_math_message(p->brw, 
+			insn, 
+			msg_length, response_length, 
+			function,
+			BRW_MATH_INTEGER_UNSIGNED,
+			precision,
+			saturate,
+			BRW_MATH_DATA_VECTOR);
+
+   brw_pop_insn_state(p);
+}
+
+
+/**
+ * Write block of 16 dwords/floats to the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.  Emits a MOV (header setup) and a SEND.
+ */
+void brw_dp_WRITE_16( struct brw_compile *p,
+		      struct brw_reg src,
+		      GLuint scratch_offset )
+{
+   GLuint msg_reg_nr = 1;   /* stored in the SEND's destreg__conditionalmod */
+   {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+      /* set message header global offset field (reg 0, element 2) */
+      brw_MOV(p,
+	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+	      brw_imm_d(scratch_offset));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      GLuint msg_length = 3;   /* presumably header + 2 data regs - TODO confirm */
+      struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+  
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src);
+
+      brw_set_dp_write_message(p->brw,
+			       insn,
+			       255, /* binding table index (255=stateless) */
+			       BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
+			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+			       msg_length,
+			       0, /* pixel scoreboard */
+			       0, /* response_length */
+			       0); /* eot */
+   }
+}
+
+
+/**
+ * Read block of 16 dwords/floats from the data port Render Cache scratch buffer.
+ * Scratch offset should be a multiple of 64.
+ * Used for register spilling.  Emits a MOV (header setup) and a SEND.
+ */
+void brw_dp_READ_16( struct brw_compile *p,
+		      struct brw_reg dest,
+		      GLuint scratch_offset )
+{
+   GLuint msg_reg_nr = 1;   /* stored in the SEND's destreg__conditionalmod */
+   {
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /* set message header global offset field (reg 0, element 2) */
+      brw_MOV(p,
+	      retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
+	      brw_imm_d(scratch_offset));
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+  
+      brw_set_dest(insn, dest);	/* UW? (dest is used as passed, not retyped) */
+      brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
+
+      brw_set_dp_read_message(p->brw,
+			      insn,
+			      255, /* binding table index (255=stateless) */
+			      3,  /* msg_control (3 means 4 Owords) */
+			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			      1, /* target cache (render/scratch) */
+			      1, /* msg_length */
+			      2, /* response_length */
+			      0); /* eot */
+   }
+}
+
+
+/**
+ * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Location (in buffer) should be a multiple of 16.
+ * Used for fetching shader constants.
+ * NOTE: relAddr is currently ignored; indirect fetches are not implemented.
+ */
+void brw_dp_READ_4( struct brw_compile *p,
+                    struct brw_reg dest,
+                    GLboolean relAddr,
+                    GLuint location,
+                    GLuint bind_table_index )
+{
+   /* XXX: relAddr not implemented */
+   GLuint msg_reg_nr = 1;
+   {
+      struct brw_reg b;
+      brw_push_insn_state(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+      /* Setup MRF[1] with location/offset into const buffer */
+      b = brw_message_reg(msg_reg_nr);
+      b = retype(b, BRW_REGISTER_TYPE_UD);
+      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
+       */
+      brw_MOV(p, b, brw_imm_ud(location));
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+      insn->header.mask_control = BRW_MASK_DISABLE;
+  
+      /* cast dest to a uword[8] vector */
+      dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, brw_null_reg());
+
+      brw_set_dp_read_message(p->brw,
+			      insn,
+			      bind_table_index,
+			      0,  /* msg_control (0 means 1 Oword) */
+			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			      0, /* source cache = data cache */
+			      1, /* msg_length */
+			      1, /* response_length (1 Oword) */
+			      0); /* eot */
+   }
+}
+
+
+/**
+ * Read float[4] constant(s) from VS constant buffer.
+ * For relative addressing, two float[4] constants will be read into 'dest'.
+ * Otherwise, one float[4] constant will be read into the lower half of 'dest'.
+ */
+void brw_dp_READ_4_vs(struct brw_compile *p,
+                      struct brw_reg dest,
+                      GLuint oword,
+                      GLboolean relAddr,
+                      struct brw_reg addrReg,
+                      GLuint location,
+                      GLuint bind_table_index)
+{
+   GLuint msg_reg_nr = 1;
+
+   assert(oword < 2);
+   /* 'oword' is passed on as the read message's msg_control:
+    * 0 requests the lower Oword, 1 the upper Oword.
+    * With relAddr set, the offset sent is addrReg + location.
+    */
+
+   /* Setup MRF[1] with location/offset into const buffer */
+   {
+      struct brw_reg b;
+
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      /*brw_set_access_mode(p, BRW_ALIGN_16);*/
+
+      /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
+       * when the docs say only dword[2] should be set.  Hmmm.  But it works.
+       */
+      b = brw_message_reg(msg_reg_nr);
+      b = retype(b, BRW_REGISTER_TYPE_UD);
+      /*b = get_element_ud(b, 2);*/
+      if (relAddr) {
+         brw_ADD(p, b, addrReg, brw_imm_ud(location));
+      }
+      else {
+         brw_MOV(p, b, brw_imm_ud(location));
+      }
+
+      brw_pop_insn_state(p);
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = BRW_PREDICATE_NONE;
+      insn->header.compression_control = BRW_COMPRESSION_NONE; 
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+      insn->header.mask_control = BRW_MASK_DISABLE;
+      /*insn->header.access_mode = BRW_ALIGN_16;*/
+  
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, brw_null_reg());
+
+      brw_set_dp_read_message(p->brw,
+			      insn,
+			      bind_table_index,
+			      oword,  /* 0 = lower Oword, 1 = upper Oword */
+			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
+			      0, /* source cache = data cache */
+			      1, /* msg_length */
+			      1, /* response_length (1 Oword) */
+			      0); /* eot */
+   }
+}
+
+
+
+/* Emit the SEND for a SIMD16 single-source render target write message. */
+void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest,
+                  GLuint msg_reg_nr, struct brw_reg src0,
+                  GLuint binding_table_index, GLuint msg_length,
+                  GLuint response_length, GLboolean eot)
+{
+   const GLuint msg_control =
+      BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   const GLuint msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+   const GLuint pixel_scoreboard = 1;
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+   send->header.predicate_control = 0; /* XXX */
+   send->header.compression_control = BRW_COMPRESSION_NONE;
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_dp_write_message(p->brw, send,
+                            binding_table_index,
+                            msg_control,
+                            msg_type,
+                            msg_length,
+                            pixel_scoreboard,
+                            response_length,
+                            eot);
+}
+
+
+/**
+ * Texture sample instruction.
+ * Note: the msg_type plus msg_length values determine exactly what kind
+ * of sampling operation is performed.  See volume 4, page 161 of docs.
+ */
+void brw_SAMPLE(struct brw_compile *p,
+		struct brw_reg dest,
+		GLuint msg_reg_nr,
+		struct brw_reg src0,
+		GLuint binding_table_index,
+		GLuint sampler,
+		GLuint writemask,
+		GLuint msg_type,
+		GLuint response_length,
+		GLuint msg_length,
+		GLboolean eot,
+		GLuint header_present,
+		GLuint simd_mode)
+{
+   GLboolean need_stall = 0;
+   
+   if (writemask == 0) {
+      /* An empty writemask emits no instruction at all. */
+      return;
+   }
+   
+   /* Hardware doesn't do destination dependency checking on send
+    * instructions properly.  Add a workaround which generates the
+    * dependency by other means.  In practice it seems like this bug
+    * only crops up for texture samples, and only where registers are
+    * written by the send and then written again later without being
+    * read in between.  Luckily for us, we already track that
+    * information and use it to modify the writemask for the
+    * instruction, so that is a guide for whether a workaround is
+    * needed.
+    */
+   if (writemask != WRITEMASK_XYZW) {
+      GLuint dst_offset = 0;
+      GLuint i, newmask = 0, len = 0;
+
+      for (i = 0; i < 4; i++) {
+	 if (writemask & (1<<i))
+	    break;
+	 dst_offset += 2;   /* skip 2 per leading disabled channel */
+      }
+      for (; i < 4; i++) {
+	 if (!(writemask & (1<<i)))
+	    break;
+	 newmask |= 1<<i;
+	 len++;
+      }
+
+      if (newmask != writemask) {
+	 need_stall = 1;
+         /* Enabled channels are not one contiguous run: stall after SEND. */
+      }
+      else {
+	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
+	 
+	 newmask = ~newmask & WRITEMASK_XYZW;
+
+	 brw_push_insn_state(p);
+
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_set_mask_control(p, BRW_MASK_DISABLE);
+
+	 brw_MOV(p, m1, brw_vec8_grf(0,0));	 
+  	 brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); 
+
+	 brw_pop_insn_state(p);
+
+  	 src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); 
+	 dest = offset(dest, dst_offset);
+	 response_length = len * 2;
+      }
+   }
+
+   {
+      struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+   
+      insn->header.predicate_control = 0; /* XXX */
+      insn->header.compression_control = BRW_COMPRESSION_NONE;
+      insn->header.destreg__conditionalmod = msg_reg_nr;
+
+      brw_set_dest(insn, dest);
+      brw_set_src0(insn, src0);
+      brw_set_sampler_message(p->brw, insn,
+			      binding_table_index,
+			      sampler,
+			      msg_type,
+			      response_length, 
+			      msg_length,
+			      eot,
+			      header_present,
+			      simd_mode);
+   }
+
+   if (need_stall) {
+      struct brw_reg reg = vec8(offset(dest, response_length-1));
+
+      /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
+       */
+      brw_push_insn_state(p);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, reg, reg);	      
+      brw_pop_insn_state(p);
+   }
+
+}
+
+/* All these variables are pretty confusing - we might be better off
+ * using bitmasks and macros for this, in the old style.  Or perhaps
+ * just having the caller instantiate the fields in dword3 itself.
+ */
+void brw_urb_WRITE(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle)
+{
+   struct brw_instruction *send;
+
+   /* Emit a SEND and fill in its URB message fields. */
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < BRW_MAX_MRF);
+
+   /* Operands; src1 starts out as a zero immediate. */
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   /* The message register number is stored in destreg__conditionalmod. */
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_urb_message(p->brw, send,
+		       allocate, used,
+		       msg_length, response_length,
+		       eot, writes_complete,
+		       offset, swizzle);
+}
+
+void brw_ff_sync(struct brw_compile *p,
+		   struct brw_reg dest,
+		   GLuint msg_reg_nr,
+		   struct brw_reg src0,
+		   GLboolean allocate,
+		   GLboolean used,
+		   GLuint msg_length,
+		   GLuint response_length,
+		   GLboolean eot,
+		   GLboolean writes_complete,
+		   GLuint offset,
+		   GLuint swizzle)
+{
+   struct brw_instruction *send;
+
+   /* Emit a SEND and fill in its FF_SYNC message fields. */
+   send = next_insn(p, BRW_OPCODE_SEND);
+
+   assert(msg_length < 16);
+
+   /* Operands; src1 starts out as a zero immediate. */
+   brw_set_dest(send, dest);
+   brw_set_src0(send, src0);
+   brw_set_src1(send, brw_imm_d(0));
+
+   /* The message register number is stored in destreg__conditionalmod. */
+   send->header.destreg__conditionalmod = msg_reg_nr;
+
+   brw_set_ff_sync_message(p->brw, send,
+		       allocate, used,
+		       msg_length, response_length,
+		       eot, writes_complete,
+		       offset, swizzle);
+}
diff --git a/src/gallium/drivers/i965/brw_eu_util.c b/src/gallium/drivers/i965/brw_eu_util.c
new file mode 100644
index 0000000000..5405cf17a4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_eu_util.c
@@ -0,0 +1,126 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_eu.h"
+
+
+/* Emit the extended-math INV function of src into dst, using MRF 0. */
+void brw_math_invert(struct brw_compile *p, struct brw_reg dst,
+		     struct brw_reg src)
+{
+   brw_math(p,
+	    dst,
+	    BRW_MATH_FUNCTION_INV,
+	    BRW_MATH_SATURATE_NONE,
+	    0,                        /* msg_reg_nr */
+	    src,
+	    BRW_MATH_DATA_VECTOR,     /* brw_math takes data_type first... */
+	    BRW_MATH_PRECISION_FULL); /* ...then precision */
+}
+
+
+
+/* Copy 'count' 32-byte chunks from src to dst using pairs of vec4 MOVs. */
+void brw_copy4(struct brw_compile *p, struct brw_reg dst,
+	       struct brw_reg src, GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   const struct brw_reg from = vec4(src);
+   GLuint chunk;
+
+   for (chunk = 0; chunk < count; chunk++) {
+      const GLuint lo = chunk * 32;
+      const GLuint hi = lo + 16;
+
+      /* Each 32-byte chunk is moved as two 16-byte halves. */
+      brw_MOV(p, byte_offset(to, lo), byte_offset(from, lo));
+      brw_MOV(p, byte_offset(to, hi), byte_offset(from, hi));
+   }
+}
+
+
+/* Copy 'count' 32-byte chunks from src to dst, one vec8 MOV per chunk. */
+void brw_copy8(struct brw_compile *p, struct brw_reg dst,
+	       struct brw_reg src, GLuint count)
+{
+   const struct brw_reg to = vec8(dst);
+   const struct brw_reg from = vec8(src);
+   GLuint byte_pos = 0;
+   GLuint remaining;
+
+   /* byte_pos advances by 32 for every chunk copied. */
+   for (remaining = count; remaining > 0; remaining--) {
+      brw_MOV(p, byte_offset(to, byte_pos), byte_offset(from, byte_pos));
+
+      byte_pos += 32;
+   }
+}
+
+
+/* Copy 'count' 32-byte chunks between two indirectly addressed regions. */
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+				   struct brw_indirect dst_ptr,
+				   struct brw_indirect src_ptr,
+				   GLuint count)
+{
+   GLuint chunk;
+   for (chunk = 0; chunk < count; chunk++) {
+      const GLuint lo = chunk * 32;
+      const GLuint hi = lo + 16;
+      brw_MOV(p, deref_4f(dst_ptr, lo), deref_4f(src_ptr, lo));
+      brw_MOV(p, deref_4f(dst_ptr, hi), deref_4f(src_ptr, hi));
+   }
+}
+
+
+/* Copy 'count' 32-byte chunks from an indirectly addressed source to dst. */
+void brw_copy_from_indirect(struct brw_compile *p, struct brw_reg dst,
+			    struct brw_indirect ptr, GLuint count)
+{
+   const struct brw_reg to = vec4(dst);
+   GLuint chunk;
+
+   for (chunk = 0; chunk < count; chunk++) {
+      const GLuint lo = chunk * 32;
+      const GLuint hi = lo + 16;
+
+      /* Each 32-byte chunk is moved as two 16-byte halves. */
+      brw_MOV(p, byte_offset(to, lo), deref_4f(ptr, lo));
+      brw_MOV(p, byte_offset(to, hi), deref_4f(ptr, hi));
+   }
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
new file mode 100644
index 0000000000..48c2b9a41c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -0,0 +1,201 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+      
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "brw_gs.h"
+
+
+
+/* Build and upload the GS kernel described by `key`.
+ *
+ * Fills a brw_gs_compile from the key, emits the instructions for the
+ * key's primitive and stores the program in the state cache under
+ * BRW_GS_PROG, replacing brw->gs.prog_bo.  Returns without uploading
+ * when no emitter applies to the primitive.
+ *
+ * brw:  context whose gs.prog_bo / gs.prog_data are replaced.
+ * key:  program key; copied into the compile and used as cache key.
+ */
+static void compile_gs_prog( struct brw_context *brw,
+			     struct brw_gs_prog_key *key )
+{
+   struct brw_gs_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint extra_regs;
+
+   memset(&c, 0, sizeof(c));
+
+   c.key = *key;
+   c.need_ff_sync = BRW_IS_IGDNG(brw);
+
+   /* Three ways of expressing the vertex size: attribute count, whole
+    * registers (two attributes per register plus a chip-dependent
+    * number of extra registers) and bytes.
+    */
+   extra_regs = BRW_IS_IGDNG(brw) ? 3 : 1;
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_regs = (c.nr_attrs + 1) / 2 + extra_regs;
+   c.nr_bytes = c.nr_regs * REG_SIZE;
+
+   /* Begin the compilation.  Mask control is disabled because the
+    * thread is spawned with only 4 channels unmasked.
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.func.single_program_flow = 1;
+   brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
+
+   /* Primitives which don't require a GS program have already been
+    * weeded out by this stage.  LINES, TRIANGLES and POINTS only get
+    * a program when the key carries hint_gs_always.
+    */
+   switch (key->primitive) {
+   case GL_QUADS:
+      brw_gs_quads(&c);
+      break;
+   case GL_QUAD_STRIP:
+      brw_gs_quad_strip(&c);
+      break;
+   case GL_LINE_LOOP:
+      brw_gs_lines(&c);
+      break;
+   case GL_LINES:
+      if (!key->hint_gs_always)
+	 return;
+      brw_gs_lines(&c);
+      break;
+   case GL_TRIANGLES:
+      if (!key->hint_gs_always)
+	 return;
+      brw_gs_tris(&c);
+      break;
+   case GL_POINTS:
+      if (!key->hint_gs_always)
+	 return;
+      brw_gs_points(&c);
+      break;
+   default:
+      return;
+   }
+
+   /* Fetch the assembled instructions.
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Drop the reference to the previous program, then upload the new
+    * one keyed on c.key.
+    */
+   dri_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_upload_cache(&brw->cache, BRW_GS_PROG,
+				      &c.key, sizeof(c.key),
+				      NULL, 0,
+				      program, program_size,
+				      &c.prog_data,
+				      &brw->gs.prog_data);
+}
+
+static const GLenum gs_prim[GL_POLYGON+1] = {  /* indexed by GL primitive enum; value is the primitive the GS key uses */
+   GL_POINTS,		/* [GL_POINTS] */
+   GL_LINES,		/* [GL_LINES] */
+   GL_LINE_LOOP,	/* [GL_LINE_LOOP] */
+   GL_LINES,		/* [GL_LINE_STRIP] */
+   GL_TRIANGLES,	/* [GL_TRIANGLES] */
+   GL_TRIANGLES,	/* [GL_TRIANGLE_STRIP] */
+   GL_TRIANGLES,	/* [GL_TRIANGLE_FAN] */
+   GL_QUADS,		/* [GL_QUADS] */
+   GL_QUAD_STRIP,	/* [GL_QUAD_STRIP] */
+   GL_TRIANGLES		/* [GL_POLYGON] */
+};
+
+/* Fill in the GS program key: the VS outputs, the primitive reduced
+ * through gs_prim[], and whether a GS program is needed at all (quads,
+ * quad strips and line loops, or whenever hint_gs_always is set).
+ */
+static void populate_key( struct brw_context *brw,
+			  struct brw_gs_prog_key *key )
+{
+   memset(key, 0, sizeof(*key));
+
+   key->attrs = brw->vs.prog_data->outputs_written;	/* CACHE_NEW_VS_PROG */
+   key->primitive = gs_prim[brw->primitive];		/* BRW_NEW_PRIMITIVE */
+   key->hint_gs_always = 0;				/* debug code? */
+
+   key->need_gs_prog = (brw->primitive == GL_QUADS ||
+			brw->primitive == GL_QUAD_STRIP ||
+			brw->primitive == GL_LINE_LOOP ||
+			key->hint_gs_always);
+}
+
+/* Validate the GS program: flag CACHE_NEW_GS_PROG when the GS turns on
+ * or off; while active, fetch the program for the current key from the
+ * cache, compiling it on a miss. */
+static void prepare_gs_prog(struct brw_context *brw)
+{
+   struct brw_gs_prog_key key;
+
+   populate_key(brw, &key);
+
+   if (brw->gs.prog_active != key.need_gs_prog) {
+      brw->gs.prog_active = key.need_gs_prog;
+      brw->state.dirty.cache |= CACHE_NEW_GS_PROG;
+   }
+
+   if (!brw->gs.prog_active)
+      return;
+
+   dri_bo_unreference(brw->gs.prog_bo);
+   brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
+				      &key, sizeof(key), NULL, 0,
+				      &brw->gs.prog_data);
+   if (brw->gs.prog_bo == NULL)
+      compile_gs_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_gs_prog = {	/* tracked-state entry that validates the GS program */
+   .dirty = {
+      .mesa  = 0,
+      .brw   = BRW_NEW_PRIMITIVE,	/* populate_key() reads brw->primitive */
+      .cache = CACHE_NEW_VS_PROG	/* populate_key() reads the VS outputs_written */
+   },
+   .prepare = prepare_gs_prog
+};
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
new file mode 100644
index 0000000000..bbb991ea2e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#ifndef BRW_GS_H
+#define BRW_GS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define MAX_GS_VERTS (4)	     /* capacity of brw_gs_compile.reg.vertex[] */
+
+struct brw_gs_prog_key {	/* cache key for a compiled GS program; 64 bits of bitfields in total */
+   GLuint attrs:32;		/* copied from the VS prog_data outputs_written */
+   GLuint primitive:4;		/* GL primitive enum after the gs_prim[] lookup */
+   GLuint hint_gs_always:1;	/* also lets LINES/TRIANGLES/POINTS get a program */
+   GLuint need_gs_prog:1;	/* whether a GS program is required at all */
+   GLuint pad:26;		/* 32 + 4 + 1 + 1 + 26 = 64 */
+};
+
+struct brw_gs_compile {	/* working state for compiling one GS program */
+   struct brw_compile func;		/* instruction-emission state passed to the brw_* emitters */
+   struct brw_gs_prog_key key;		/* copy of the key being compiled */
+   struct brw_gs_prog_data prog_data;	/* results uploaded alongside the program */
+   
+   struct {
+      struct brw_reg R0;			/* retyped to UD; used as URB message header */
+      struct brw_reg vertex[MAX_GS_VERTS];	/* first register of each payload vertex */
+   } reg;
+
+   /* 3 different ways of expressing vertex size:
+    */
+   GLuint nr_attrs;		/* number of bits set in key.attrs */
+   GLuint nr_regs;		/* registers per vertex */
+   GLuint nr_bytes;		/* nr_regs * REG_SIZE */
+   GLboolean need_ff_sync;	/* emit an FF_SYNC message before the vertices */
+};
+
+#define ATTR_SIZE  (4*4)	/* presumably 4 components of 4 bytes each -- TODO confirm */
+
+void brw_gs_quads( struct brw_gs_compile *c );		/* 4 vertices, sent as a polygon */
+void brw_gs_quad_strip( struct brw_gs_compile *c );	/* 4 vertices, sent as a polygon */
+void brw_gs_tris( struct brw_gs_compile *c );		/* 3 vertices, sent as a trilist */
+void brw_gs_lines( struct brw_gs_compile *c );		/* 2 vertices, sent as a linestrip */
+void brw_gs_points( struct brw_gs_compile *c );	/* 1 vertex, sent as a pointlist */
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
new file mode 100644
index 0000000000..a9b2aa2eac
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -0,0 +1,186 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "shader/program.h"
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_gs.h"
+
+/* Lay out the fixed GRF usage of a GS thread: register 0 as R0, then
+ * nr_verts vertices of c->nr_regs registers each.  The URB read length
+ * and total GRF count are recorded in c->prog_data.
+ */
+static void brw_gs_alloc_regs( struct brw_gs_compile *c,
+			       GLuint nr_verts )
+{
+   GLuint next_grf = 1;		/* register 0 is R0 */
+   GLuint v;
+
+   c->reg.R0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
+
+   /* Payload vertices plus space for more generated vertices. */
+   for (v = 0; v < nr_verts; v++, next_grf += c->nr_regs)
+      c->reg.vertex[v] = brw_vec4_grf(next_grf, 0);
+
+   c->prog_data.urb_read_length = c->nr_regs;
+   c->prog_data.total_grf = next_grf;
+}
+
+
+/* Write one vertex to the URB as a message of its own.
+ *
+ * vert is the first of the c->nr_regs registers holding the vertex,
+ * header is stored in dword 2 of R0, and last marks the final vertex,
+ * whose write sets EOT instead of allocating another URB entry.
+ */
+static void brw_gs_emit_vue(struct brw_gs_compile *c,
+			    struct brw_reg vert,
+			    GLboolean last,
+			    GLuint header)
+{
+   struct brw_compile *p = &c->func;
+   const GLboolean allocate = !last;
+   struct brw_reg dest;
+
+   /* Overwrite PrimType and PrimStart in the message header. */
+   brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header));
+
+   /* Copy the vertex from vertn into m1..mN+1. */
+   brw_copy8(p, brw_message_reg(1), vert, c->nr_regs);
+
+   /* Unlike brw_sf_emit.c, which builds up a single URB entry, every
+    * vertex is a separate write instantiating its own entry, so all
+    * writes but the last request a fresh allocation. */
+   if (allocate)
+      dest = c->reg.R0;
+   else
+      dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UD);
+   brw_urb_WRITE(p, dest, 0, c->reg.R0, allocate,
+		 1,			/* used */
+		 c->nr_regs + 1,	/* msg length */
+		 allocate ? 1 : 0,	/* response length */
+		 allocate ? 0 : 1,	/* eot */
+		 1, 0,			/* writes_complete, urb offset */
+		 BRW_URB_SWIZZLE_NONE);
+}
+
+/* Emit an FF_SYNC message for num_prim primitives.  The count is
+ * written to dword 1 of R0, and R0 is passed to brw_ff_sync() as both
+ * of its register operands. */
+static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_MOV(p, get_element_ud(c->reg.R0, 1), brw_imm_ud(num_prim));
+   brw_ff_sync(p, c->reg.R0, 0, c->reg.R0,
+	       1,
+	       1,			/* used */
+	       1,			/* msg length */
+	       1,			/* response length */
+	       0,			/* eot */
+	       1, 0,			/* write complete, urb offset */
+	       BRW_URB_SWIZZLE_NONE);
+}
+
+
+/* Emit a quad as a four-vertex polygon for correct edgeflag behaviour.
+ * Vertex 3 is the PV for quads but vertex 0 is for polygons, so the
+ * vertices are sent in the order 3, 0, 1, 2. */
+void brw_gs_quads( struct brw_gs_compile *c )
+{
+   const GLuint poly = _3DPRIM_POLYGON << 2;
+   brw_gs_alloc_regs(c, 4);
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, poly | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, poly | R02_PRIM_END);
+}
+
+void brw_gs_quad_strip( struct brw_gs_compile *c )
+{
+   const GLuint poly = _3DPRIM_POLYGON << 2;	/* sent as a polygon, vertex order 2, 3, 0, 1 */
+   brw_gs_alloc_regs(c, 4);
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 0, poly | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[3], 0, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, poly);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, poly | R02_PRIM_END);
+}
+
+void brw_gs_tris( struct brw_gs_compile *c )
+{
+   const GLuint tri = _3DPRIM_TRILIST << 2;	/* three vertices, sent in payload order */
+   brw_gs_alloc_regs(c, 3);
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, tri | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 0, tri);
+   brw_gs_emit_vue(c, c->reg.vertex[2], 1, tri | R02_PRIM_END);
+}
+
+void brw_gs_lines( struct brw_gs_compile *c )
+{
+   const GLuint line = _3DPRIM_LINESTRIP << 2;	/* two vertices, sent as a linestrip */
+   brw_gs_alloc_regs(c, 2);
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 0, line | R02_PRIM_START);
+   brw_gs_emit_vue(c, c->reg.vertex[1], 1, line | R02_PRIM_END);
+}
+
+void brw_gs_points( struct brw_gs_compile *c )
+{
+   const GLuint point = _3DPRIM_POINTLIST << 2;	/* single vertex: both start and end */
+   brw_gs_alloc_regs(c, 1);
+   if (c->need_ff_sync)
+      brw_gs_ff_sync(c, 1);
+   brw_gs_emit_vue(c, c->reg.vertex[0], 1, point | R02_PRIM_START | R02_PRIM_END);
+}
+
+
+
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
new file mode 100644
index 0000000000..ed9d2ffe60
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -0,0 +1,149 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_gs_unit_key {	/* cache key for the GS unit state; zeroed before being filled in */
+   unsigned int total_grf;		/* from the GS prog_data, or 1 when no program is active */
+   unsigned int urb_entry_read_length;	/* from the GS prog_data, or 1 when no program is active */
+
+   unsigned int curbe_offset;		/* brw->curbe.clip_start */
+
+   unsigned int nr_urb_entries, urb_size;	/* brw->urb.nr_gs_entries, brw->urb.vsize */
+   GLboolean prog_active;		/* brw->gs.prog_active */
+};
+
+/* Gather everything the GS unit state depends on into `key`: the
+ * active program's register and URB read requirements, the CURBE
+ * offset and the URB partitioning.  The key is zeroed first.
+ */
+static void
+gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   memset(key, 0, sizeof(*key));
+
+   /* CACHE_NEW_GS_PROG: without an active program both values are 1. */
+   key->prog_active = brw->gs.prog_active;
+   key->total_grf = key->prog_active ? brw->gs.prog_data->total_grf : 1;
+   key->urb_entry_read_length =
+      key->prog_active ? brw->gs.prog_data->urb_read_length : 1;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.clip_start;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_gs_entries;
+   key->urb_size = brw->urb.vsize;
+}
+
+static dri_bo *
+gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+{
+   struct brw_gs_unit_state gs;
+   dri_bo *bo;
+   /* Pack a brw_gs_unit_state from key, upload it to the cache under BRW_GS_UNIT and return that buffer. */
+   memset(&gs, 0, sizeof(gs));
+   /* grf_reg_count: total_grf rounded up to a multiple of 16, in units of 16 registers, minus one. */
+   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   if (key->prog_active) /* reloc: patched by the dri_bo_emit_reloc() call below */
+      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+
+   gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   gs.thread1.single_program_flow = 1;
+
+   gs.thread3.dispatch_grf_start_reg = 1;
+   gs.thread3.const_urb_entry_read_offset = 0;
+   gs.thread3.const_urb_entry_read_length = 0;
+   gs.thread3.urb_entry_read_offset = 0;
+   gs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   /* URB entry count and size come straight from the key; the size is stored minus one. */
+   gs.thread4.nr_urb_entries = key->nr_urb_entries;
+   gs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+   /* max_threads is 1 with at least 8 URB entries, else 0 -- presumably a "count minus one" encoding; confirm. */
+   if (key->nr_urb_entries >= 8)
+      gs.thread4.max_threads = 1;
+   else
+      gs.thread4.max_threads = 0;
+
+   if (BRW_IS_IGDNG(brw))
+      gs.thread4.rendering_enable = 1;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      gs.thread4.stats_enable = 1;
+   /* brw->gs.prog_bo is passed as the single referenced buffer, whether or not a program is active. */
+   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+			 key, sizeof(*key),
+			 &brw->gs.prog_bo, 1,
+			 &gs, sizeof(gs),
+			 NULL, NULL);
+
+   if (key->prog_active) {
+      /* Emit GS program relocation for the thread0 dword; the delta is grf_reg_count << 1 (presumably to keep that field in the dword's low bits -- confirm) */
+      dri_bo_emit_reloc(bo,
+			I915_GEM_DOMAIN_INSTRUCTION, 0,
+			gs.thread0.grf_reg_count << 1,
+			offsetof(struct brw_gs_unit_state, thread0),
+			brw->gs.prog_bo);
+   }
+
+   return bo;
+}
+
+/* Replace brw->gs.state_bo with the cached GS unit state matching the
+ * current key, building and caching it on a miss. */
+static void prepare_gs_unit(struct brw_context *brw)
+{
+   struct brw_gs_unit_key key;
+
+   gs_unit_populate_key(brw, &key);
+
+   dri_bo_unreference(brw->gs.state_bo);
+   brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
+				       &key, sizeof(key),
+				       &brw->gs.prog_bo, 1, NULL);
+   if (!brw->gs.state_bo)
+      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
+}
+
+const struct brw_tracked_state brw_gs_unit = {	/* tracked-state entry that validates the GS unit state */
+   .dirty = {
+      .mesa  = 0,
+      .brw   = (BRW_NEW_CURBE_OFFSETS |	/* key->curbe_offset */
+		BRW_NEW_URB_FENCE),	/* key->nr_urb_entries, key->urb_size */
+      .cache = CACHE_NEW_GS_PROG	/* key->prog_active, total_grf, urb_entry_read_length */
+   },
+   .prepare = prepare_gs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
new file mode 100644
index 0000000000..ea71857548
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -0,0 +1,545 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+ 
+
+
+#include "intel_batchbuffer.h"
+#include "intel_regions.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+
+
+
+
+/***********************************************************************
+ * Blend color
+ */
+
+/* Emit CMD_BLEND_CONSTANT_COLOR carrying the context's blend color. */
+static void upload_blend_constant_color(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_blend_constant_color bcc;
+   GLuint chan;
+
+   memset(&bcc, 0, sizeof(bcc));
+   bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
+   bcc.header.length = sizeof(bcc)/4-2;
+   for (chan = 0; chan < 4; chan++)
+      bcc.blend_constant_color[chan] = ctx->Color.BlendColor[chan];
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+}
+
+
+const struct brw_tracked_state brw_blend_constant_color = {	/* tracked-state entry for the blend constant color packet */
+   .dirty = {
+      .mesa = _NEW_COLOR,	/* emit reads ctx->Color.BlendColor */
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_blend_constant_color
+};
+
+/* Emit the drawing rectangle: a constant single cliprect for framebuffer object or DRI2 drawing.  Nothing is emitted unless intel->constant_cliprect is set. */
+static void upload_drawing_rect(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+
+   if (!intel->constant_cliprect)
+      return;
+   /* Four dwords: command, min corner, max corner, and a final zero dword. */
+   BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
+   OUT_BATCH(0); /* xmin, ymin */
+   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
+	    ((ctx->DrawBuffer->Height - 1) << 16)); /* width-1 in the low 16 bits, height-1 above them */
+   OUT_BATCH(0); /* NOTE(review): meaning not shown here -- presumably the drawing origin; confirm */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_drawing_rect = {	/* tracked-state entry for the drawing rectangle packet */
+   .dirty = {
+      .mesa = _NEW_BUFFERS,	/* emit reads ctx->DrawBuffer Width/Height */
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_drawing_rect
+};
+
+static void prepare_binding_table_pointers(struct brw_context *brw)	/* prepare hook of brw_binding_table_pointers */
+{
+   brw_add_validated_bo(brw, brw->vs.bind_bo);	/* may be NULL: the emit hook checks this one for NULL */
+   brw_add_validated_bo(brw, brw->wm.bind_bo);	/* relocated unconditionally by the emit hook */
+}
+
+/**
+ * Upload the binding table pointers, which point each stage's array of surface
+ * state pointers.
+ *
+ * The binding table pointers are relative to the surface state base address,
+ * which is 0.
+ */
+static void upload_binding_table_pointers(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel; /* not named below; presumably consumed by the batch macros */
+
+   BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); /* length field is the dword count minus 2 */
+   if (brw->vs.bind_bo != NULL)
+      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+   else
+      OUT_BATCH(0); /* no VS binding table */
+   OUT_BATCH(0); /* gs */
+   OUT_BATCH(0); /* clip */
+   OUT_BATCH(0); /* sf */
+   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps; no NULL check, unlike vs */
+   ADVANCE_BATCH();
+}
+
+const struct brw_tracked_state brw_binding_table_pointers = {	/* tracked-state entry for the binding table pointers packet */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_BATCH,
+      .cache = CACHE_NEW_SURF_BIND,
+   },
+   .prepare = prepare_binding_table_pointers,	/* registers vs/wm bind_bo */
+   .emit = upload_binding_table_pointers,	/* writes the 6-dword packet */
+};
+
+
+/**
+ * Upload pointers to the per-stage state.
+ *
+ * The state pointers in this packet are all relative to the general state
+ * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
+ */
+static void upload_pipelined_state_pointers(struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel; /* not named below; presumably consumed by the batch macros */
+
+   BEGIN_BATCH(7, IGNORE_CLIPRECTS);
+   OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
+   OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* vs */
+   if (brw->gs.prog_active)
+      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* gs; delta 1 presumably sets an enable bit -- confirm */
+   else
+      OUT_BATCH(0); /* gs inactive: no state pointer */
+   OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* clip; delta 1 as for gs */
+   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* sf */
+   OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */
+   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* cc */
+   ADVANCE_BATCH();
+
+   brw->state.dirty.brw |= BRW_NEW_PSP; /* flag that the pipelined state pointers were re-emitted */
+}
+
+
+static void prepare_psp_urb_cbs(struct brw_context *brw)	/* prepare hook: registers every unit state buffer */
+{
+   brw_add_validated_bo(brw, brw->vs.state_bo);
+   brw_add_validated_bo(brw, brw->gs.state_bo);	/* added even when gs.prog_active is false */
+   brw_add_validated_bo(brw, brw->clip.state_bo);
+   brw_add_validated_bo(brw, brw->sf.state_bo);
+   brw_add_validated_bo(brw, brw->wm.state_bo);
+   brw_add_validated_bo(brw, brw->cc.state_bo);
+}
+
+static void upload_psp_urb_cbs(struct brw_context *brw )	/* emit hook: three uploads in a fixed order */
+{
+   upload_pipelined_state_pointers(brw);	/* also sets BRW_NEW_PSP */
+   brw_upload_urb_fence(brw);
+   brw_upload_cs_urb_state(brw);
+}
+
+const struct brw_tracked_state brw_psp_urb_cbs = {	/* tracked-state entry: state pointers, URB fence and CS URB state */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_URB_FENCE | BRW_NEW_BATCH,
+      .cache = (CACHE_NEW_VS_UNIT | 	/* vs.state_bo */
+		CACHE_NEW_GS_UNIT | 	/* gs.state_bo */
+		CACHE_NEW_GS_PROG | 	/* gs.prog_active selects reloc vs. 0 */
+		CACHE_NEW_CLIP_UNIT | 	/* clip.state_bo */
+		CACHE_NEW_SF_UNIT | 	/* sf.state_bo */
+		CACHE_NEW_WM_UNIT | 	/* wm.state_bo */
+		CACHE_NEW_CC_UNIT)	/* cc.state_bo */
+   },
+   .prepare = prepare_psp_urb_cbs,
+   .emit = upload_psp_urb_cbs,
+};
+
+/* Hand the depth region's buffer, if any, to brw_add_validated_bo(). */
+static void prepare_depthbuffer(struct brw_context *brw)
+{
+   if (brw->state.depth_region == NULL)
+      return;
+   brw_add_validated_bo(brw, brw->state.depth_region->buffer);
+}
+
+static void emit_depthbuffer(struct brw_context *brw)	/* emit hook: writes the CMD_DEPTH_BUFFER packet */
+{
+   struct intel_context *intel = &brw->intel;
+   struct intel_region *region = brw->state.depth_region;
+   unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5; /* packet is one dword longer on G4X/IGDNG */
+
+   if (region == NULL) {
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS); /* no depth region: emit a null surface */
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+		(BRW_SURFACE_NULL << 29));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         OUT_BATCH(0); /* the extra sixth dword */
+
+      ADVANCE_BATCH();
+   } else {
+      unsigned int format;
+      /* Pick the depth format from the region's bytes per pixel. */
+      switch (region->cpp) {
+      case 2:
+	 format = BRW_DEPTHFORMAT_D16_UNORM;
+	 break;
+      case 4:
+	 if (intel->depth_buffer_is_float)
+	    format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 else
+	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 break;
+      default:
+	 assert(0); /* any other cpp is unsupported; nothing is emitted */
+	 return;
+      }
+
+      assert(region->tiling != I915_TILING_X); /* X tiling is not expected for the depth buffer */
+
+      BEGIN_BATCH(len, IGNORE_CLIPRECTS);
+      OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
+      OUT_BATCH(((region->pitch * region->cpp) - 1) | /* pitch * cpp, minus one */
+		(format << 18) |
+		(BRW_TILEWALK_YMAJOR << 26) |
+		((region->tiling != I915_TILING_NONE) << 27) | /* 1 when the region is tiled */
+		(BRW_SURFACE_2D << 29));
+      OUT_RELOC(region->buffer,
+		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		0);
+      OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
+		((region->pitch - 1) << 6) | /* pitch - 1 at bit 6 */
+		((region->height - 1) << 19)); /* height - 1 at bit 19 */
+      OUT_BATCH(0);
+
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+         OUT_BATCH(0); /* the extra sixth dword */
+
+      ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state brw_depthbuffer = {	/* tracked-state entry for the depth buffer packet */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .cache = 0,
+   },
+   .prepare = prepare_depthbuffer,	/* registers the depth region's buffer, if any */
+   .emit = emit_depthbuffer,
+};
+
+
+
+/***********************************************************************
+ * Polygon stipple packet
+ */
+
+/* Emit CMD_POLY_STIPPLE_PATTERN with the context's 32-row stipple.
+ *
+ * Polygon stipple is provided in OpenGL order, i.e. bottom row first.
+ * When rendering to a window (the default framebuffer object, 0) the
+ * rows are flipped to match our pixel layout; when rendering to an
+ * FBO (any named framebuffer object) they already match and are
+ * copied as-is.
+ */
+static void upload_polygon_stipple(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_polygon_stipple bps;
+   const GLboolean flip = (ctx->DrawBuffer->Name == 0);
+   GLuint row;
+
+   memset(&bps, 0, sizeof(bps));
+   bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
+   bps.header.length = sizeof(bps)/4-2;
+
+   for (row = 0; row < 32; row++) {
+      if (flip)
+         bps.stipple[row] = ctx->PolygonStipple[31 - row];
+      else
+         bps.stipple[row] = ctx->PolygonStipple[row];
+   }
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+}
+
+const struct brw_tracked_state brw_polygon_stipple = {	/* tracked-state entry for the polygon stipple pattern packet */
+   .dirty = {
+      .mesa = _NEW_POLYGONSTIPPLE,	/* NOTE(review): emit also reads ctx->DrawBuffer->Name; confirm a draw-buffer change re-triggers this */
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple
+};
+
+
+/***********************************************************************
+ * Polygon stipple offset packet
+ */
+
+static void upload_polygon_stipple_offset(struct brw_context *brw)	/* emit hook: writes CMD_POLY_STIPPLE_OFFSET */
+{
+   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
+   struct brw_polygon_stipple_offset bpso;
+
+   memset(&bpso, 0, sizeof(bpso));
+   bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+   bpso.header.length = sizeof(bpso)/4-2;
+
+   /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
+    * we have to invert the Y axis in order to match the OpenGL
+    * pixel coordinate system, and our offset must be matched
+    * to the window position.  If we're drawing to a FBO
+    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
+    * system works just fine, and there's no window system to
+    * worry about.
+    */
+   if (brw->intel.ctx.DrawBuffer->Name == 0) {
+      bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31; /* window x modulo 32, negated; result in 0..31 */
+      bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31; /* same, using y + h of the drawable */
+   }
+   else {
+      bpso.bits0.y_offset = 0;
+      bpso.bits0.x_offset = 0;
+   }
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+}
+
+#define _NEW_WINDOW_POS 0x40000000 /* dirty bit defined locally in this file; used only by the entry below */
+
+const struct brw_tracked_state brw_polygon_stipple_offset = {	/* tracked-state entry for the polygon stipple offset packet */
+   .dirty = {
+      .mesa = _NEW_WINDOW_POS,	/* emit reads the drawable's x, y and h */
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_polygon_stipple_offset
+};
+
+/**********************************************************************
+ * AA Line parameters
+ */
+static void upload_aa_line_parameters(struct brw_context *brw)	/* emit hook: writes CMD_AA_LINE_PARAMETERS */
+{
+   struct brw_aa_line_parameters balp;
+   
+   if (BRW_IS_965(brw))
+      return; /* the packet is not emitted when BRW_IS_965() is true */
+
+   /* use legacy aa line coverage computation */
+   memset(&balp, 0, sizeof(balp)); /* everything except the header stays zero */
+   balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+   balp.header.length = sizeof(balp) / 4 - 2;
+   
+   BRW_CACHED_BATCH_STRUCT(brw, &balp);
+}
+
+const struct brw_tracked_state brw_aa_line_parameters = {	/* tracked-state entry for the AA line parameters packet */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the packet contents depend on no other state */
+      .cache = 0
+   },
+   .emit = upload_aa_line_parameters
+};
+
+/***********************************************************************
+ * Line stipple packet
+ */
+
+/* Emit CMD_LINE_STIPPLE_PATTERN: the stipple pattern, its repeat
+ * count (the stipple factor) and the reciprocal of that factor scaled
+ * by 1<<13 and truncated to an integer.
+ */
+static void upload_line_stipple(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_line_stipple bls;
+   GLfloat inv_factor;
+
+   memset(&bls, 0, sizeof(bls));
+   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls.header.length = sizeof(bls)/4 - 2;
+
+   bls.bits0.pattern = ctx->Line.StipplePattern;
+   bls.bits1.repeat_count = ctx->Line.StippleFactor;
+
+   inv_factor = 1.0 / (GLfloat) ctx->Line.StippleFactor;
+   bls.bits1.inverse_repeat_count = (GLint) (inv_factor * (1<<13));
+
+   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+}
+
+const struct brw_tracked_state brw_line_stipple = {	/* tracked-state entry for the line stipple packet */
+   .dirty = {
+      .mesa = _NEW_LINE,	/* emit reads ctx->Line.StipplePattern and StippleFactor */
+      .brw = 0,
+      .cache = 0
+   },
+   .emit = upload_line_stipple
+};
+
+
+/***********************************************************************
+ * Misc invariant state packets
+ */
+
+static void upload_invarient_state( struct brw_context *brw )	/* emit hook: four fixed packets, each built in its own scope */
+{
+   {
+      /* 0x61040000  Pipeline Select */
+      /*     PipelineSelect            : 0 */
+      struct brw_pipeline_select ps;
+
+      memset(&ps, 0, sizeof(ps));
+      ps.header.opcode = CMD_PIPELINE_SELECT(brw); /* opcode macro takes brw, so it may differ per chip */
+      ps.header.pipeline_select = 0;
+      BRW_BATCH_STRUCT(brw, &ps);
+   }
+
+   {
+      struct brw_global_depth_offset_clamp gdo;
+      memset(&gdo, 0, sizeof(gdo));
+
+      /* Disable depth offset clamping. 
+       */
+      gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP;
+      gdo.header.length = sizeof(gdo)/4 - 2;
+      gdo.depth_offset_clamp = 0.0;
+
+      BRW_BATCH_STRUCT(brw, &gdo);
+   }
+
+
+   /* 0x61020000  State Instruction Pointer */
+   {
+      struct brw_system_instruction_pointer sip;
+      memset(&sip, 0, sizeof(sip));
+
+      sip.header.opcode = CMD_STATE_INSN_POINTER;
+      sip.header.length = 0;
+      sip.bits0.pad = 0;
+      sip.bits0.system_instruction_pointer = 0; /* no system routine: pointer left at 0 */
+      BRW_BATCH_STRUCT(brw, &sip);
+   }
+
+
+   {
+      struct brw_vf_statistics vfs;
+      memset(&vfs, 0, sizeof(vfs));
+
+      vfs.opcode = CMD_VF_STATISTICS(brw);
+      if (INTEL_DEBUG & DEBUG_STATS)
+	 vfs.statistics_enable = 1; /* statistics only when DEBUG_STATS is set */
+
+      BRW_BATCH_STRUCT(brw, &vfs);
+   }
+}
+
+const struct brw_tracked_state brw_invarient_state = {	/* tracked-state entry for the fixed packets above */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the only dirty bit that triggers this entry */
+      .cache = 0
+   },
+   .emit = upload_invarient_state
+};
+
+/**
+ * Define the base addresses which some state is referenced from.
+ *
+ * This allows us to avoid having to emit relocations in many places for
+ * cached state, and instead emit pointers inside of large, mostly-static
+ * state pools.  This comes at the expense of memory, and more expensive cache
+ * misses.
+ */
+static void upload_state_base_address( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel; /* not named below; presumably consumed by the batch macros */
+
+   /* Output the structure (brw_state_base_address) directly to the
+    * batchbuffer, so we can emit relocations inline.
+    */
+   if (BRW_IS_IGDNG(brw)) { /* IGDNG: 8 dwords, adding an instruction base and its upper bound */
+       BEGIN_BATCH(8, IGNORE_CLIPRECTS);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* Instruction base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       OUT_BATCH(1); /* Instruction access upper bound */
+       ADVANCE_BATCH();
+   } else { /* other chips: 6 dwords; every payload dword is 1 (presumably address 0 plus a modify-enable bit -- confirm) */
+       BEGIN_BATCH(6, IGNORE_CLIPRECTS);
+       OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
+       OUT_BATCH(1); /* General state base address */
+       OUT_BATCH(1); /* Surface state base address */
+       OUT_BATCH(1); /* Indirect object base address */
+       OUT_BATCH(1); /* General state upper bound */
+       OUT_BATCH(1); /* Indirect object upper bound */
+       ADVANCE_BATCH();
+   }
+}
+
+const struct brw_tracked_state brw_state_base_address = {	/* tracked-state entry for the state base address packet */
+   .dirty = {
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,	/* the only dirty bit that triggers this entry */
+      .cache = 0,
+   },
+   .emit = upload_state_base_address
+};
diff --git a/src/gallium/drivers/i965/brw_program.c b/src/gallium/drivers/i965/brw_program.c
new file mode 100644
index 0000000000..bac69187c1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_program.c
@@ -0,0 +1,166 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+#include "main/imports.h"
+#include "main/enums.h"
+#include "shader/prog_parameter.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "tnl/tnl.h"
+
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+static void brwBindProgram( GLcontext *ctx,
+			    GLenum target, 
+			    struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: 
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      break;
+   }
+}
+
+static struct gl_program *brwNewProgram( GLcontext *ctx,
+				      GLenum target, 
+				      GLuint id )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: {
+      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+
+	 return _mesa_init_vertex_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL;
+   }
+
+   case GL_FRAGMENT_PROGRAM_ARB: {
+      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+
+	 return _mesa_init_fragment_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL;
+   }
+
+   default:
+      return _mesa_new_program(ctx, target, id);
+   }
+}
+
+static void brwDeleteProgram( GLcontext *ctx,
+			      struct gl_program *prog )
+{
+   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+      dri_bo_unreference(brw_fprog->const_buffer);
+   }
+
+   _mesa_delete_program( ctx, prog );
+}
+
+
+static GLboolean brwIsProgramNative( GLcontext *ctx,
+				     GLenum target, 
+				     struct gl_program *prog )
+{
+   return GL_TRUE;
+}
+
+static void brwProgramStringNotify( GLcontext *ctx,
+				    GLenum target,
+				    struct gl_program *prog )
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+      const struct brw_fragment_program *curFP =
+         brw_fragment_program_const(brw->fragment_program);
+
+      if (fprog->FogOption) {
+         _mesa_append_fog_code(ctx, fprog);
+         fprog->FogOption = GL_NONE;
+      }
+
+      if (newFP == curFP)
+	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      newFP->id = brw->program_id++;      
+      newFP->isGLSL = brw_wm_is_glsl(fprog);
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+      const struct brw_vertex_program *curVP =
+         brw_vertex_program_const(brw->vertex_program);
+
+      if (newVP == curVP)
+	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (newVP->program.IsPositionInvariant) {
+	 _mesa_insert_mvp_code(ctx, &newVP->program);
+      }
+      newVP->id = brw->program_id++;      
+
+      /* Also tell tnl about it:
+       */
+      _tnl_program_string(ctx, target, prog);
+   }
+}
+
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+   assert(functions->ProgramStringNotify == _tnl_program_string); 
+
+   functions->BindProgram = brwBindProgram;
+   functions->NewProgram = brwNewProgram;
+   functions->DeleteProgram = brwDeleteProgram;
+   functions->IsProgramNative = brwIsProgramNative;
+   functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/src/gallium/drivers/i965/brw_queryobj.c b/src/gallium/drivers/i965/brw_queryobj.c
new file mode 100644
index 0000000000..a195bc32b0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_queryobj.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_occlusion_query
+ *
+ * ARB_occlusion_query is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours.  To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once.  This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "main/simple_list.h"
+#include "main/imports.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/** Waits on the query object's BO and totals the results for this query */
+static void
+brw_queryobj_get_results(struct brw_query_object *query)
+{
+   int i;
+   uint64_t *results;
+
+   if (query->bo == NULL)
+      return;
+
+   /* Map and count the pixels from the current query BO */
+   dri_bo_map(query->bo, GL_FALSE);
+   results = query->bo->virtual;
+   for (i = query->first_index; i <= query->last_index; i++) {
+      query->Base.Result += results[i * 2 + 1] - results[i * 2];
+   }
+   dri_bo_unmap(query->bo);
+
+   dri_bo_unreference(query->bo);
+   query->bo = NULL;
+}
+
+static struct gl_query_object *
+brw_new_query_object(GLcontext *ctx, GLuint id)
+{
+   struct brw_query_object *query;
+
+   query = _mesa_calloc(sizeof(struct brw_query_object));
+
+   query->Base.Id = id;
+   query->Base.Result = 0;
+   query->Base.Active = GL_FALSE;
+   query->Base.Ready = GL_TRUE;
+
+   return &query->Base;
+}
+
+static void
+brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   dri_bo_unreference(query->bo);
+   _mesa_free(query);
+}
+
+static void
+brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Reset our driver's tracking of query state. */
+   dri_bo_unreference(query->bo);
+   query->bo = NULL;
+   query->first_index = -1;
+   query->last_index = -1;
+
+   insert_at_head(&brw->query.active_head, query);
+   intel->stats_wm++;
+}
+
+/**
+ * End the ARB_occlusion_query query on a query object.
+ */
+static void
+brw_end_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct intel_context *intel = intel_context(ctx);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Flush the batchbuffer in case it has writes to our query BO.
+    * Have later queries write to a new query BO so that further rendering
+    * doesn't delay the collection of our results.
+    */
+   if (query->bo) {
+      brw_emit_query_end(brw);
+      intel_batchbuffer_flush(intel->batch);
+
+      dri_bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+   }
+
+   remove_from_list(query);
+
+   intel->stats_wm--;
+}
+
+static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   brw_queryobj_get_results(query);
+   query->Base.Ready = GL_TRUE;
+}
+
+static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+      brw_queryobj_get_results(query);
+      query->Base.Ready = GL_TRUE;
+   }
+}
+
+/** Called to set up the query BO and account for its aperture space */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   /* Skip if we're not doing any queries. */
+   if (is_empty_list(&brw->query.active_head))
+      return;
+
+   /* Get a new query BO if we're going to need it. */
+   if (brw->query.bo == NULL ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+      dri_bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+
+      brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_query_object *query;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (brw->query.active || is_empty_list(&brw->query.active_head))
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   foreach(query, &brw->query.active_head) {
+      if (query->bo != brw->query.bo) {
+	 if (query->bo != NULL)
+	    brw_queryobj_get_results(query);
+	 dri_bo_reference(brw->query.bo);
+	 query->bo = brw->query.bo;
+	 query->first_index = brw->query.index;
+      }
+      query->last_index = brw->query.index;
+   }
+   brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+
+   if (!brw->query.active)
+      return;
+
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   brw->query.active = GL_FALSE;
+   brw->query.index++;
+}
+
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+   functions->NewQueryObject = brw_new_query_object;
+   functions->DeleteQuery = brw_delete_query;
+   functions->BeginQuery = brw_begin_query;
+   functions->EndQuery = brw_end_query;
+   functions->CheckQuery = brw_check_query;
+   functions->WaitQuery = brw_wait_query;
+}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
new file mode 100644
index 0000000000..e1c2c7777b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -0,0 +1,200 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+#include "brw_state.h"
+
+static void compile_sf_prog( struct brw_context *brw,
+			     struct brw_sf_prog_key *key )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_sf_compile c;
+   const GLuint *program;
+   GLuint program_size;
+   GLuint i, idx;
+
+   memset(&c, 0, sizeof(c));
+
+   /* Begin the compilation:
+    */
+   brw_init_compile(brw, &c.func);
+
+   c.key = *key;
+   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attr_regs = (c.nr_attrs+1)/2;
+   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
+
+   c.prog_data.urb_read_length = c.nr_attr_regs;
+   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+
+   /* Construct map from attribute number to position in the vertex.
+    */
+   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
+      if (c.key.attrs & (1<<i)) {
+	 c.attr_to_idx[i] = idx;
+	 c.idx_to_attr[idx] = i;
+	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
+            c.point_attrs[i].CoordReplace = 
+               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
+	 }
+         else {
+            c.point_attrs[i].CoordReplace = GL_FALSE;
+         }
+	 idx++;
+      }
+   
+   /* Which primitive?  Or all three? 
+    */
+   switch (key->primitive) {
+   case SF_TRIANGLES:
+      c.nr_verts = 3;
+      brw_emit_tri_setup( &c, GL_TRUE );
+      break;
+   case SF_LINES:
+      c.nr_verts = 2;
+      brw_emit_line_setup( &c, GL_TRUE );
+      break;
+   case SF_POINTS:
+      c.nr_verts = 1;
+      if (key->do_point_sprite)
+	  brw_emit_point_sprite_setup( &c, GL_TRUE );
+      else
+	  brw_emit_point_setup( &c, GL_TRUE );
+      break;
+   case SF_UNFILLED_TRIS:
+      c.nr_verts = 3;
+      brw_emit_anyprim_setup( &c );
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   /* get the program
+    */
+   program = brw_get_program(&c.func, &program_size);
+
+   /* Upload
+    */
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+				       &c.key, sizeof(c.key),
+				       NULL, 0,
+				       program, program_size,
+				       &c.prog_data,
+				       &brw->sf.prog_data );
+}
+
+/* Calculate interpolants for triangle and line rasterization.
+ */
+static void upload_sf_prog(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_sf_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+
+   /* Populate the key, noting state dependencies:
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.attrs = brw->vs.prog_data->outputs_written; 
+
+   /* BRW_NEW_REDUCED_PRIMITIVE */
+   switch (brw->intel.reduced_primitive) {
+   case GL_TRIANGLES: 
+      /* NOTE: We just use the edgeflag attribute as an indicator that
+       * unfilled triangles are active.  We don't actually do the
+       * edgeflag testing here, it is already done in the clip
+       * program.
+       */
+      if (key.attrs & (1<<VERT_RESULT_EDGE))
+	 key.primitive = SF_UNFILLED_TRIS;
+      else
+	 key.primitive = SF_TRIANGLES;
+      break;
+   case GL_LINES: 
+      key.primitive = SF_LINES; 
+      break;
+   case GL_POINTS: 
+      key.primitive = SF_POINTS; 
+      break;
+   }
+
+   key.do_point_sprite = ctx->Point.PointSprite;
+   key.SpriteOrigin = ctx->Point.SpriteOrigin;
+   /* _NEW_LIGHT */
+   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+
+   /* _NEW_HINT */
+   key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+   /* _NEW_POLYGON */
+   if (key.do_twoside_color) {
+      /* If we're rendering to a FBO, we have to invert the polygon
+       * face orientation, just as we invert the viewport in
+       * sf_unit_create_from_key().  ctx->DrawBuffer->Name will be
+       * nonzero if we're rendering to such an FBO.
+       */
+      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+   }
+
+   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->sf.prog_data);
+   if (brw->sf.prog_bo == NULL)
+      compile_sf_prog( brw, &key );
+}
+
+
+const struct brw_tracked_state brw_sf_prog = {
+   .dirty = {
+      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
+      .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG
+   },
+   .prepare = upload_sf_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
new file mode 100644
index 0000000000..6426b6df9f
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -0,0 +1,113 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#ifndef BRW_SF_H
+#define BRW_SF_H
+
+
+#include "shader/program.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+
+#define SF_POINTS    0
+#define SF_LINES     1
+#define SF_TRIANGLES 2
+#define SF_UNFILLED_TRIS   3
+
+struct brw_sf_prog_key {
+   GLuint attrs:32;
+   GLuint primitive:2;
+   GLuint do_twoside_color:1;
+   GLuint do_flat_shading:1;
+   GLuint frontface_ccw:1;
+   GLuint do_point_sprite:1;
+   GLuint linear_color:1;  /**< linear interp vs. perspective interp */
+   GLuint pad:25;
+   GLenum SpriteOrigin;
+};
+
+struct brw_sf_point_tex {
+	GLboolean CoordReplace;	
+};
+
+struct brw_sf_compile {
+   struct brw_compile func;
+   struct brw_sf_prog_key key;
+   struct brw_sf_prog_data prog_data;
+   
+   struct brw_reg pv;
+   struct brw_reg det;
+   struct brw_reg dx0;
+   struct brw_reg dx2;
+   struct brw_reg dy0;
+   struct brw_reg dy2;
+
+   /* z and 1/w passed in separately:
+    */
+   struct brw_reg z[3];
+   struct brw_reg inv_w[3];
+   
+   /* The vertices:
+    */
+   struct brw_reg vert[3];
+
+    /* Temporaries, allocated after last vertex reg.
+    */
+   struct brw_reg inv_det;
+   struct brw_reg a1_sub_a0;
+   struct brw_reg a2_sub_a0;
+   struct brw_reg tmp;
+
+   struct brw_reg m1Cx;
+   struct brw_reg m2Cy;
+   struct brw_reg m3C0;
+
+   GLuint nr_verts;
+   GLuint nr_attrs;
+   GLuint nr_attr_regs;
+   GLuint nr_setup_attrs;
+   GLuint nr_setup_regs;
+
+   GLubyte attr_to_idx[VERT_RESULT_MAX];   
+   GLubyte idx_to_attr[VERT_RESULT_MAX];   
+   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+};
+
+ 
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate );
+void brw_emit_anyprim_setup( struct brw_sf_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
new file mode 100644
index 0000000000..ca8f97f9f9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -0,0 +1,739 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+
+#include "intel_batchbuffer.h"
+
+#include "brw_defines.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_util.h"
+#include "brw_sf.h"
+
+
+static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
+				    struct brw_reg vert,
+				    GLuint attr)
+{
+   GLuint off = c->attr_to_idx[attr] / 2;
+   GLuint sub = c->attr_to_idx[attr] % 2;
+
+   return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+static GLboolean have_attr(struct brw_sf_compile *c,
+			   GLuint attr)
+{
+   return (c->key.attrs & (1<<attr)) ? 1 : 0;
+}
+
+/*********************************************************************** 
+ * Twoside lighting
+ */
+static void copy_bfc( struct brw_sf_compile *c,
+		      struct brw_reg vert )
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   for (i = 0; i < 2; i++) {
+      if (have_attr(c, VERT_RESULT_COL0+i) &&
+	  have_attr(c, VERT_RESULT_BFC0+i))
+	 brw_MOV(p, 
+		 get_vert_attr(c, vert, VERT_RESULT_COL0+i), 
+		 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
+   }
+}
+
+
+static void do_twoside_color( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   /* XXX: What happens if BFC isn't present?  This could only happen
+    * for user-supplied vertex programs, as t_vp_build.c always does
+    * the right thing.
+    */
+   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
+       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+      return;
+   
+   /* Need to use BRW_EXECUTE_4 and also do a 4-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_push_insn_state(p);
+   brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_4); 
+   {
+      switch (c->nr_verts) {
+      case 3: copy_bfc(c, c->vert[2]);
+      case 2: copy_bfc(c, c->vert[1]);
+      case 1: copy_bfc(c, c->vert[0]);
+      }
+   }
+   brw_ENDIF(p, if_insn);
+   brw_pop_insn_state(p);
+}
+
+
+
+/***********************************************************************
+ * Flat shading
+ */
+
+#define VERT_RESULT_COLOR_BITS ((1<<VERT_RESULT_COL0) | \
+                                 (1<<VERT_RESULT_COL1))
+
+static void copy_colors( struct brw_sf_compile *c,
+		     struct brw_reg dst,
+		     struct brw_reg src)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
+      if (have_attr(c,i))
+	 brw_MOV(p, 
+		 get_vert_attr(c, dst, i), 
+		 get_vert_attr(c, src, i));
+   }
+}
+
+
+
+/* Need to use a computed jump to copy flatshaded attributes as the
+ * vertices are ordered according to y-coordinate before reaching this
+ * point, so the PV could be anywhere.
+ */
+static void do_flatshade_triangle( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   GLuint jmpi = 1;
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program:
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (BRW_IS_IGDNG(p->brw))
+       jmpi = 2;
+
+   brw_push_insn_state(p);
+   
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
+   brw_JMPI(p, ip, ip, c->pv);
+
+   copy_colors(c, c->vert[1], c->vert[0]);
+   copy_colors(c, c->vert[2], c->vert[0]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
+
+   copy_colors(c, c->vert[0], c->vert[1]);
+   copy_colors(c, c->vert[2], c->vert[1]);
+   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
+
+   copy_colors(c, c->vert[0], c->vert[2]);
+   copy_colors(c, c->vert[1], c->vert[2]);
+
+   brw_pop_insn_state(p);
+}
+	
+
+static void do_flatshade_line( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   GLuint jmpi = 1;
+
+   if (!nr)
+      return;
+
+   /* Already done in clip program: 
+    */
+   if (c->key.primitive == SF_UNFILLED_TRIS)
+      return;
+
+   if (BRW_IS_IGDNG(p->brw))
+       jmpi = 2;
+
+   brw_push_insn_state(p);
+   
+   brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
+   brw_JMPI(p, ip, ip, c->pv);
+   copy_colors(c, c->vert[1], c->vert[0]);
+
+   brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+   copy_colors(c, c->vert[0], c->vert[1]);
+
+   brw_pop_insn_state(p);
+}
+
+	
+
+/***********************************************************************
+ * Triangle setup.
+ */
+
+
+static void alloc_regs( struct brw_sf_compile *c )
+{
+   GLuint reg, i;
+
+   /* Values computed by fixed function unit:
+    */
+   c->pv  = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_D);
+   c->det = brw_vec1_grf(1, 2);
+   c->dx0 = brw_vec1_grf(1, 3);
+   c->dx2 = brw_vec1_grf(1, 4);
+   c->dy0 = brw_vec1_grf(1, 5);
+   c->dy2 = brw_vec1_grf(1, 6);
+
+   /* z and 1/w passed in separately:
+    */
+   c->z[0]     = brw_vec1_grf(2, 0);
+   c->inv_w[0] = brw_vec1_grf(2, 1);
+   c->z[1]     = brw_vec1_grf(2, 2);
+   c->inv_w[1] = brw_vec1_grf(2, 3);
+   c->z[2]     = brw_vec1_grf(2, 4);
+   c->inv_w[2] = brw_vec1_grf(2, 5);
+   
+   /* The vertices:
+    */
+   reg = 3;
+   for (i = 0; i < c->nr_verts; i++) {
+      c->vert[i] = brw_vec8_grf(reg, 0);
+      reg += c->nr_attr_regs;
+   }
+
+   /* Temporaries, allocated after last vertex reg.
+    */
+   c->inv_det = brw_vec1_grf(reg, 0);  reg++;
+   c->a1_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->a2_sub_a0 = brw_vec8_grf(reg, 0);  reg++;
+   c->tmp = brw_vec8_grf(reg, 0);  reg++;
+
+   /* Note grf allocation:
+    */
+   c->prog_data.total_grf = reg;
+   
+
+   /* Outputs of this program - interpolation coefficients for
+    * rasterization:
+    */
+   c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0);
+   c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0);
+   c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0);
+}
+
+
+static void copy_z_inv_w( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   brw_push_insn_state(p);
+	
+   /* Copy both scalars with a single MOV:
+    */
+   for (i = 0; i < c->nr_verts; i++)
+      brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
+	 
+   brw_pop_insn_state(p);
+}
+
+
+static void invert_det( struct brw_sf_compile *c)
+{
+   /* Emit a full-precision INV: c->inv_det = 1/c->det.  Original note: looks
+    * like we invert all 8 elements just to get 1/det in position 2 !?!
+    */
+   brw_math(&c->func, 
+	    c->inv_det, 
+	    BRW_MATH_FUNCTION_INV,
+	    BRW_MATH_SATURATE_NONE,
+	    0, 
+	    c->det,
+	    BRW_MATH_DATA_SCALAR,
+	    BRW_MATH_PRECISION_FULL);
+
+}
+
+
+static GLboolean calculate_masks( struct brw_sf_compile *c,
+				  GLuint reg,
+				  GLushort *pc,
+				  GLushort *pc_persp,
+				  GLushort *pc_linear)
+{
+   GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
+   GLuint persp_mask;
+   GLuint linear_mask;
+   /* Build the predicate masks for setup register 'reg', which carries attributes 2*reg and 2*reg+1. */
+   if (c->key.do_flat_shading || c->key.linear_color)
+      persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
+                                    FRAG_BIT_COL0 |
+                                    FRAG_BIT_COL1);
+   else
+      persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
+   /* linear_mask: attributes whose interpolation coefficients get computed (flat-shaded colors excluded). */
+   if (c->key.do_flat_shading)
+      linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
+   else
+      linear_mask = c->key.attrs;
+   /* Low nibble (0xf) covers the first attribute of the pair, high nibble (0xf0) the second. */
+   *pc_persp = 0;
+   *pc_linear = 0;
+   *pc = 0xf;
+      
+   if (persp_mask & (1 << c->idx_to_attr[reg*2])) 
+      *pc_persp = 0xf;
+
+   if (linear_mask & (1 << c->idx_to_attr[reg*2])) 
+      *pc_linear = 0xf;
+
+   /* Maybe only process one attribute on the final round:
+    */
+   if (reg*2+1 < c->nr_setup_attrs) {
+      *pc |= 0xf0;
+
+      if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) 
+	 *pc_persp |= 0xf0;
+
+      if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) 
+	 *pc_linear |= 0xf0;
+   }
+   /* True for the final setup register; the emit functions pass it as the URB write's eot flag. */
+   return is_last_attr;
+}
+
+
+
+void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+   /* Emit triangle setup: per attribute pair, compute Cx/Cy/C0 and write them to the URB. */
+   c->nr_verts = 3;
+
+   if (allocate)
+      alloc_regs(c);
+   /* Prologue: 1/det, then z and 1/w copied into the vertex registers. */
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_twoside_color) 
+      do_twoside_color(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_triangle(c);
+      
+   
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      struct brw_reg a2 = offset(c->vert[2], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+      /* Attributes selected by pc_persp are pre-multiplied by each vertex's 1/w. */
+      if (pc_persp)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+	 brw_MUL(p, a2, a2, c->inv_w[2]);
+      }
+      
+      
+      /* Calculate coefficients for interpolated values:
+       */      
+      if (pc_linear)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_linear);
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
+
+	 /* calculate dA/dx; presumably ((a1-a0)*dy2 - (a2-a0)*dy0) * inv_det,
+	  * assuming the MUL to null primes the accumulator MAC adds into. */
+	 brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
+	 brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+		
+	 /* calculate dA/dy; same pattern with dx0/dx2 and the operands swapped
+	  */
+	 brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
+	 brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2));
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+      
+	 /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
+	  * the send instruction:
+	  */	 
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last,	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
+      }
+   }
+}
+
+
+
+void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+   /* Emit line setup: two vertices, so one attribute delta (a1 - a0) feeds both Cx and Cy. */
+
+   c->nr_verts = 2;
+
+   if (allocate)
+      alloc_regs(c);
+
+   invert_det(c);
+   copy_z_inv_w(c);
+
+   if (c->key.do_flat_shading)
+      do_flatshade_line(c);
+
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      /* Pair of incoming attributes:
+       */
+      struct brw_reg a0 = offset(c->vert[0], i);
+      struct brw_reg a1 = offset(c->vert[1], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+
+      if (pc_persp)
+      {
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+	 brw_MUL(p, a1, a1, c->inv_w[1]);
+      }
+
+      /* Calculate coefficients for position, color:
+       */
+      if (pc_linear) {
+	 brw_set_predicate_control_flag_value(p, pc_linear); 
+
+	 brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
+	 /* Cx = (a1-a0) * dx0 * inv_det,  Cy = (a1-a0) * dy0 * inv_det */
+ 	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); 
+	 brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
+		
+	 brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
+	 brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 /* start point for interpolation
+	  */
+	 brw_MOV(p, c->m3C0, a0);
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1, 	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE); 
+      }
+   } 
+}
+
+void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+   /* Emit point-sprite setup: attributes with CoordReplace get generated coefficients, others are constant. */
+   c->nr_verts = 1;
+
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+            
+      if (pc_persp)
+      {				
+	  if (!tex->CoordReplace) {
+	      brw_set_predicate_control_flag_value(p, pc_persp);
+	      brw_MUL(p, a0, a0, c->inv_w[0]);
+	  }
+      }
+
+      if (tex->CoordReplace) {
+	  /* Calculate 1.0/PointWidth (the INV below takes c->dx0 as its source) */
+	  brw_math(&c->func,
+		  c->tmp,
+		  BRW_MATH_FUNCTION_INV,
+		  BRW_MATH_SATURATE_NONE,
+		  0,
+		  c->dx0,
+		  BRW_MATH_DATA_SCALAR,
+		  BRW_MATH_PRECISION_FULL);
+
+	  if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	  } else {
+	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	  }
+      } else {
+	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+      }
+
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+	 if (tex->CoordReplace) {
+	     if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+	     }
+	     else
+		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	 } else {
+	 	brw_MOV(p, c->m3C0, a0); /* constant value */
+	 }
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+
+/* Points setup - several simplifications as all attributes are constant
+ * across the face of the point (point sprites excluded!): Cx and Cy are
+ * zeroed once up front and only C0 is written per attribute pair. */
+void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate)
+{
+   struct brw_compile *p = &c->func;
+   GLuint i;
+
+   c->nr_verts = 1;
+   
+   if (allocate)
+      alloc_regs(c);
+
+   copy_z_inv_w(c);
+
+   brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */
+   brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */
+
+   for (i = 0; i < c->nr_setup_regs; i++)
+   {
+      struct brw_reg a0 = offset(c->vert[0], i);
+      GLushort pc, pc_persp, pc_linear;
+      GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
+            
+      if (pc_persp)
+      {				
+	 /* This seems odd as the values are all constant, but the
+	  * fragment shader will be expecting it:
+	  */
+	 brw_set_predicate_control_flag_value(p, pc_persp);
+	 brw_MUL(p, a0, a0, c->inv_w[0]);
+      }
+
+
+      /* The delta values are always zero, just send the starting
+       * coordinate.  Again, this is to fit in with the interpolation
+       * code in the fragment shader.
+       */
+      {
+	 brw_set_predicate_control_flag_value(p, pc); 
+
+	 brw_MOV(p, c->m3C0, a0); /* constant value */
+
+	 /* Copy m0..m3 to URB. 
+	  */
+	 brw_urb_WRITE(p, 
+		       brw_null_reg(),
+		       0,
+		       brw_vec8_grf(0, 0),
+		       0, 	/* allocate */
+		       1,	/* used */
+		       4, 	/* msg len */
+		       0,	/* response len */
+		       last, 	/* eot */
+		       last, 	/* writes complete */
+		       i*4,	/* urb destination offset */
+		       BRW_URB_SWIZZLE_TRANSPOSE);
+      }
+   }
+}
+
+void brw_emit_anyprim_setup( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg ip = brw_ip_reg();
+   struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
+   struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0); 
+   struct brw_reg primmask;
+   struct brw_instruction *jmp;
+   struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+   /* Emit one program for any primitive: tri, line and point-sprite setup each sit behind a forward jump; point setup is last. */
+   GLuint saveflag;
+
+   c->nr_verts = 3;
+   alloc_regs(c);
+   /* primmask = 1 << payload_prim, tested against the per-class _3DPRIM_* bit sets below. */
+   primmask = retype(get_element(c->tmp, 0), BRW_REGISTER_TYPE_UD);
+
+   brw_MOV(p, primmask, brw_imm_ud(1));
+   brw_SHL(p, primmask, primmask, payload_prim);
+
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
+					       (1<<_3DPRIM_TRISTRIP) |
+					       (1<<_3DPRIM_TRIFAN) |
+					       (1<<_3DPRIM_TRISTRIP_REVERSE) |
+					       (1<<_3DPRIM_POLYGON) |
+					       (1<<_3DPRIM_RECTLIST) |
+					       (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_tri_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine, so must
+       * restore the flag which is changed when building
+       * the subroutine. fix #13240
+       */
+   }
+   brw_land_fwd_jump(p, jmp);
+
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
+					       (1<<_3DPRIM_LINESTRIP) |
+					       (1<<_3DPRIM_LINELOOP) |
+					       (1<<_3DPRIM_LINESTRIP_CONT) |
+					       (1<<_3DPRIM_LINESTRIP_BF) |
+					       (1<<_3DPRIM_LINESTRIP_CONT_BF)));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_line_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+      /* note - thread killed in subroutine */
+   }
+   brw_land_fwd_jump(p, jmp); 
+
+   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+   brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
+   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+   {
+      saveflag = p->flag_value;
+      brw_push_insn_state(p); 
+      brw_emit_point_sprite_setup( c, GL_FALSE );
+      brw_pop_insn_state(p);
+      p->flag_value = saveflag;
+   }
+   brw_land_fwd_jump(p, jmp); 
+
+   brw_emit_point_setup( c, GL_FALSE );
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
new file mode 100644
index 0000000000..bc0f076073
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -0,0 +1,365 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+#include "intel_fbo.h"
+
+static void upload_sf_vp(struct brw_context *brw)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   struct brw_sf_viewport sfv;
+   GLfloat y_scale, y_bias;
+   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   /* Build the SF viewport state (viewport transform + scissor rect) and fetch a cached bo for it. */
+   memset(&sfv, 0, sizeof(sfv));
+   /* Window rendering flips Y: scale by -1 and bias by the drawable height; FBOs are left as-is. */
+   if (render_to_fbo) {
+      y_scale = 1.0;
+      y_bias = 0;
+   }
+   else {
+      y_scale = -1.0;
+      y_bias = ctx->DrawBuffer->Height;
+   }
+
+   /* _NEW_VIEWPORT */
+
+   sfv.viewport.m00 = v[MAT_SX];
+   sfv.viewport.m11 = v[MAT_SY] * y_scale;
+   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
+   sfv.viewport.m30 = v[MAT_TX];
+   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
+   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+
+   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
+    * for DrawBuffer->_[XY]{min,max}
+    */
+
+   /* The scissor only needs to handle the intersection of drawable and
+    * scissor rect.  Clipping to the boundaries of static shared buffers
+    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    *
+    * Note that the hardware's coordinates are inclusive, while Mesa's min is
+    * inclusive but max is exclusive.
+    */
+   if (render_to_fbo) {
+      /* texmemory: Y=0=bottom */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+      sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   }
+   else {
+      /* memory: Y=0=top */
+      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+      sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
+      sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
+   }
+   /* Drop the reference to the previous viewport bo before replacing it. */
+   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+}
+
+const struct brw_tracked_state brw_sf_vp = {	/* SF viewport + scissor state atom */
+   .dirty = {	/* the _NEW_* flags named by the comments in upload_sf_vp */
+      .mesa  = (_NEW_VIEWPORT | 
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = 0,
+      .cache = 0
+   },
+   .prepare = upload_sf_vp	/* rebuilds brw->sf.vp_bo */
+};
+
+struct brw_sf_unit_key {	/* cache key for the SF unit state; filled by sf_unit_populate_key() */
+   unsigned int total_grf;	/* from brw->sf.prog_data (CACHE_NEW_SF_PROG) */
+   unsigned int urb_entry_read_length;	/* from brw->sf.prog_data->urb_read_length */
+   /* URB layout copied from brw->urb (BRW_NEW_URB_FENCE): */
+   unsigned int nr_urb_entries, urb_size, sfsize;
+   /* Snapshot of the GL state the SF unit depends on: */
+   GLenum front_face, cull_face, provoking_vertex;
+   unsigned scissor:1;
+   unsigned line_smooth:1;
+   unsigned point_sprite:1;
+   unsigned point_attenuated:1;
+   unsigned render_to_fbo:1;
+   float line_width;
+   float point_size;
+};
+
+static void
+sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   memset(key, 0, sizeof(*key));
+   /* Fill *key from the current context; it was zeroed first since upload_sf_unit uses it as raw bytes. */
+   /* CACHE_NEW_SF_PROG */
+   key->total_grf = brw->sf.prog_data->total_grf;
+   key->urb_entry_read_length = brw->sf.prog_data->urb_read_length;
+
+   /* BRW_NEW_URB_FENCE */
+   key->nr_urb_entries = brw->urb.nr_sf_entries;
+   key->urb_size = brw->urb.vsize;
+   key->sfsize = brw->urb.sfsize;
+   /* _NEW_SCISSOR, _NEW_POLYGON */
+   key->scissor = ctx->Scissor.Enabled;
+   key->front_face = ctx->Polygon.FrontFace;
+
+   if (ctx->Polygon.CullFlag)
+      key->cull_face = ctx->Polygon.CullFaceMode;
+   else
+      key->cull_face = GL_NONE;
+   /* _NEW_LINE */
+   key->line_width = ctx->Line.Width;
+   key->line_smooth = ctx->Line.SmoothFlag;
+   /* _NEW_POINT */
+   key->point_sprite = ctx->Point.PointSprite;
+   key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
+   key->point_attenuated = ctx->Point._Attenuated;
+
+   /* _NEW_LIGHT */
+   key->provoking_vertex = ctx->Light.ProvokingVertex;
+   /* _NEW_BUFFERS */
+   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+}
+
+static dri_bo *
+sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
+			dri_bo **reloc_bufs)
+{
+   struct brw_sf_unit_state sf;
+   dri_bo *bo;
+   int chipset_max_threads;
+   memset(&sf, 0, sizeof(sf));
+   /* Translate *key into a brw_sf_unit_state, upload it to the cache and emit its two relocations. */
+   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+
+   sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   sf.thread3.dispatch_grf_start_reg = 3;
+
+   if (BRW_IS_IGDNG(brw))
+       sf.thread3.urb_entry_read_offset = 3;
+   else
+       sf.thread3.urb_entry_read_offset = 1;
+
+   sf.thread3.urb_entry_read_length = key->urb_entry_read_length;
+
+   sf.thread4.nr_urb_entries = key->nr_urb_entries;
+   sf.thread4.urb_entry_allocation_size = key->sfsize - 1;
+
+   /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or 
+    * 48(IGDNG) threads 
+    */
+   if (BRW_IS_IGDNG(brw))
+      chipset_max_threads = 48;
+   else
+      chipset_max_threads = 24;
+
+   sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      sf.thread4.max_threads = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      sf.thread4.stats_enable = 1;
+
+   /* CACHE_NEW_SF_VP */
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+
+   sf.sf5.viewport_transform = 1;
+
+   /* _NEW_SCISSOR */
+   if (key->scissor)
+      sf.sf6.scissor = 1;
+
+   /* _NEW_POLYGON */
+   if (key->front_face == GL_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
+   /* The viewport is inverted for rendering to a FBO, and that inverts
+    * polygon front/back orientation.
+    */
+   sf.sf5.front_winding ^= key->render_to_fbo;
+
+   switch (key->cull_face) {
+   case GL_FRONT:
+      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+      break;
+   case GL_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
+      break;
+   case GL_FRONT_AND_BACK:
+      sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
+      break;
+   case GL_NONE:
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   /* _NEW_LINE */
+   /* XXX use ctx->Const.Min/MaxLineWidth here */
+   sf.sf6.line_width = CLAMP(key->line_width, 1.0, 5.0) * (1<<1);
+
+   sf.sf6.line_endcap_aa_region_width = 1;
+   if (key->line_smooth)
+      sf.sf6.aa_enable = 1;
+   else if (sf.sf6.line_width <= 0x2)
+       sf.sf6.line_width = 0;
+
+   /* _NEW_BUFFERS -- NOTE(review): rewrites key->render_to_fbo with the value sf_unit_populate_key() already stored. */
+   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   if (!key->render_to_fbo) {
+      /* Rendering to an OpenGL window */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+   }
+   else {
+      /* If rendering to an FBO, the pixel coordinate system is
+       * inverted with respect to the normal OpenGL coordinate
+       * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
+       * But this value is listed as "Reserved, but not seen as useful"
+       * in Intel documentation (page 212, "Point Rasterization Rule",
+       * section 7.4 "SF Pipeline State Summary", of document
+       * "Intel® 965 Express Chipset Family and Intel® G35 Express
+       * Chipset Graphics Controller Programmer's Reference Manual,
+       * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
+       * available at 
+       *     http://intellinuxgraphics.org/documentation.html
+       * at the time of this writing).
+       *
+       * It does work on at least some devices, if not all;
+       * if devices that don't support it can be identified,
+       * the likely failure case is that points are rasterized
+       * incorrectly, which is no worse than occurs without
+       * the value, so we're using it here.
+       */
+      sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+   }
+   /* XXX clamp max depends on AA vs. non-AA */
+
+   /* _NEW_POINT */
+   sf.sf7.sprite_point = key->point_sprite;
+   sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3);
+   sf.sf7.use_point_size_state = !key->point_attenuated;
+   sf.sf7.aa_line_distance_mode = 0;
+
+   /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
+    */
+   if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+      sf.sf7.trifan_pv = 2;
+      sf.sf7.linestrip_pv = 1;
+      sf.sf7.tristrip_pv = 2;
+   } else {
+      sf.sf7.trifan_pv = 1;
+      sf.sf7.linestrip_pv = 0;
+      sf.sf7.tristrip_pv = 0;
+   }
+   sf.sf7.line_last_pixel_enable = 0;
+
+   /* Set bias for OpenGL rasterization rules:
+    */
+   sf.sf6.dest_org_vbias = 0x8;
+   sf.sf6.dest_org_hbias = 0x8;
+   /* reloc_bufs is expected to hold the program bo and the viewport bo (upload_sf_unit fills it that way). */
+   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 2,
+			 &sf, sizeof(sf),
+			 NULL, NULL);
+
+   /* STATE_PREFETCH command description describes this state as being
+    * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+    */
+   /* Emit SF program relocation */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0,
+		     sf.thread0.grf_reg_count << 1,
+		     offsetof(struct brw_sf_unit_state, thread0),
+		     brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0,
+		     sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+		     offsetof(struct brw_sf_unit_state, sf5),
+		     brw->sf.vp_bo);
+
+   return bo;
+}
+
+static void upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   dri_bo *reloc_bufs[2];
+   /* Refresh brw->sf.state_bo: build the key, then reuse a cached SF unit bo or create one. */
+   sf_unit_populate_key(brw, &key);
+
+   reloc_bufs[0] = brw->sf.prog_bo;
+   reloc_bufs[1] = brw->sf.vp_bo;
+   /* Release the old state bo; a NULL result from the search below means a cache miss. */
+   dri_bo_unreference(brw->sf.state_bo);
+   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
+				       &key, sizeof(key),
+				       reloc_bufs, 2,
+				       NULL);
+   if (brw->sf.state_bo == NULL) {
+      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+}
+
+const struct brw_tracked_state brw_sf_unit = {	/* SF unit state atom */
+   .dirty = {	/* matches the flag comments in sf_unit_populate_key / sf_unit_create_from_key */
+      .mesa  = (_NEW_POLYGON | 
+		_NEW_LIGHT |
+		_NEW_LINE | 
+		_NEW_POINT | 
+		_NEW_SCISSOR |
+		_NEW_BUFFERS),
+      .brw   = BRW_NEW_URB_FENCE,
+      .cache = (CACHE_NEW_SF_VP |
+		CACHE_NEW_SF_PROG)
+   },
+   .prepare = upload_sf_unit,	/* rebuilds brw->sf.state_bo */
+};
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
new file mode 100644
index 0000000000..d639656b9d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -0,0 +1,173 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+    
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+
+static inline void
+brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+{
+   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+   /* A NULL bo is ignored; otherwise take a reference and append it to brw->state.validated_bos. */
+   if (bo != NULL) {
+      dri_bo_reference(bo);
+      brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+   }
+}
+/* Declarations only: each atom is defined once in a .c file (e.g. brw_sf_vp and brw_sf_unit in brw_sf_state.c). */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+
+/**
+ * Surface description used as a key; the same key type serves WM and VS surfaces.
+ */
+struct brw_surface_key {
+   GLenum target, depthmode;
+   dri_bo *bo;
+   GLint format, internal_format;
+   GLint first_level, last_level;
+   GLint width, height, depth;
+   GLint pitch, cpp;
+   uint32_t tiling;
+   GLuint offset;
+};
+
+/***********************************************************************
+ * brw_state.c
+ */
+void brw_validate_state(struct brw_context *brw);
+void brw_upload_state(struct brw_context *brw);
+void brw_init_state(struct brw_context *brw);
+void brw_destroy_state(struct brw_context *brw);
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+dri_bo *brw_cache_data(struct brw_cache *cache,
+		       enum brw_cache_id cache_id,
+		       const void *data,
+		       dri_bo **reloc_bufs,
+		       GLuint nr_reloc_bufs);
+/* Same parameters as brw_cache_data() plus an explicit data_size. */
+dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *data,
+			  GLuint data_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs);
+/* Upload data under key; sf_unit_create_from_key() calls this after brw_search_cache() returned NULL. */
+dri_bo *brw_upload_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_sz,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  const void *data,
+			  GLuint data_sz,
+			  const void *aux,
+			  void *aux_return );
+/* Look up a bo by key and reloc bufs; callers such as upload_sf_unit() treat NULL as a miss. */
+dri_bo *brw_search_cache( struct brw_cache *cache,
+			  enum brw_cache_id cache_id,
+			  const void *key,
+			  GLuint key_size,
+			  dri_bo **reloc_bufs,
+			  GLuint nr_reloc_bufs,
+			  void *aux_return);
+void brw_state_cache_check_size( struct brw_context *brw );
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+/* Both macros take a pointer 's' and pass sizeof(*(s)) as the size. */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data,
+				   GLuint sz );
+void brw_destroy_batch_cache( struct brw_context *brw );
+void brw_clear_batch_cache( struct brw_context *brw );
+
+/* brw_wm_surface_state.c */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key );
+
+#endif
+#endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
new file mode 100644
index 0000000000..7821898cf9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+     
+
+
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "main/imports.h"
+
+
+
+/* Emits the sz bytes at data to the batch unless an identical packet is
+ * already cached for the same header opcode.  Returns GL_TRUE if the data
+ * was emitted, GL_FALSE if it was skipped as a duplicate. */
+GLboolean brw_cached_batch_struct( struct brw_context *brw,
+				   const void *data,
+				   GLuint sz )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+   struct header *newheader = (struct header *)data;
+
+   if (brw->emit_state_always) { /* bypass the cache: emit without recording */
+      intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      return GL_TRUE;
+   }
+
+   while (item) { /* at most one entry is kept per opcode */
+      if (item->header->opcode == newheader->opcode) {
+	 if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+	    return GL_FALSE;
+	 if (item->sz != sz) { /* size changed: reallocate the stored copy */
+	    _mesa_free(item->header);
+	    item->header = _mesa_malloc(sz);
+	    item->sz = sz;
+	 }
+	 goto emit;
+      }
+      item = item->next;
+   }
+
+   assert(!item); /* no entry for this opcode: add one at the list head */
+   item = CALLOC_STRUCT(brw_cached_batch_item);
+   item->header = _mesa_malloc(sz);
+   item->sz = sz;
+   item->next = brw->cached_batch_items;
+   brw->cached_batch_items = item;
+
+ emit: /* store a copy of the packet, then send it */
+   memcpy(item->header, newheader, sz);
+   intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   return GL_TRUE;
+}
+/* Frees every cached batch item and its stored copy; leaves the list empty. */
+void brw_clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+   while (item) {
+      struct brw_cached_batch_item *next = item->next; /* read before item is freed */
+      free((void *)item->header);
+      free(item);
+      item = next;
+   }
+
+   brw->cached_batch_items = NULL;
+}
+/* Destroying the batch cache is currently the same as clearing it. */
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   brw_clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
new file mode 100644
index 0000000000..c262e1db8b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -0,0 +1,597 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/** @file brw_state_cache.c
+ *
+ * This file implements a simple static state cache for 965.  The consumers
+ * can query the hash table of state using a cache_id, opaque key data,
+ * and list of buffers that will be used in relocations, and receive the
+ * corresponding state buffer object of state (plus associated auxiliary
+ * data) in return.
+ *
+ * The inner workings are a simple hash table keyed on a rotate-and-XOR hash
+ * of the key data and the relocation target buffer pointers.  The cache_id
+ * is compared when probing but is not part of the hash value (this should
+ * be fixed, but will likely be fixed instead by making consumers use
+ * structured keys).
+ *
+ * Replacement is not implemented.  Instead, when the cache gets too big, at
+ * a safe point (unlock) we throw out all of the cache data and let it
+ * regenerate for the next rendering operation.
+ *
+ * The reloc_buf pointers need to be included as key data, otherwise the
+ * non-unique values stuffed in the offset in key data through
+ * brw_cache_data() may result in successful probe for state buffers
+ * even when the buffer being referenced doesn't match.  The result would be
+ * that the same state cache entry is used twice for different buffers,
+ * only one of the two buffers referenced gets put into the offset, and the
+ * incorrect program is run for the other instance.
+ */
+
+#include "main/imports.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* XXX: Fixme - have to include these to get the sizes of the prog_key
+ * structs:
+ */
+#include "brw_wm.h"
+#include "brw_vs.h"
+#include "brw_clip.h"
+#include "brw_sf.h"
+#include "brw_gs.h"
+
+/* Hashes the key's 32-bit words, then the reloc_bufs pointer values. */
+static GLuint
+hash_key(const void *key, GLuint key_size,
+         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+   GLuint *ikey = (GLuint *)key;
+   GLuint hash = 0, i;
+
+   assert(key_size % 4 == 0); /* the key is read as whole 4-byte words */
+
+   /* I'm sure this can be improved on:
+    */
+   for (i = 0; i < key_size/4; i++) {
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27); /* rotate left by 5 (for a 32-bit GLuint) */
+   }
+
+   /* Include the BO pointers as key data as well */
+   ikey = (GLuint *)reloc_bufs;
+   key_size = nr_reloc_bufs * sizeof(dri_bo *);
+   for (i = 0; i < key_size/4; i++) { /* pointer array read as 4-byte words */
+      hash ^= ikey[i];
+      hash = (hash << 5) | (hash >> 27);
+   }
+
+   return hash;
+}
+
+
+/**
+ * Records bo as the current buffer for cache_id; a no-op if it already is.
+ */
+static void
+update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
+		  dri_bo *bo)
+{
+   if (bo == cache->last_bo[cache_id])
+      return; /* no change */
+
+   dri_bo_unreference(cache->last_bo[cache_id]); /* release the previous bo */
+   cache->last_bo[cache_id] = bo;
+   dri_bo_reference(cache->last_bo[cache_id]); /* last_bo[] holds its own reference */
+   cache->brw->state.dirty.cache |= 1 << cache_id; /* flag this cache id as changed */
+}
+
+/* Scans the bucket selected by hash; returns the matching item, or NULL. */
+static struct brw_cache_item *
+search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
+	     GLuint hash, const void *key, GLuint key_size,
+	     dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+{
+   struct brw_cache_item *c;
+
+#if 0 /* disabled debug output: occupancy of the probed bucket */
+   int bucketcount = 0;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next)
+      bucketcount++;
+
+   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+	   cache->size, bucketcount, cache->n_items);
+#endif
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (c->cache_id == cache_id && /* id, hash, key bytes and relocs must all match */
+	  c->hash == hash &&
+	  c->key_size == key_size &&
+	  memcmp(c->key, key, key_size) == 0 &&
+	  c->nr_reloc_bufs == nr_reloc_bufs &&
+	  memcmp(c->reloc_bufs, reloc_bufs,
+		 nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+	 return c;
+   }
+
+   return NULL;
+}
+
+/* Triples the bucket count and relinks every item by its stored hash. */
+static void
+rehash(struct brw_cache *cache)
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   GLuint size, i;
+
+   size = cache->size * 3;
+   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+	 next = c->next; /* saved because c->next is overwritten below */
+	 c->next = items[c->hash % size]; /* push onto the head of the new bucket */
+	 items[c->hash % size] = c;
+      }
+
+   FREE(cache->items); /* only the old bucket array is freed; items are reused */
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Returns a new reference to the bo matching cache_id/key/relocs, or NULL.
+ */
+dri_bo *
+brw_search_cache(struct brw_cache *cache,
+                 enum brw_cache_id cache_id,
+                 const void *key,
+                 GLuint key_size,
+                 dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+                 void *aux_return)
+{
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+
+   item = search_cache(cache, cache_id, hash, key, key_size,
+		       reloc_bufs, nr_reloc_bufs);
+
+   if (item == NULL)
+      return NULL;
+
+   if (aux_return) /* optional: receives the address just past the stored key */
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+   update_cache_last(cache, cache_id, item->bo);
+
+   dri_bo_reference(item->bo); /* reference taken on behalf of the caller */
+   return item->bo;
+}
+
+/* Allocates a bo holding data and adds a cache item for it; returns the bo. */
+dri_bo *
+brw_upload_cache( struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *key,
+		  GLuint key_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs,
+		  const void *data,
+		  GLuint data_size,
+		  const void *aux,	/* aux_size[cache_id] bytes; unused if that is 0 */
+		  void *aux_return )	/* optional: receives pointer to the stored aux */
+{
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+   GLuint aux_size = cache->aux_size[cache_id];
+   char *tmp; /* char *, so the offsets below are standard pointer arithmetic */
+   dri_bo *bo;
+   GLuint i;
+
+   /* Create the buffer object to contain the data */
+   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
+		     cache->name[cache_id], data_size, 1 << 6);
+
+   /* One allocation laid out as: key, aux data, reloc_bufs pointers */
+   tmp = _mesa_malloc(key_size + aux_size + relocs_size);
+   memcpy(tmp, key, key_size);
+   if (aux_size) /* aux may be NULL when the cache id carries no aux data */
+      memcpy(tmp + key_size, aux, aux_size);
+   if (relocs_size) /* reloc_bufs may be NULL when there are no relocations */
+      memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   for (i = 0; i < nr_reloc_bufs; i++) {
+      if (reloc_bufs[i] != NULL)
+	 dri_bo_reference(reloc_bufs[i]);
+   }
+
+   item->cache_id = cache_id;
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->reloc_bufs = (dri_bo **)(tmp + key_size + aux_size);
+   item->nr_reloc_bufs = nr_reloc_bufs;
+
+   item->bo = bo;
+   dri_bo_reference(bo); /* extra reference held by the cache item */
+   item->data_size = data_size;
+
+   if (cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size; /* from here on, hash is the bucket index */
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+   cache->n_items++;
+
+   if (aux_return) {
+      assert(cache->aux_size[cache_id]);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+		   cache->name[cache_id],
+		   data_size, cache_id);
+
+   /* Copy data to the buffer */
+   dri_bo_subdata(bo, 0, data_size, data);
+
+   update_cache_last(cache, cache_id, bo);
+
+   return bo;
+}
+
+
+/** Finds or uploads `data`, used as both the key and the bo contents.
+ * This doesn't really work with aux data (NULL is passed for it); use
+ * brw_search_cache()/brw_upload_cache() for those cache ids instead. */
+dri_bo *
+brw_cache_data_sz(struct brw_cache *cache,
+		  enum brw_cache_id cache_id,
+		  const void *data,
+		  GLuint data_size,
+		  dri_bo **reloc_bufs,
+		  GLuint nr_reloc_bufs)
+{
+   dri_bo *bo;
+   struct brw_cache_item *item;
+   GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
+
+   item = search_cache(cache, cache_id, hash, data, data_size,
+		       reloc_bufs, nr_reloc_bufs);
+   if (item) { /* hit: hand out a new reference to the existing bo */
+      update_cache_last(cache, cache_id, item->bo);
+      dri_bo_reference(item->bo);
+      return item->bo;
+   }
+
+   bo = brw_upload_cache(cache, cache_id, /* miss: data serves as key and contents */
+			 data, data_size,
+			 reloc_bufs, nr_reloc_bufs,
+			 data, data_size,
+			 NULL, NULL);
+
+   return bo;
+}
+
+
+/**
+ * Calls brw_cache_data_sz() with data_size = cache->key_size[cache_id].
+ * Ids registered with a key_size of 0 therefore pass a data size of 0 here.
+ * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * better to use, as the potentially changing offsets in the data-used-as-key
+ * will result in excessive cache misses.
+ */
+dri_bo *
+brw_cache_data(struct brw_cache *cache,
+	       enum brw_cache_id cache_id,
+	       const void *data,
+	       dri_bo **reloc_bufs,
+	       GLuint nr_reloc_bufs)
+{
+   return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
+			    reloc_bufs, nr_reloc_bufs);
+}
+
+enum pool_type { /* NOTE(review): not referenced in the visible part of this file */
+   DW_SURFACE_STATE,
+   DW_GENERAL_STATE
+};
+
+/* Registers a cache id: stores a copy of its name and its key/aux sizes. */
+static void
+brw_init_cache_id(struct brw_cache *cache,
+                  const char *name,
+                  enum brw_cache_id id,
+                  GLuint key_size,
+                  GLuint aux_size)
+{
+   cache->name[id] = strdup(name); /* released with free() in brw_destroy_cache() */
+   cache->key_size[id] = key_size;
+   cache->aux_size[id] = aux_size;
+}
+
+/* Sets up brw->cache: an empty 7-bucket table plus its cache id sizes. */
+static void
+brw_init_non_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->cache;
+
+   cache->brw = brw;
+
+   cache->size = 7; /* initial bucket count; rehash() grows it later */
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **) /* buckets are pointers: size by pointer */
+      _mesa_calloc(cache->size * sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+		     "CC_VP",
+		     BRW_CC_VP,
+		     sizeof(struct brw_cc_viewport),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "CC_UNIT",
+		     BRW_CC_UNIT,
+		     sizeof(struct brw_cc_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "WM_PROG",
+		     BRW_WM_PROG,
+		     sizeof(struct brw_wm_prog_key),
+		     sizeof(struct brw_wm_prog_data));
+
+   brw_init_cache_id(cache,
+		     "SAMPLER_DEFAULT_COLOR",
+		     BRW_SAMPLER_DEFAULT_COLOR,
+		     sizeof(struct brw_sampler_default_color),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "SAMPLER",
+		     BRW_SAMPLER,
+		     0,		/* variable key/data size */
+		     0);
+
+   brw_init_cache_id(cache,
+		     "WM_UNIT",
+		     BRW_WM_UNIT,
+		     sizeof(struct brw_wm_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "SF_PROG",
+		     BRW_SF_PROG,
+		     sizeof(struct brw_sf_prog_key),
+		     sizeof(struct brw_sf_prog_data));
+
+   brw_init_cache_id(cache,
+		     "SF_VP",
+		     BRW_SF_VP,
+		     sizeof(struct brw_sf_viewport),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "SF_UNIT",
+		     BRW_SF_UNIT,
+		     sizeof(struct brw_sf_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "VS_UNIT",
+		     BRW_VS_UNIT,
+		     sizeof(struct brw_vs_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "VS_PROG",
+		     BRW_VS_PROG,
+		     sizeof(struct brw_vs_prog_key),
+		     sizeof(struct brw_vs_prog_data));
+
+   brw_init_cache_id(cache,
+		     "CLIP_UNIT",
+		     BRW_CLIP_UNIT,
+		     sizeof(struct brw_clip_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "CLIP_PROG",
+		     BRW_CLIP_PROG,
+		     sizeof(struct brw_clip_prog_key),
+		     sizeof(struct brw_clip_prog_data));
+
+   brw_init_cache_id(cache,
+		     "GS_UNIT",
+		     BRW_GS_UNIT,
+		     sizeof(struct brw_gs_unit_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "GS_PROG",
+		     BRW_GS_PROG,
+		     sizeof(struct brw_gs_prog_key),
+		     sizeof(struct brw_gs_prog_data));
+}
+
+/* Sets up brw->surface_cache: an empty 7-bucket table and its two ids. */
+static void
+brw_init_surface_cache(struct brw_context *brw)
+{
+   struct brw_cache *cache = &brw->surface_cache;
+
+   cache->brw = brw;
+
+   cache->size = 7; /* initial bucket count; rehash() grows it later */
+   cache->n_items = 0;
+   cache->items = (struct brw_cache_item **) /* buckets are pointers: size by pointer */
+      _mesa_calloc(cache->size * sizeof(*cache->items));
+
+   brw_init_cache_id(cache,
+		     "SS_SURFACE",
+		     BRW_SS_SURFACE,
+		     sizeof(struct brw_surface_state),
+		     0);
+
+   brw_init_cache_id(cache,
+		     "SS_SURF_BIND",
+		     BRW_SS_SURF_BIND,
+		     0, /* no fixed key size is registered for this id */
+		     0);
+}
+
+/* Initializes both brw->cache and brw->surface_cache. */
+void
+brw_init_caches(struct brw_context *brw)
+{
+   brw_init_non_surface_cache(brw);
+   brw_init_surface_cache(brw);
+}
+
+/* Drops every item of cache with its bo references; marks all state dirty. */
+static void
+brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   struct brw_cache_item *c, *next;
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+	 int j;
+
+	 next = c->next; /* read before c is freed */
+	 for (j = 0; j < c->nr_reloc_bufs; j++)
+	    dri_bo_unreference(c->reloc_bufs[j]);
+	 dri_bo_unreference(c->bo);
+	 free((void *)c->key); /* key, aux and reloc_bufs share this one allocation */
+	 free(c);
+      }
+      cache->items[i] = NULL;
+   }
+
+   cache->n_items = 0;
+
+   if (brw->curbe.last_buf) {
+      _mesa_free(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+
+   brw->state.dirty.mesa |= ~0; /* set every dirty bit in all three words */
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+/* Remove every cache item whose bo references the given bo, as reported by
+ * drm_intel_bo_references().
+ * This lets us release memory for reuse earlier for known-dead buffers,
+ * at the cost of walking the entire hash table.
+ */
+void
+brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
+{
+   struct brw_cache_item **prev;
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < cache->size; i++) {
+      for (prev = &cache->items[i]; *prev;) { /* prev points at the link to the current item */
+	 struct brw_cache_item *c = *prev;
+
+	 if (drm_intel_bo_references(c->bo, bo)) {
+	    int j;
+
+	    *prev = c->next; /* unlink c; prev now leads to the next item */
+
+	    for (j = 0; j < c->nr_reloc_bufs; j++)
+	       dri_bo_unreference(c->reloc_bufs[j]);
+	    dri_bo_unreference(c->bo);
+	    free((void *)c->key);
+	    free(c);
+	    cache->n_items--;
+	 } else {
+	    prev = &c->next; /* keep c and advance */
+	 }
+      }
+   }
+}
+/* Clears either cache completely once it holds more than 1000 items. */
+void
+brw_state_cache_check_size(struct brw_context *brw)
+{
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+
+   /* un-tuned guess.  We've got around 20 state objects for a total of around
+    * 32k, so 1000 of them is around 1.5MB.
+    */
+   if (brw->cache.n_items > 1000)
+      brw_clear_cache(brw, &brw->cache);
+
+   if (brw->surface_cache.n_items > 1000) /* same limit for the surface cache */
+      brw_clear_cache(brw, &brw->surface_cache);
+}
+
+/* Clears cache, then releases its last_bo references, names and buckets. */
+static void
+brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
+{
+   GLuint i;
+
+   if (INTEL_DEBUG & DEBUG_STATE)
+      _mesa_printf("%s\n", __FUNCTION__);
+
+   brw_clear_cache(brw, cache);
+   for (i = 0; i < BRW_MAX_CACHE; i++) {
+      dri_bo_unreference(cache->last_bo[i]);
+      free(cache->name[i]); /* allocated by strdup() in brw_init_cache_id() */
+   }
+   free(cache->items);
+   cache->items = NULL;
+   cache->size = 0;
+}
+
+/* Tears down both brw->cache and brw->surface_cache. */
+void
+brw_destroy_caches(struct brw_context *brw)
+{
+   brw_destroy_cache(brw, &brw->cache);
+   brw_destroy_cache(brw, &brw->surface_cache);
+}
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
new file mode 100644
index 0000000000..e94fa7d2b4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#include "main/mtypes.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/**
+ * Prints to stderr the name, the dword's hardware offset and its value,
+ * followed by the caller's printf-style message for that dword.
+ * \param name Name of the state object
+ * \param data Pointer to the base of the state object
+ * \param hw_offset Hardware offset of the base of the state data.
+ * \param index Index of the DWORD being output.
+ * \param fmt printf-style format of the message; its arguments follow.
+ */
+static void
+state_out(const char *name, void *data, uint32_t hw_offset, int index,
+	  char *fmt, ...)
+{
+    va_list va;
+
+    fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
+	    name, hw_offset + index * 4, ((uint32_t *)data)[index]);
+    va_start(va, fmt);
+    vfprintf(stderr, fmt, va);
+    va_end(va);
+}
+
+/** Generic, undecoded dump: one state_out() line per dword of the buffer */
+static void
+state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
+{
+   int i;
+
+   if (buffer == NULL)
+      return;
+
+   dri_bo_map(buffer, GL_FALSE);
+   for (i = 0; i < state_size / 4; i++) { /* state_size is in bytes */
+      state_out(name, buffer->virtual, buffer->offset, i,
+		"dword %d\n", i);
+   }
+   dri_bo_unmap(buffer);
+}
+/* Maps a surface type code to a printable name ("unknown" if unlisted). */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    switch (surfacetype) {
+    case 0: return "1D";
+    case 1: return "2D";
+    case 2: return "3D";
+    case 3: return "CUBE";
+    case 4: return "BUFFER";
+    case 7: return "NULL";
+    default: return "unknown";
+    }
+}
+/* Maps a surface format code to a name; only five formats are listed. */
+static const char *
+get_965_surface_format(unsigned int surface_format)
+{
+    switch (surface_format) {
+    case 0x000: return "r32g32b32a32_float";
+    case 0x0c1: return "b8g8r8a8_unorm";
+    case 0x100: return "b5g6r5_unorm";
+    case 0x102: return "b5g5r5a1_unorm";
+    case 0x104: return "b4g4r4a4_unorm";
+    default: return "unknown";
+    }
+}
+/* Prints dwords 0-5 of each WM surface state bo with decoded fields. */
+static void dump_wm_surface_state(struct brw_context *brw)
+{
+   int i;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      dri_bo *surf_bo = brw->wm.surf_bo[i];
+      unsigned int surfoff;
+      struct brw_surface_state *surf;
+      char name[20];
+
+      if (surf_bo == NULL) { /* empty slot: print a placeholder line */
+	 fprintf(stderr, "  WM SS%d: NULL\n", i);
+	 continue;
+      }
+      dri_bo_map(surf_bo, GL_FALSE);
+      surfoff = surf_bo->offset;
+      surf = (struct brw_surface_state *)(surf_bo->virtual);
+
+      sprintf(name, "WM SS%d", i);
+      state_out(name, surf, surfoff, 0, "%s %s\n",
+		get_965_surfacetype(surf->ss0.surface_type),
+		get_965_surface_format(surf->ss0.surface_format));
+      state_out(name, surf, surfoff, 1, "offset\n");
+      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", /* width/height printed +1 */
+		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
+      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
+		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
+      state_out(name, surf, surfoff, 4, "mip base %d\n",
+		surf->ss4.min_lod);
+      state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
+		surf->ss5.x_offset, surf->ss5.y_offset);
+
+      dri_bo_unmap(surf_bo);
+   }
+}
+/* Prints the SF viewport terms and scissor rectangle, if a vp bo exists. */
+static void dump_sf_viewport_state(struct brw_context *brw)
+{
+   const char *name = "SF VP";
+   struct brw_sf_viewport *vp;
+   uint32_t vp_off;
+
+   if (brw->sf.vp_bo == NULL)
+      return;
+
+   dri_bo_map(brw->sf.vp_bo, GL_FALSE);
+
+   vp = brw->sf.vp_bo->virtual;
+   vp_off = brw->sf.vp_bo->offset;
+
+   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); /* dwords 0-5: viewport terms */
+   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
+   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
+   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
+   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
+   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
+
+   state_out(name, vp, vp_off, 6, "top left = %d,%d\n", /* dwords 6-7: scissor corners */
+	     vp->scissor.xmin, vp->scissor.ymin);
+   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
+	     vp->scissor.xmax, vp->scissor.ymax);
+
+   dri_bo_unmap(brw->sf.vp_bo);
+}
+/* Dumps a program bo four dwords per line, stopping after an all-zero line. */
+static void brw_debug_prog(const char *name, dri_bo *prog)
+{
+   unsigned int i;
+   uint32_t *data;
+
+   if (prog == NULL)
+      return;
+
+   dri_bo_map(prog, GL_FALSE);
+
+   data = prog->virtual;
+
+   for (i = 0; i < prog->size / 4 / 4; i++) { /* one iteration per 16-byte group */
+      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+	      name, (unsigned int)prog->offset + i * 4 * 4,
+	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
+      /* Stop at the end of the program.  It'd be nice to keep track of the actual
+       * intended program size instead of guessing like this.
+       */
+      if (data[i * 4 + 0] == 0 &&
+	  data[i * 4 + 1] == 0 &&
+	  data[i * 4 + 2] == 0 &&
+	  data[i * 4 + 3] == 0)
+	 break; /* the all-zero group itself has already been printed */
+   }
+
+   dri_bo_unmap(prog);
+}
+
+
+/**
+ * Print additional debug information associated with the batchbuffer
+ * when DEBUG_BATCH is set.
+ *
+ * For 965, this means mapping the state buffers that would have been referenced
+ * by the batchbuffer and dumping them.
+ *
+ * The buffer offsets printed rely on the buffer containing the last offset
+ * it was validated at.
+ */
+void brw_debug_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); /* 4 bytes per surface */
+   dump_wm_surface_state(brw);
+
+   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog("VS prog", brw->vs.prog_bo);
+
+   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog("GS prog", brw->gs.prog_bo);
+
+   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   dump_sf_viewport_state(brw);
+   brw_debug_prog("SF prog", brw->sf.prog_bo);
+
+   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog("WM prog", brw->wm.prog_bo); /* no clip unit state or program is dumped here */
+}
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
new file mode 100644
index 0000000000..b817b741e7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -0,0 +1,416 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+       
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+
+/* Ordered table of state atoms.  brw_validate_state() runs prepare()
+ * and brw_upload_state() runs emit() for each atom whose dirty flags
+ * overlap the pending state, walking this table front to back.
+ * NOTE(review): an older note said this only initializes
+ * brw->state.atoms[] because brw_constant_buffer's .dirty varies with
+ * the bound programs; this file walks the table directly -- confirm. */
+const struct brw_tracked_state *atoms[] =
+{
+   &brw_check_fallback,
+
+   &brw_wm_input_sizes,
+   &brw_vs_prog,
+   &brw_gs_prog, 
+   &brw_clip_prog, 
+   &brw_sf_prog,
+   &brw_wm_prog,
+
+   /* Once all the programs are done, we know how large urb entry
+    * sizes need to be and can decide if we need to change the urb
+    * layout.
+    */
+   &brw_curbe_offsets,
+   &brw_recalculate_urb_fence,
+
+   &brw_cc_vp,
+   &brw_cc_unit,
+
+   &brw_vs_surfaces,		/* must do before unit */
+   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   &brw_wm_surfaces,		/* must do before samplers and unit */
+   &brw_wm_samplers,
+
+   &brw_wm_unit,
+   &brw_sf_vp,
+   &brw_sf_unit,
+   &brw_vs_unit,		/* always required, enabled or not */
+   &brw_clip_unit,
+   &brw_gs_unit,  
+
+   /* Command packets:
+    */
+   &brw_invarient_state,
+   &brw_state_base_address,
+
+   &brw_binding_table_pointers,
+   &brw_blend_constant_color,
+
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_polygon_stipple_offset,
+
+   &brw_line_stipple,
+   &brw_aa_line_parameters,
+
+   &brw_psp_urb_cbs,
+
+   &brw_drawing_rect,
+   &brw_indices,
+   &brw_index_buffer,
+   &brw_vertices,
+
+   &brw_constant_buffer
+};
+
+
+void brw_init_state( struct brw_context *brw )
+{
+   brw_init_caches(brw);   /* the only setup performed here */
+}
+
+
+void brw_destroy_state( struct brw_context *brw )
+{
+   brw_destroy_caches(brw);        /* presumably undoes brw_init_caches() -- confirm */
+   brw_destroy_batch_cache(brw);   /* no matching init call in brw_init_state() */
+}
+
+/***********************************************************************
+ */
+
+static GLboolean
+check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
+{
+   /* Non-zero when a and b share a set bit in at least one flag word. */
+   return !(!(a->mesa & b->mesa) && !(a->brw & b->brw) &&
+	    !(a->cache & b->cache));
+}
+
+static void
+accumulate_state(struct brw_state_flags *a, const struct brw_state_flags *b)
+{
+   a->cache = a->cache | b->cache;   /* a |= b, one flag word at a time */
+   a->brw = a->brw | b->brw;
+   a->mesa = a->mesa | b->mesa;
+}
+
+
+/* result = a ^ b, word by word: the flag bits on which a and b differ. */
+static void xor_states( struct brw_state_flags *result,
+   const struct brw_state_flags *a, const struct brw_state_flags *b )
+{
+   result->cache = a->cache ^ b->cache;
+   result->brw = a->brw ^ b->brw;
+   result->mesa = a->mesa ^ b->mesa;
+}
+
+static void
+brw_clear_validated_bos(struct brw_context *brw)
+{
+   int slot = 0;
+
+   /* Unreference and forget every bo recorded by the previous round. */
+   while (slot < brw->state.validated_bo_count) {
+      dri_bo_unreference(brw->state.validated_bos[slot]);
+      brw->state.validated_bos[slot++] = NULL;
+   }
+   brw->state.validated_bo_count = 0;
+}
+
+struct dirty_bit_map {
+   uint32_t bit;       /* one dirty-flag value; 0 marks the end of a table */
+   const char *name;   /* flag spelling: a string literal made by DEFINE_BIT() */
+   uint32_t count;     /* how often the flag was found set (debug statistics) */
+};
+
+#define DEFINE_BIT(name) {name, #name, 0}   /* { flag, "flag", count 0 } */
+
+static struct dirty_bit_map mesa_bits[] = {   /* counted against state->mesa */
+   DEFINE_BIT(_NEW_MODELVIEW),
+   DEFINE_BIT(_NEW_PROJECTION),
+   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+   DEFINE_BIT(_NEW_COLOR_MATRIX),
+   DEFINE_BIT(_NEW_ACCUM),
+   DEFINE_BIT(_NEW_COLOR),
+   DEFINE_BIT(_NEW_DEPTH),
+   DEFINE_BIT(_NEW_EVAL),
+   DEFINE_BIT(_NEW_FOG),
+   DEFINE_BIT(_NEW_HINT),
+   DEFINE_BIT(_NEW_LIGHT),
+   DEFINE_BIT(_NEW_LINE),
+   DEFINE_BIT(_NEW_PIXEL),
+   DEFINE_BIT(_NEW_POINT),
+   DEFINE_BIT(_NEW_POLYGON),
+   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+   DEFINE_BIT(_NEW_SCISSOR),
+   DEFINE_BIT(_NEW_STENCIL),
+   DEFINE_BIT(_NEW_TEXTURE),
+   DEFINE_BIT(_NEW_TRANSFORM),
+   DEFINE_BIT(_NEW_VIEWPORT),
+   DEFINE_BIT(_NEW_PACKUNPACK),
+   DEFINE_BIT(_NEW_ARRAY),
+   DEFINE_BIT(_NEW_RENDERMODE),
+   DEFINE_BIT(_NEW_BUFFERS),
+   DEFINE_BIT(_NEW_MULTISAMPLE),
+   DEFINE_BIT(_NEW_TRACK_MATRIX),
+   DEFINE_BIT(_NEW_PROGRAM),
+   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   {0, 0, 0}   /* end marker: bit == 0 stops the table walks */
+};
+
+static struct dirty_bit_map brw_bits[] = {   /* counted against state->brw */
+   DEFINE_BIT(BRW_NEW_URB_FENCE),
+   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_CONTEXT),
+   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_PSP),
+   DEFINE_BIT(BRW_NEW_FENCE),
+   DEFINE_BIT(BRW_NEW_INDICES),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
+   DEFINE_BIT(BRW_NEW_VERTICES),
+   DEFINE_BIT(BRW_NEW_BATCH),
+   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+   {0, 0, 0}   /* end marker: bit == 0 stops the table walks */
+};
+
+static struct dirty_bit_map cache_bits[] = {   /* counted against state->cache */
+   DEFINE_BIT(CACHE_NEW_CC_VP),
+   DEFINE_BIT(CACHE_NEW_CC_UNIT),
+   DEFINE_BIT(CACHE_NEW_WM_PROG),
+   DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+   DEFINE_BIT(CACHE_NEW_SAMPLER),
+   DEFINE_BIT(CACHE_NEW_WM_UNIT),
+   DEFINE_BIT(CACHE_NEW_SF_PROG),
+   DEFINE_BIT(CACHE_NEW_SF_VP),
+   DEFINE_BIT(CACHE_NEW_SF_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_PROG),
+   DEFINE_BIT(CACHE_NEW_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_GS_PROG),
+   DEFINE_BIT(CACHE_NEW_CLIP_VP),
+   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+   DEFINE_BIT(CACHE_NEW_SURFACE),
+   DEFINE_BIT(CACHE_NEW_SURF_BIND),
+   {0, 0, 0}   /* end marker: bit == 0 stops the table walks */
+};
+
+
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int idx = 0;
+
+   /* Bump the counter of every table flag that is set in bits; the walk
+    * ends at the zero entry or after 32 entries, whichever comes first. */
+   while (idx < 32 && bit_map[idx].bit != 0) {
+      if ((bit_map[idx].bit & bits) != 0)
+	 bit_map[idx].count += 1;
+      idx++;
+   }
+}
+
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+   /* Print flag value, hit count and name of every row; bits is unused. */
+   for (i = 0; i < 32; i++) {
+      if (bit_map[i].bit == 0)
+	 return;
+      /* count is uint32_t, so it needs an unsigned conversion (%u) */
+      fprintf(stderr, "0x%08x: %12u (%s)\n",
+	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+   }
+}
+
+/***********************************************************************
+ * Emit all state: brw_validate_state() prepares, brw_upload_state() emits.
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct brw_state_flags *state = &brw->state.dirty;
+   GLuint i;
+
+   brw_clear_validated_bos(brw);
+
+   state->mesa |= brw->intel.NewGLState;
+   brw->intel.NewGLState = 0;   /* consumed: merged into state->mesa above */
+
+   brw_add_validated_bo(brw, intel->batch->buf);
+
+   if (brw->emit_state_always) {   /* mark every flag dirty in all three words */
+      state->mesa |= ~0;
+      state->brw |= ~0;
+      state->cache |= ~0;
+   }
+
+   if (brw->fragment_program != ctx->FragmentProgram._Current) {
+      brw->fragment_program = ctx->FragmentProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+   }
+
+   if (brw->vertex_program != ctx->VertexProgram._Current) {
+      brw->vertex_program = ctx->VertexProgram._Current;
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+   }
+
+   if (state->mesa == 0 &&
+       state->cache == 0 &&
+       state->brw == 0)
+      return;   /* nothing dirty, so check_state() would match no atom */
+
+   if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
+      brw_clear_batch_cache(brw);
+
+   brw->intel.Fallback = 0;   /* re-tested on every iteration below */
+
+   /* Prepare stage: run prepare() of each atom made dirty by state; a fallback stops the walk */
+   for (i = 0; i < Elements(atoms); i++) {
+      const struct brw_tracked_state *atom = atoms[i];
+
+      if (brw->intel.Fallback)
+         break;
+
+      if (check_state(state, &atom->dirty)) {
+         if (atom->prepare) {
+            atom->prepare(brw);
+        }
+      }
+   }
+
+   /* Make sure that the textures which are referenced by the current
+    * brw fragment program are actually present/valid.
+    * If this fails, we can experience GPU lock-ups.
+    */
+   {
+      const struct brw_fragment_program *fp;
+      fp = brw_fragment_program_const(brw->fragment_program);
+      if (fp) {
+         assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
+                == fp->tex_units_used);
+      }
+   }
+}
+
+
+void brw_upload_state(struct brw_context *brw)
+{
+   struct brw_state_flags *state = &brw->state.dirty;
+   int i;
+   static int dirty_count = 0;   /* DEBUG_STATE passes so far; every 1000th prints */
+   /* Run emit() of each dirty atom in table order, then clear the flags. */
+   brw_clear_validated_bos(brw);
+
+   if (INTEL_DEBUG) {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      struct brw_state_flags examined, prev;      
+      _mesa_memset(&examined, 0, sizeof(examined));
+      prev = *state;
+
+      for (i = 0; i < Elements(atoms); i++) {	 
+	 const struct brw_tracked_state *atom = atoms[i];
+	 struct brw_state_flags generated;
+
+	 assert(atom->dirty.mesa ||
+		atom->dirty.brw ||
+		atom->dirty.cache);
+
+	 if (brw->intel.Fallback)
+	    break;
+
+	 if (check_state(state, &atom->dirty)) {
+	    if (atom->emit) {
+	       atom->emit( brw );
+	    }
+	 }
+
+	 accumulate_state(&examined, &atom->dirty);
+
+	 /* generated = prev ^ state: the flags this iteration changed.
+	  * None may be a flag that this or an earlier atom listens to
+	  * (examined), else that atom ran before its input was produced.
+	  */
+	 xor_states(&generated, &prev, state);
+	 assert(!check_state(&examined, &generated));
+	 prev = *state;
+      }
+   }
+   else {   /* same walk without the ordering checks */
+      for (i = 0; i < Elements(atoms); i++) {	 
+	 const struct brw_tracked_state *atom = atoms[i];
+
+	 if (brw->intel.Fallback)
+	    break;
+
+	 if (check_state(state, &atom->dirty)) {
+	    if (atom->emit) {
+	       atom->emit( brw );
+	    }
+	 }
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_STATE) {
+      brw_update_dirty_count(mesa_bits, state->mesa);
+      brw_update_dirty_count(brw_bits, state->brw);
+      brw_update_dirty_count(cache_bits, state->cache);
+      if (dirty_count++ % 1000 == 0) {
+	 brw_print_dirty_count(mesa_bits, state->mesa);
+	 brw_print_dirty_count(brw_bits, state->brw);
+	 brw_print_dirty_count(cache_bits, state->cache);
+	 fprintf(stderr, "\n");
+      }
+   }
+
+   if (!brw->intel.Fallback)   /* on fallback the dirty flags are left set */
+      memset(state, 0, sizeof(*state));
+}
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
new file mode 100644
index 0000000000..66d4127271
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -0,0 +1,1575 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#ifndef BRW_STRUCTS_H
+#define BRW_STRUCTS_H
+
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+
+/* Command packets:
+ * struct header is the leading member of many packet structs below. */
+struct header 
+{
+   GLuint length:16; 
+   GLuint opcode:16; 
+};
+
+
+union header_union
+{
+   struct header bits;   /* field-wise view: length and opcode */
+   GLuint dword;         /* the same storage accessed as a single GLuint */
+};
+
+struct brw_3d_control
+{   
+   struct 
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint pad:3;
+      GLuint wc_flush_enable:1; 
+      GLuint depth_stall_enable:1; 
+      GLuint operation:2; 
+      GLuint opcode:16; 
+   } header;
+   
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1; 
+      GLuint dest_addr:29; 
+   } dest;
+   
+   GLuint dword2;   
+   GLuint dword3;   
+};
+
+
+struct brw_3d_primitive
+{
+   struct
+   {
+      GLuint length:8; 
+      GLuint pad:2;
+      GLuint topology:5; 
+      GLuint indexed:1; 
+      GLuint opcode:16; 
+   } header;
+
+   GLuint verts_per_instance;  
+   GLuint start_vert_location;  
+   GLuint instance_count;  
+   GLuint start_instance_location;  
+   GLuint base_vert_location;  
+};
+
+/* These seem to be passed around as function args, so it works out
+ * better to keep them as #defines:
+ */
+#define BRW_FLUSH_READ_CACHE           0x1
+#define BRW_FLUSH_STATE_CACHE          0x2
+#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4
+#define BRW_FLUSH_SNAPSHOT_COUNTERS    0x8
+
+struct brw_mi_flush
+{
+   GLuint flags:4;     /* 4 bits: presumably an OR of the four defines above -- confirm */
+   GLuint pad:12;
+   GLuint opcode:16;   /* 4 + 12 + 16 = 32 bits in total */
+};
+
+struct brw_vf_statistics
+{
+   GLuint statistics_enable:1;
+   GLuint pad:15;
+   GLuint opcode:16;
+};
+
+
+
+struct brw_binding_table_pointers
+{
+   struct header header;
+   GLuint vs; 
+   GLuint gs; 
+   GLuint clp; 
+   GLuint sf; 
+   GLuint wm; 
+};
+
+
+struct brw_blend_constant_color
+{
+   struct header header;
+   GLfloat blend_constant_color[4];  
+};
+
+
+struct brw_depthbuffer
+{
+   union header_union header;
+   
+   union {
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
+};
+
+struct brw_depthbuffer_g4x
+{
+   union header_union header;
+   
+   union {
+      struct {
+	 GLuint pitch:18; 
+	 GLuint format:3; 
+	 GLuint pad:2;
+	 GLuint software_tiled_rendering_mode:2;
+	 GLuint depth_offset_disable:1; 
+	 GLuint tile_walk:1; 
+	 GLuint tiled_surface:1; 
+	 GLuint pad2:1;
+	 GLuint surface_type:3; 
+      } bits;
+      GLuint dword;
+   } dword1;
+   
+   GLuint dword2_base_addr; 
+ 
+   union {
+      struct {
+	 GLuint pad:1;
+	 GLuint mipmap_layout:1; 
+	 GLuint lod:4; 
+	 GLuint width:13; 
+	 GLuint height:13; 
+      } bits;
+      GLuint dword;
+   } dword3;
+
+   union {
+      struct {
+	 GLuint pad:10;
+	 GLuint min_array_element:11; 
+	 GLuint depth:11; 
+      } bits;
+      GLuint dword;
+   } dword4;
+
+   union {
+      struct {
+         GLuint xoffset:16;
+         GLuint yoffset:16;
+      } bits;
+      GLuint dword;
+   } dword5;   /* NEW in Integrated Graphics Device */
+};
+
+struct brw_drawrect
+{
+   struct header header;
+   GLuint xmin:16; 
+   GLuint ymin:16; 
+   GLuint xmax:16; 
+   GLuint ymax:16; 
+   GLuint xorg:16;  
+   GLuint yorg:16;  
+};
+
+
+
+
+struct brw_global_depth_offset_clamp
+{
+   struct header header;
+   GLfloat depth_offset_clamp;  
+};
+
+struct brw_indexbuffer
+{   
+   union {
+      struct
+      {
+	 GLuint length:8; 
+	 GLuint index_format:2; 
+	 GLuint cut_index_enable:1; 
+	 GLuint pad:5; 
+	 GLuint opcode:16; 
+      } bits;
+      GLuint dword;
+
+   } header;
+
+   GLuint buffer_start; 
+   GLuint buffer_end; 
+};
+
+/* NEW in Integrated Graphics Device */
+struct brw_aa_line_parameters
+{
+   struct header header;
+
+   struct {
+      GLuint aa_coverage_scope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_bias:8;
+      GLuint pad1:8;
+   } bits0;
+
+   struct {
+      GLuint aa_coverage_endcap_slope:8;
+      GLuint pad0:8;
+      GLuint aa_coverage_endcap_bias:8;
+      GLuint pad1:8;
+   } bits1;
+};
+
+struct brw_line_stipple
+{   
+   struct header header;
+  
+   struct
+   {
+      GLuint pattern:16; 
+      GLuint pad:16;
+   } bits0;
+   
+   struct
+   {
+      GLuint repeat_count:9; 
+      GLuint pad:7;
+      GLuint inverse_repeat_count:16; 
+   } bits1;
+};
+
+
+struct brw_pipelined_state_pointers
+{
+   struct header header;
+   
+   struct {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } vs;
+   
+   struct
+   {
+      GLuint enable:1;
+      GLuint pad:4;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } gs;
+   
+   struct
+   {
+      GLuint enable:1;
+      GLuint pad:4;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } clp;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } sf;
+
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE */
+   } wm;
+   
+   struct
+   {
+      GLuint pad:5;
+      GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */
+   } cc;
+};
+
+
+struct brw_polygon_stipple_offset
+{
+   struct header header;
+
+   struct {
+      GLuint y_offset:5; 
+      GLuint pad:3;
+      GLuint x_offset:5; 
+      GLuint pad0:19;
+   } bits0;
+};
+
+
+
+struct brw_polygon_stipple
+{
+   struct header header;
+   GLuint stipple[32];
+};
+
+
+
+struct brw_pipeline_select
+{
+   struct
+   {
+      GLuint pipeline_select:1;   
+      GLuint pad:15;
+      GLuint opcode:16;   
+   } header;
+};
+
+
+struct brw_pipe_control
+{
+   struct
+   {
+      GLuint length:8;
+      GLuint notify_enable:1;
+      GLuint texture_cache_flush_enable:1;
+      GLuint indirect_state_pointers_disable:1;
+      GLuint instruction_state_cache_flush_enable:1;
+      GLuint write_cache_flush_enable:1;
+      GLuint depth_stall_enable:1;
+      GLuint post_sync_operation:2;
+
+      GLuint opcode:16;
+   } header;
+
+   struct
+   {
+      GLuint pad:2;
+      GLuint dest_addr_type:1;
+      GLuint dest_addr:29;
+   } bits1;
+
+   GLuint data0;
+   GLuint data1;
+};
+
+
+struct brw_urb_fence
+{
+   struct
+   {
+      GLuint length:8;   
+      GLuint vs_realloc:1;   
+      GLuint gs_realloc:1;   
+      GLuint clp_realloc:1;   
+      GLuint sf_realloc:1;   
+      GLuint vfe_realloc:1;   
+      GLuint cs_realloc:1;   
+      GLuint pad:2;
+      GLuint opcode:16;   
+   } header;
+
+   struct
+   {
+      GLuint vs_fence:10;  
+      GLuint gs_fence:10;  
+      GLuint clp_fence:10;  
+      GLuint pad:2;
+   } bits0;
+
+   struct
+   {
+      GLuint sf_fence:10;  
+      GLuint vf_fence:10;  
+      GLuint cs_fence:11;  
+      GLuint pad:1;
+   } bits1;
+};
+
+struct brw_cs_urb_state
+{
+   struct header header;
+
+   struct
+   {
+      GLuint nr_urb_entries:3;   
+      GLuint pad:1;
+      GLuint urb_entry_size:5;   
+      GLuint pad0:23;
+   } bits0;
+};
+
+struct brw_constant_buffer
+{
+   struct
+   {
+      GLuint length:8;   
+      GLuint valid:1;   
+      GLuint pad:7;
+      GLuint opcode:16;   
+   } header;
+
+   struct
+   {
+      GLuint buffer_length:6;   
+      GLuint buffer_address:26;  
+   } bits0;
+};
+
+struct brw_state_base_address
+{
+   struct header header;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint general_state_address:27;  
+   } bits0;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint surface_state_address:27;  
+   } bits1;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:4;
+      GLuint indirect_object_state_address:27;  
+   } bits2;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint general_state_upper_bound:20;  
+   } bits3;
+
+   struct
+   {
+      GLuint modify_enable:1;
+      GLuint pad:11;
+      GLuint indirect_object_state_upper_bound:20;  
+   } bits4;
+};
+
+struct brw_state_prefetch
+{
+   struct header header;
+
+   struct
+   {
+      GLuint prefetch_count:3;   
+      GLuint pad:3;
+      GLuint prefetch_pointer:26;  
+   } bits0;
+};
+
+struct brw_system_instruction_pointer
+{
+   struct header header;
+
+   struct
+   {
+      GLuint pad:4;
+      GLuint system_instruction_pointer:28;  
+   } bits0;
+};
+
+
+
+
+/* State structs for the various fixed function units:
+ */
+
+
+struct thread0   /* first member of the clip, SF, GS, VS and WM unit state structs */
+{
+   GLuint pad0:1;
+   GLuint grf_reg_count:3; 
+   GLuint pad1:2;
+   GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */
+};   /* 1 + 3 + 2 + 26 = 32 bits */
+
+struct thread1   /* used by the SF, GS, VS and WM unit states; clip declares its own */
+{
+   GLuint ext_halt_exception_enable:1; 
+   GLuint sw_exception_enable:1; 
+   GLuint mask_stack_exception_enable:1; 
+   GLuint timeout_exception_enable:1; 
+   GLuint illegal_op_exception_enable:1; 
+   GLuint pad0:3;
+   GLuint depth_coef_urb_read_offset:6;	/* WM only */
+   GLuint pad1:2;
+   GLuint floating_point_mode:1; 
+   GLuint thread_priority:1; 
+   GLuint binding_table_entry_count:8; 
+   GLuint pad3:5;
+   GLuint single_program_flow:1; 
+};   /* field widths add up to 32 bits */
+
+struct thread2   /* third member of the clip, SF, GS, VS and WM unit state structs */
+{
+   GLuint per_thread_scratch_space:4; 
+   GLuint pad0:6;
+   GLuint scratch_space_base_pointer:22; 
+};   /* 4 + 6 + 22 = 32 bits */
+
+   
+struct thread3   /* fourth member of the clip, SF, GS, VS and WM unit state structs */
+{
+   GLuint dispatch_grf_start_reg:4; 
+   GLuint urb_entry_read_offset:6; 
+   GLuint pad0:1;
+   GLuint urb_entry_read_length:6; 
+   GLuint pad1:1;
+   GLuint const_urb_entry_read_offset:6; 
+   GLuint pad2:1;
+   GLuint const_urb_entry_read_length:6; 
+   GLuint pad3:1;
+};   /* 4 + 6+1 + 6+1 + 6+1 + 6+1 = 32 bits */
+
+
+
+struct brw_clip_unit_state
+{
+   struct thread0 thread0;
+   struct
+   {
+      GLuint pad0:7;
+      GLuint sw_exception_enable:1;
+      GLuint pad1:3;
+      GLuint mask_stack_exception_enable:1;
+      GLuint pad2:1;
+      GLuint illegal_op_exception_enable:1;
+      GLuint pad3:2;
+      GLuint floating_point_mode:1;
+      GLuint thread_priority:1;
+      GLuint binding_table_entry_count:8;
+      GLuint pad4:5;
+      GLuint single_program_flow:1;
+   } thread1;
+
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:9;
+      GLuint gs_output_stats:1; /* not always */
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5; 	/* may be less */
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct
+   {
+      GLuint pad0:13;
+      GLuint clip_mode:3; 
+      GLuint userclip_enable_flags:8; 
+      GLuint userclip_must_clip:1; 
+      GLuint negative_w_clip_test:1;
+      GLuint guard_band_enable:1; 
+      GLuint viewport_z_clip_enable:1; 
+      GLuint viewport_xy_clip_enable:1; 
+      GLuint vertex_position_space:1; 
+      GLuint api_mode:1; 
+      GLuint pad2:1;
+   } clip5;
+   
+   struct
+   {
+      GLuint pad0:5;
+      GLuint clipper_viewport_state_ptr:27; 
+   } clip6;
+
+   
+   GLfloat viewport_xmin;  
+   GLfloat viewport_xmax;  
+   GLfloat viewport_ymin;  
+   GLfloat viewport_ymax;  
+};
+
+
+
+struct brw_cc_unit_state
+{
+   struct
+   {
+      GLuint pad0:3;
+      GLuint bf_stencil_pass_depth_pass_op:3; 
+      GLuint bf_stencil_pass_depth_fail_op:3; 
+      GLuint bf_stencil_fail_op:3; 
+      GLuint bf_stencil_func:3; 
+      GLuint bf_stencil_enable:1; 
+      GLuint pad1:2;
+      GLuint stencil_write_enable:1; 
+      GLuint stencil_pass_depth_pass_op:3; 
+      GLuint stencil_pass_depth_fail_op:3; 
+      GLuint stencil_fail_op:3; 
+      GLuint stencil_func:3; 
+      GLuint stencil_enable:1; 
+   } cc0;
+
+   
+   struct
+   {
+      GLuint bf_stencil_ref:8; 
+      GLuint stencil_write_mask:8; 
+      GLuint stencil_test_mask:8; 
+      GLuint stencil_ref:8; 
+   } cc1;
+
+   
+   struct
+   {
+      GLuint logicop_enable:1; 
+      GLuint pad0:10;
+      GLuint depth_write_enable:1; 
+      GLuint depth_test_function:3; 
+      GLuint depth_test:1; 
+      GLuint bf_stencil_write_mask:8; 
+      GLuint bf_stencil_test_mask:8; 
+   } cc2;
+
+   
+   struct
+   {
+      GLuint pad0:8;
+      GLuint alpha_test_func:3; 
+      GLuint alpha_test:1; 
+      GLuint blend_enable:1; 
+      GLuint ia_blend_enable:1; 
+      GLuint pad1:1;
+      GLuint alpha_test_format:1;
+      GLuint pad2:16;
+   } cc3;
+   
+   struct
+   {
+      GLuint pad0:5; 
+      GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+   } cc4;
+   
+   struct
+   {
+      GLuint pad0:2;
+      GLuint ia_dest_blend_factor:5; 
+      GLuint ia_src_blend_factor:5; 
+      GLuint ia_blend_function:3; 
+      GLuint statistics_enable:1; 
+      GLuint logicop_func:4; 
+      GLuint pad1:11;
+      GLuint dither_enable:1; 
+   } cc5;
+
+   struct
+   {
+      GLuint clamp_post_alpha_blend:1; 
+      GLuint clamp_pre_alpha_blend:1; 
+      GLuint clamp_range:2; 
+      GLuint pad0:11;
+      GLuint y_dither_offset:2; 
+      GLuint x_dither_offset:2; 
+      GLuint dest_blend_factor:5; 
+      GLuint src_blend_factor:5; 
+      GLuint blend_function:3; 
+   } cc6;
+
+   struct {
+      union {
+	 GLfloat f;  
+	 GLubyte ub[4];
+      } alpha_ref;
+   } cc7;
+};
+
+
+
+struct brw_sf_unit_state
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct
+   {
+      GLuint front_winding:1; 
+      GLuint viewport_transform:1; 
+      GLuint pad0:3;
+      GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
+   } sf5;
+   
+   struct
+   {
+      GLuint pad0:9;
+      GLuint dest_org_vbias:4; 
+      GLuint dest_org_hbias:4; 
+      GLuint scissor:1; 
+      GLuint disable_2x2_trifilter:1; 
+      GLuint disable_zero_pix_trifilter:1; 
+      GLuint point_rast_rule:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint line_width:4; 
+      GLuint fast_scissor_disable:1; 
+      GLuint cull_mode:2; 
+      GLuint aa_enable:1; 
+   } sf6;
+
+   struct
+   {
+      GLuint point_size:11; 
+      GLuint use_point_size_state:1; 
+      GLuint subpixel_precision:1; 
+      GLuint sprite_point:1; 
+      GLuint pad0:10;
+      GLuint aa_line_distance_mode:1;
+      GLuint trifan_pv:2; 
+      GLuint linestrip_pv:2; 
+      GLuint tristrip_pv:2; 
+      GLuint line_last_pixel_enable:1; 
+   } sf7;
+
+};
+
+
+struct brw_gs_unit_state
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+
+   struct
+   {
+      GLuint pad0:8;
+      GLuint rendering_enable:1; /* for IGDNG */
+      GLuint pad4:1;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:5; 
+      GLuint pad3:2;
+   } thread4;   
+      
+   struct
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } gs5;
+
+   
+   struct
+   {
+      GLuint max_vp_index:4; 
+      GLuint pad0:12;
+      GLuint svbi_post_inc_value:10;
+      GLuint pad1:1;
+      GLuint svbi_post_inc_enable:1;
+      GLuint svbi_payload:1;
+      GLuint discard_adjaceny:1;
+      GLuint reorder_enable:1; 
+      GLuint pad2:1;
+   } gs6;
+};
+
+
+struct brw_vs_unit_state
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct
+   {
+      GLuint pad0:10;
+      GLuint stats_enable:1; 
+      GLuint nr_urb_entries:7; 
+      GLuint pad1:1;
+      GLuint urb_entry_allocation_size:5; 
+      GLuint pad2:1;
+      GLuint max_threads:6; 
+      GLuint pad3:1;
+   } thread4;   
+
+   struct
+   {
+      GLuint sampler_count:3; 
+      GLuint pad0:2;
+      GLuint sampler_state_pointer:27; 
+   } vs5;
+
+   struct
+   {
+      GLuint vs_enable:1; 
+      GLuint vert_cache_disable:1; 
+      GLuint pad0:30;
+   } vs6;
+};
+
+
+struct brw_wm_unit_state
+{
+   struct thread0 thread0;
+   struct thread1 thread1;
+   struct thread2 thread2;
+   struct thread3 thread3;
+   
+   struct {
+      GLuint stats_enable:1; 
+      GLuint depth_buffer_clear:1;
+      GLuint sampler_count:3; 
+      GLuint sampler_state_pointer:27; 
+   } wm4;
+   
+   struct
+   {
+      GLuint enable_8_pix:1; 
+      GLuint enable_16_pix:1; 
+      GLuint enable_32_pix:1; 
+      GLuint enable_con_32_pix:1;
+      GLuint enable_con_64_pix:1;
+      GLuint pad0:5;
+      GLuint legacy_global_depth_bias:1; 
+      GLuint line_stipple:1; 
+      GLuint depth_offset:1; 
+      GLuint polygon_stipple:1; 
+      GLuint line_aa_region_width:2; 
+      GLuint line_endcap_aa_region_width:2; 
+      GLuint early_depth_test:1; 
+      GLuint thread_dispatch_enable:1; 
+      GLuint program_uses_depth:1; 
+      GLuint program_computes_depth:1; 
+      GLuint program_uses_killpixel:1; 
+      GLuint legacy_line_rast: 1; 
+      GLuint transposed_urb_read_enable:1; 
+      GLuint max_threads:7; 
+   } wm5;
+   
+   GLfloat global_depth_offset_constant;  
+   GLfloat global_depth_offset_scale;   
+   
+   /* for IGDNG only */
+   struct {
+      GLuint pad0:1;
+      GLuint grf_reg_count_1:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_1:26;
+   } wm8;       
+
+   struct {
+      GLuint pad0:1;
+      GLuint grf_reg_count_2:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_2:26;
+   } wm9;       
+
+   struct {
+      GLuint pad0:1;
+      GLuint grf_reg_count_3:3; 
+      GLuint pad1:2;
+      GLuint kernel_start_pointer_3:26;
+   } wm10;       
+};
+
+struct brw_sampler_default_color {
+   GLfloat color[4];
+};
+
+struct brw_sampler_state
+{
+   /* Four bit-field groups, ss0..ss3; the fields of each total 32 bits. */
+   struct
+   {
+      GLuint shadow_function:3; 
+      GLuint lod_bias:11; 
+      GLuint min_filter:3; 
+      GLuint mag_filter:3; 
+      GLuint mip_filter:2; 
+      GLuint base_level:5; 
+      GLuint pad:1;
+      GLuint lod_preclamp:1; 
+      GLuint default_color_mode:1; 
+      GLuint pad0:1;
+      GLuint disable:1; 
+   } ss0;   /* filter selection, LOD bias / base level and the disable bit */
+
+   struct
+   {
+      GLuint r_wrap_mode:3; 
+      GLuint t_wrap_mode:3; 
+      GLuint s_wrap_mode:3; 
+      GLuint pad:3;
+      GLuint max_lod:10; 
+      GLuint min_lod:10; 
+   } ss1;   /* one wrap mode per r/t/s coordinate, then the max/min LOD fields */
+
+   /* 5 pad bits precede the pointer; presumably it is stored >> 5 (32-byte aligned) -- confirm. */
+   struct
+   {
+      GLuint pad:5;
+      GLuint default_color_pointer:27; 
+   } ss2;
+   /* ss3: 19 pad bits, then anisotropy, chroma-key and monochrome-filter fields. */
+   struct
+   {
+      GLuint pad:19;
+      GLuint max_aniso:3; 
+      GLuint chroma_key_mode:1; 
+      GLuint chroma_key_index:2; 
+      GLuint chroma_key_enable:1; 
+      GLuint monochrome_filter_width:3; 
+      GLuint monochrome_filter_height:3; 
+   } ss3;
+};
+
+/* Four floats holding min/max bounds in x and y for the clipper viewport. */
+struct brw_clipper_viewport
+{
+   GLfloat xmin;  
+   GLfloat xmax;  
+   GLfloat ymin;  
+   GLfloat ymax;  
+};
+
+struct brw_cc_viewport
+{
+   GLfloat min_depth;  /* lower end of the depth range */
+   GLfloat max_depth;  /* upper end of the depth range */
+};
+
+struct brw_sf_viewport
+{
+   struct {
+      GLfloat m00;  /* m00/m11/m22: presumably the x/y/z scale terms -- confirm */
+      GLfloat m11;  
+      GLfloat m22;  
+      GLfloat m30;  /* m30/m31/m32: presumably the x/y/z translate terms -- confirm */
+      GLfloat m31;  
+      GLfloat m32;  
+   } viewport;
+
+   /* scissor coordinates are inclusive */
+   struct {
+      GLshort xmin;
+      GLshort ymin;
+      GLshort xmax;
+      GLshort ymax;
+   } scissor;   /* GLshort bounds, stored as the min pair then the max pair */
+};
+
+/* Six 32-bit groups, ss0..ss5.  Documented in the
+ * subsystem/shared-functions/sampler chapter... */
+struct brw_surface_state
+{
+   struct {
+      GLuint cube_pos_z:1; 
+      GLuint cube_neg_z:1; 
+      GLuint cube_pos_y:1; 
+      GLuint cube_neg_y:1; 
+      GLuint cube_pos_x:1; 
+      GLuint cube_neg_x:1; 
+      GLuint pad:4;
+      GLuint mipmap_layout_mode:1; 
+      GLuint vert_line_stride_ofs:1; 
+      GLuint vert_line_stride:1; 
+      GLuint color_blend:1; 
+      GLuint writedisable_blue:1; 
+      GLuint writedisable_green:1; 
+      GLuint writedisable_red:1; 
+      GLuint writedisable_alpha:1; 
+      GLuint surface_format:9;     /**< BRW_SURFACEFORMAT_x */
+      GLuint data_return_format:1; 
+      GLuint pad0:1;
+      GLuint surface_type:3;       /**< BRW_SURFACE_1D/2D/3D/CUBE */
+   } ss0;
+   /* ss1: the surface base address as a whole, unpacked GLuint. */
+   struct {
+      GLuint base_addr;  
+   } ss1;
+   /* ss2 and ss3: mip count plus the width, height, pitch and depth fields. */
+   struct {
+      GLuint pad:2;
+      GLuint mip_count:4; 
+      GLuint width:13; 
+      GLuint height:13; 
+   } ss2;
+
+   struct {
+      GLuint tile_walk:1; 
+      GLuint tiled_surface:1; 
+      GLuint pad:1; 
+      GLuint pitch:18; 
+      GLuint depth:11; 
+   } ss3;
+   /* ss4: multisample fields, render-target view extent, minimum array element and LOD. */
+   struct {
+      GLuint multisample_position_palette_index:3;
+      GLuint pad1:1;
+      GLuint num_multisamples:3;
+      GLuint pad0:1;
+      GLuint render_target_view_extent:9;
+      GLuint min_array_elt:11;
+      GLuint min_lod:4; 
+   } ss4;
+
+   struct {
+      GLuint pad1:16;
+      GLuint llc_mapping:1;
+      GLuint mlc_mapping:1;
+      GLuint gfdt:1;
+      GLuint gfdt_src:1;
+      GLuint y_offset:4;
+      GLuint pad0:1;
+      GLuint x_offset:7;
+   } ss5;   /* New in G4X */
+
+};
+
+
+/* One vertex buffer entry: the packed vb0 bit-fields followed by three plain GLuints. */
+struct brw_vertex_buffer_state
+{
+   struct {
+      GLuint pitch:11; 
+      GLuint pad:15;
+      GLuint access_type:1; 
+      GLuint vb_index:5; 
+   } vb0;
+   
+   GLuint start_addr; 
+   GLuint max_index;   
+#if 1
+   GLuint instance_data_step_rate; /* not included for sequential/random vertices? */
+#endif
+};
+
+#define BRW_VBP_MAX 17   /* number of slots in brw_vb_array_state.vb[] */
+
+struct brw_vb_array_state {
+   struct header header;   /* struct header is declared outside this section */
+   struct brw_vertex_buffer_state vb[BRW_VBP_MAX];
+};
+
+/* One vertex element: ve0 holds the source fields, ve1 the destination offset and vfcomponent fields. */
+struct brw_vertex_element_state
+{
+   struct
+   {
+      GLuint src_offset:11; 
+      GLuint pad:5;
+      GLuint src_format:9; 
+      GLuint pad0:1;
+      GLuint valid:1; 
+      GLuint vertex_buffer_index:5; 
+   } ve0;
+   
+   struct
+   {
+      GLuint dst_offset:8; 
+      GLuint pad:8;
+      GLuint vfcomponent3:4; 
+      GLuint vfcomponent2:4; 
+      GLuint vfcomponent1:4; 
+      GLuint vfcomponent0:4; 
+   } ve1;
+};
+
+#define BRW_VEP_MAX 18   /* number of slots in brw_vertex_element_packet.ve[] */
+
+struct brw_vertex_element_packet {
+   struct header header;   /* struct header is declared outside this section */
+   struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
+};
+
+/* 32 bits of URB message fields; also embedded as the "urb" member of brw_instruction.bits3. */
+struct brw_urb_immediate {
+   GLuint opcode:4;
+   GLuint offset:6;
+   GLuint swizzle_control:2; 
+   GLuint pad:1;
+   GLuint allocate:1;
+   GLuint used:1;
+   GLuint complete:1;
+   GLuint response_length:4;
+   GLuint msg_length:4;
+   GLuint msg_target:4;
+   GLuint pad1:3;
+   GLuint end_of_thread:1;
+};
+
+/* Instruction format for the execution units: a 32-bit header followed by
+ * three unions (bits1..bits3), each overlaying alternative bit-field layouts.
+ */
+struct brw_instruction
+{
+   struct 
+   {
+      GLuint opcode:7;
+      GLuint pad:1;
+      GLuint access_mode:1;
+      GLuint mask_control:1;
+      GLuint dependency_control:2;
+      GLuint compression_control:2;
+      GLuint thread_control:2;
+      GLuint predicate_control:4;
+      GLuint predicate_inverse:1;
+      GLuint execution_size:3;
+      GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
+      GLuint pad0:2;
+      GLuint debug_control:1;
+      GLuint saturate:1;
+   } header;
+
+   union {
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad:1;
+	 GLuint dest_subreg_nr:5;
+	 GLuint dest_reg_nr:8;
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } da1;   /* names the destination by register number; "da" presumably = direct addressing -- confirm */
+
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;        /* 0x00000c00 */
+	 GLuint src1_reg_type:3;        /* 0x00007000 */
+	 GLuint pad:1;
+	 GLint dest_indirect_offset:10;	/* offset against the deref'd address reg */
+	 GLuint dest_subreg_nr:3; /* subnr for the address reg a0.x */
+	 GLuint dest_horiz_stride:2;
+	 GLuint dest_address_mode:1;
+      } ia1;   /* replaces the register number with a signed offset from an address register */
+
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint src1_reg_file:2;
+	 GLuint src1_reg_type:3;
+	 GLuint pad:1;
+	 GLuint dest_writemask:4;
+	 GLuint dest_subreg_nr:1;
+	 GLuint dest_reg_nr:8;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } da16;   /* like da1 but with a 4-bit writemask in place of the horizontal stride */
+
+      struct
+      {
+	 GLuint dest_reg_file:2;
+	 GLuint dest_reg_type:3;
+	 GLuint src0_reg_file:2;
+	 GLuint src0_reg_type:3;
+	 GLuint pad0:6;
+	 GLuint dest_writemask:4;
+	 GLint dest_indirect_offset:6;
+	 GLuint dest_subreg_nr:3;
+	 GLuint pad1:2;
+	 GLuint dest_address_mode:1;
+      } ia16;   /* writemask form with a signed indirect offset; no src1 file/type fields here */
+   } bits1;   /* register file/type selectors plus the destination operand */
+
+   /* bits2: the src0 operand layouts, plus the IGDNG-only send fields. */
+   union {
+      struct
+      {
+	 GLuint src0_subreg_nr:5;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;
+      } da1;
+
+      struct
+      {
+	 GLint src0_indirect_offset:10;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_horiz_stride:2;
+	 GLuint src0_width:3;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad:6;	
+      } ia1;
+
+      struct
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLuint src0_subreg_nr:1;
+	 GLuint src0_reg_nr:8;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } da16;
+
+      struct
+      {
+	 GLuint src0_swz_x:2;
+	 GLuint src0_swz_y:2;
+	 GLint src0_indirect_offset:6;
+	 GLuint src0_subreg_nr:3;
+	 GLuint src0_abs:1;
+	 GLuint src0_negate:1;
+	 GLuint src0_address_mode:1;
+	 GLuint src0_swz_z:2;
+	 GLuint src0_swz_w:2;
+	 GLuint pad0:1;
+	 GLuint src0_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;
+      } ia16;
+
+       struct 
+       {
+           GLuint pad:26;
+           GLuint end_of_thread:1;
+           GLuint pad1:1;
+           GLuint sfid:4;
+       } send_igdng;  /* for IGDNG only */
+
+   } bits2;
+
+   union
+   {
+      struct
+      {
+	 GLuint src1_subreg_nr:5;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad0:7;
+      } da1;
+
+      struct
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLuint src1_subreg_nr:1;
+	 GLuint src1_reg_nr:8;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint pad2:7;
+      } da16;
+
+      struct
+      {
+	 GLint  src1_indirect_offset:10;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint src1_address_mode:1;
+	 GLuint src1_horiz_stride:2;
+	 GLuint src1_width:3;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad1:6;	
+      } ia1;
+
+      struct
+      {
+	 GLuint src1_swz_x:2;
+	 GLuint src1_swz_y:2;
+	 GLint  src1_indirect_offset:6;
+	 GLuint src1_subreg_nr:3;
+	 GLuint src1_abs:1;
+	 GLuint src1_negate:1;
+	 GLuint pad0:1;	/* NOTE(review): sibling layouts name this bit srcN_address_mode -- confirm the pad is intended */
+	 GLuint src1_swz_z:2;
+	 GLuint src1_swz_w:2;
+	 GLuint pad1:1;
+	 GLuint src1_vert_stride:4;
+	 GLuint flag_reg_nr:1;
+	 GLuint pad2:6;
+      } ia16;
+
+      /* Remaining members: jump counts (if_else) and layouts carrying msg_/response_length fields. */
+      struct
+      {
+	 GLint  jump_count:16;	/* note: signed */
+	 GLuint  pop_count:4;
+	 GLuint  pad0:12;
+      } if_else;
+
+      struct {
+	 GLuint function:4;
+	 GLuint int_type:1;
+	 GLuint precision:1;
+	 GLuint saturate:1;
+	 GLuint data_type:1;
+	 GLuint pad0:8;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } math;
+
+      struct {
+	 GLuint function:4;
+	 GLuint int_type:1;
+	 GLuint precision:1;
+	 GLuint saturate:1;
+	 GLuint data_type:1;
+	 GLuint snapshot:1;
+	 GLuint pad0:10;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } math_igdng;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint return_format:2; 
+	 GLuint msg_type:2;   
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } sampler;
+
+      struct {
+         GLuint binding_table_index:8;
+         GLuint sampler:4;
+         GLuint msg_type:4;
+         GLuint response_length:4;
+         GLuint msg_length:4;
+         GLuint msg_target:4;
+         GLuint pad1:3;
+         GLuint end_of_thread:1;
+      } sampler_g4x;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint sampler:4;
+	 GLuint msg_type:4;
+	 GLuint simd_mode:2;
+	 GLuint pad0:1;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } sampler_igdng;
+
+      struct brw_urb_immediate urb;
+
+      struct {
+	 GLuint opcode:4;
+	 GLuint offset:6;
+	 GLuint swizzle_control:2; 
+	 GLuint pad:1;
+	 GLuint allocate:1;
+	 GLuint used:1;
+	 GLuint complete:1;
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } urb_igdng;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:4;  
+	 GLuint msg_type:2;  
+	 GLuint target_cache:2;    
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_read;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;  
+	 GLuint msg_type:3;  
+	 GLuint target_cache:2;    
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } dp_read_igdng;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint msg_type:3;    
+	 GLuint send_commit_msg:1;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } dp_write;
+
+      struct {
+	 GLuint binding_table_index:8;
+	 GLuint msg_control:3;
+	 GLuint pixel_scoreboard_clear:1;
+	 GLuint msg_type:3;    
+	 GLuint send_commit_msg:1;
+	 GLuint pad0:3;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } dp_write_igdng;
+
+      struct {
+	 GLuint pad:16;
+	 GLuint response_length:4;
+	 GLuint msg_length:4;
+	 GLuint msg_target:4;
+	 GLuint pad1:3;
+	 GLuint end_of_thread:1;
+      } generic;
+
+      struct {
+	 GLuint pad:19;
+	 GLuint header_present:1;
+	 GLuint response_length:5;
+	 GLuint msg_length:4;
+	 GLuint pad1:2;
+	 GLuint end_of_thread:1;
+      } generic_igdng;
+
+      GLint d;	/* d / ud / f: the same storage viewed as signed, unsigned or float */
+      GLuint ud;
+      float f;
+   } bits3;
+};
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c
new file mode 100644
index 0000000000..e911b105b2
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_tex.c
@@ -0,0 +1,59 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/teximage.h"
+
+#include "intel_context.h"
+#include "intel_regions.h"
+#include "intel_tex.h"
+#include "brw_context.h"
+
+/**
+ * Walks texture units 0 .. BRW_MAX_TEX_UNIT-1 and finalizes the mipmap
+ * tree of every unit whose _ReallyEnabled field is non-zero.
+ */
+void brw_validate_textures( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   const struct gl_texture_unit *units = intel->ctx.Texture.Unit;
+   int unit;
+
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      /* Units that are not enabled have nothing to finalize. */
+      if (!units[unit]._ReallyEnabled)
+         continue;
+
+      intel_finalize_mipmap_tree(intel, unit);
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
new file mode 100644
index 0000000000..5986cbffad
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_tex_layout.c
@@ -0,0 +1,222 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+#include "intel_chipset.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+/* Computes mt->pitch, mt->total_height and per-level image offsets for mt->target; always returns GL_TRUE. */
+GLboolean brw_miptree_layout(struct intel_context *intel,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling)
+{
+   /* XXX: these vary depending on image format: */
+   /* GLint align_w = 4; */
+
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP:
+      if (IS_IGDNG(intel->intelScreen->deviceID)) {
+          GLuint align_h = 2, align_w = 4;
+          GLuint level;
+          GLuint x = 0;
+          GLuint y = 0;
+          GLuint width = mt->width0;
+          GLuint height = mt->height0;
+          GLuint qpitch = 0;
+          GLuint y_pitch = 0;
+
+          mt->pitch = mt->width0;
+          intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+          y_pitch = ALIGN(height, align_h);
+
+          if (mt->compressed) {
+              mt->pitch = ALIGN(mt->width0, align_w);
+          }
+
+          if (mt->first_level != mt->last_level) {
+              GLuint mip1_width;   /* combined width of mip 1 and mip 2, which sit side by side */
+
+              if (mt->compressed) {
+                  mip1_width = ALIGN(minify(mt->width0), align_w)
+                      + ALIGN(minify(minify(mt->width0)), align_w);
+              } else {
+                  mip1_width = ALIGN(minify(mt->width0), align_w)
+                      + minify(minify(mt->width0));
+              }
+
+              if (mip1_width > mt->pitch) {
+                  mt->pitch = mip1_width;
+              }
+          }
+
+          mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+          if (mt->compressed) {   /* same row count as below, divided by 4 -- presumably 4-row blocks, confirm */
+              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+          } else {
+              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+          }
+
+          for (level = mt->first_level; level <= mt->last_level; level++) {
+              GLuint img_height;
+              GLuint nr_images = 6;
+              GLuint q = 0;
+
+              intel_miptree_set_level_info(mt, level, nr_images, x, y, width, 
+                                           height, 1);
+
+              for (q = 0; q < nr_images; q++)   /* image q gets the extra offset q * qpitch */
+                  intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+              if (mt->compressed)
+                  img_height = MAX2(1, height/4);
+              else
+                  img_height = ALIGN(height, align_h);
+
+              if (level == mt->first_level + 1) {   /* after the second level, step right instead of down */
+                  x += ALIGN(width, align_w);
+              }
+              else {
+                  y += img_height;
+              }
+
+              width  = minify(width);
+              height = minify(height);
+          }
+
+          break;
+      }
+      /* fall through: cube maps on non-IGDNG parts use the packed layout below */
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint pack_x_pitch, pack_x_nr;
+      GLuint pack_y_pitch;
+      GLuint level;
+      GLuint align_h = 2;
+      GLuint align_w = 4;
+
+      mt->total_height = 0;
+      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+      if (mt->compressed) {
+          mt->pitch = ALIGN(width, align_w);
+          pack_y_pitch = (height + 3) / 4;
+      } else {
+	 mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+	 pack_y_pitch = ALIGN(mt->height0, align_h);
+      }
+
+      pack_x_pitch = width;
+      pack_x_nr = 1;
+
+      for (level = mt->first_level ; level <= mt->last_level ; level++) {
+	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
+	 GLint x = 0;
+	 GLint y = 0;
+	 GLint q, j;
+
+	 intel_miptree_set_level_info(mt, level, nr_images,
+				      0, mt->total_height,
+				      width, height, depth);
+
+	 for (q = 0; q < nr_images;) {
+	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+	       intel_miptree_set_image_offset(mt, level, q, x, y);
+	       x += pack_x_pitch;
+	    }
+
+	    x = 0;
+	    y += pack_y_pitch;
+	 }
+
+	 /* y now holds the total height of this level's rows of images. */
+	 mt->total_height += y;
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+
+	 if (mt->compressed) {
+	    pack_y_pitch = (height + 3) / 4;
+
+	    if (pack_x_pitch > ALIGN(width, align_w)) {
+	       pack_x_pitch = ALIGN(width, align_w);
+	       pack_x_nr <<= 1;
+	    }
+	 } else {
+	    if (pack_x_pitch > 4) {   /* halve the slot width and double the images per row */
+	       pack_x_pitch >>= 1;
+	       pack_x_nr <<= 1;
+	       assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+	    }
+
+	    if (pack_y_pitch > 2) {
+	       pack_y_pitch >>= 1;
+	       pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+	    }
+	 }
+
+      }
+      /* The 965's sampler lays cachelines out according to how accesses
+       * in the texture surfaces run, so they may be "vertical" through
+       * memory.  As a result, the docs say in Surface Padding Requirements:
+       * Sampling Engine Surfaces that two extra rows of padding are required.
+       * We don't know of similar requirements for pre-965, but given that
+       * those docs are silent on padding requirements in general, let's play
+       * it safe.
+       */
+      if (mt->target == GL_TEXTURE_CUBE_MAP)
+	 mt->total_height += 2;
+      break;
+   }
+
+   default:
+      i945_miptree_layout_2d(intel, mt, tiling);
+      break;
+   }
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+		mt->pitch,
+		mt->total_height,
+		mt->cpp,
+		mt->pitch * mt->total_height * mt->cpp );
+
+   return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
new file mode 100644
index 0000000000..8c6f4355a6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -0,0 +1,250 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+        
+
+
+#include "intel_batchbuffer.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#define VS 0    /* VS..CS index the rows of limits[] below, in table order */
+#define GS 1
+#define CLP 2
+#define SF 3
+#define CS 4    /* last index; limits[] is sized CS+1 */
+
+/** @file brw_urb.c
+ *
+ * Manages the division of the URB space between the various fixed-function
+ * units.
+ *
+ * See the Thread Initiation Management section of the GEN4 B-Spec, and
+ * the individual *_STATE structures for restrictions on numbers of
+ * entries and threads.
+ */
+
+/*
+ * Generally, a unit requires a min_nr_entries based on how many entries
+ * it produces before the downstream unit gets unblocked and can use and
+ * dereference some of its handles.
+ *
+ * The SF unit preallocates a PUE at the start of thread dispatch, and only
+ * uses that one.  So it requires one entry per thread.
+ *
+ * For CLIP, the SF unit will hold the previous primitive while the
+ * next is getting assembled, meaning that linestrips require 3 CLIP VUEs
+ * (vertices) to ensure continued processing, trifans require 4, and tristrips
+ * require 5.  There can be 1 or 2 threads, and each has the same requirement.
+ *
+ * GS has the same requirement as CLIP, but it never handles tristrips,
+ * so we can lower the minimum to 4 for the POLYGONs (trifans) it produces.
+ * We only run it single-threaded.
+ *
+ * For VS, the number of entries may be 8, 12, 16, or 32 (or 64 on G4X).
+ * Each thread processes 2 preallocated VUEs (vertices) at a time, and they
+ * get streamed down as soon as threads processing earlier vertices get
+ * theirs accepted.
+ *
+ * Each unit will take the number of URB entries we give it (based on the
+ * entry size calculated in brw_vs_emit.c for VUEs, brw_sf_emit.c for PUEs,
+ * and brw_curbe.c for the CURBEs) and decide, in brw_*_state.c, the maximum
+ * number of threads it can support based on that.
+ *
+ * XXX: Are the min_entry_size numbers useful?
+ * XXX: Verify min_nr_entries, esp for VS.
+ * XXX: Verify SF min_entry_size.
+ */
+static const struct {
+   GLuint min_nr_entries;          /* fallback count, used when the preferred layout does not fit */
+   GLuint preferred_nr_entries;    /* count recalculate_urb_fence() tries first */
+   GLuint min_entry_size;          /* lower clamp applied to the vs, sf and cs entry sizes */
+   GLuint max_entry_size;          /* not read anywhere in brw_urb.c */
+} limits[CS+1] = {
+   { 16, 32, 1, 5 },			/* vs */
+   { 4, 8,  1, 5 },			/* gs */
+   { 5, 10,  1, 5 },			/* clp */
+   { 1, 8,  1, 12 },		        /* sf */
+   { 1, 4,  1, 32 }			/* cs */
+};
+
+/* Places the VS, GS, CLIP, SF and CS regions back to back and reports whether they fit in URB_SIZES(brw). */
+static GLboolean check_urb_layout( struct brw_context *brw )
+{
+   brw->urb.vs_start = 0;
+   brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize;
+   brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize;   /* GS and CLIP entries are sized with vsize too */
+   brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize;
+   brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize;
+
+   return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= URB_SIZES(brw);
+}
+
+/* Most minimal update: re-partitions the URB when an entry size changes and
+ * forces re-emit of the URB fence packet (e.g. after GS unit turned on/off).
+ */
+static void recalculate_urb_fence( struct brw_context *brw )
+{
+   GLuint csize = brw->curbe.total_size;
+   GLuint vsize = brw->vs.prog_data->urb_entry_size;
+   GLuint sfsize = brw->sf.prog_data->urb_entry_size;
+
+   if (csize < limits[CS].min_entry_size)
+      csize = limits[CS].min_entry_size;
+
+   if (vsize < limits[VS].min_entry_size)
+      vsize = limits[VS].min_entry_size;
+
+   if (sfsize < limits[SF].min_entry_size)
+      sfsize = limits[SF].min_entry_size;
+
+   if (brw->urb.vsize < vsize ||
+       brw->urb.sfsize < sfsize ||
+       brw->urb.csize < csize ||
+       (brw->urb.constrained && (brw->urb.vsize > vsize ||
+				 brw->urb.sfsize > sfsize ||
+				 brw->urb.csize > csize))) {
+      /* A size grew, or shrank while constrained: adopt the new sizes and retry from the preferred counts. */
+
+      brw->urb.csize = csize;
+      brw->urb.sfsize = sfsize;
+      brw->urb.vsize = vsize;
+
+      brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;	
+      brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries;	
+      brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries;
+      brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;	
+      brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries;	
+
+      brw->urb.constrained = 0;
+
+      if (BRW_IS_IGDNG(brw)) {   /* try larger VS/SF counts first; revert to preferred if they do not fit */
+         brw->urb.nr_vs_entries = 128;
+         brw->urb.nr_sf_entries = 48;
+         if (check_urb_layout(brw)) {
+            goto done;
+         } else {
+            brw->urb.constrained = 1;
+            brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+            brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries;
+         }
+      } else if (BRW_IS_G4X(brw)) {   /* same idea, with a larger VS count only */
+	 brw->urb.nr_vs_entries = 64;
+	 if (check_urb_layout(brw)) {
+	    goto done;
+	 } else {
+	    brw->urb.constrained = 1;
+	    brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries;
+	 }
+      }
+
+      if (!check_urb_layout(brw)) {   /* preferred counts do not fit: drop to the minimums */
+	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries;	
+	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries;	
+	 brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+	 brw->urb.nr_sf_entries = limits[SF].min_nr_entries;	
+	 brw->urb.nr_cs_entries = limits[CS].min_nr_entries;	
+
+	 /* Mark us as operating with constrained nr_entries, so that next
+	  * time we recalculate we'll resize the fences in the hope of
+	  * escaping constrained mode and getting back to normal performance.
+	  */
+	 brw->urb.constrained = 1;
+	 
+	 if (!check_urb_layout(brw)) {
+	    /* This is impossible, given the maximal sizes of urb
+	     * entries and the values for minimum nr of entries
+	     * provided above.
+	     */
+	    _mesa_printf("couldn't calculate URB layout!\n");
+	    exit(1);
+	 }
+	 
+	 if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+	    _mesa_printf("URB CONSTRAINED\n");
+      }
+
+done:
+      if (INTEL_DEBUG & DEBUG_URB)
+	 _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+		      brw->urb.vs_start,
+		      brw->urb.gs_start,
+		      brw->urb.clip_start,
+		      brw->urb.sf_start,
+		      brw->urb.cs_start, 
+		      URB_SIZES(brw));
+      
+      brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
+   }
+}
+
+
+const struct brw_tracked_state brw_recalculate_urb_fence = {
+   .dirty = {   /* presumably the flags that cause .prepare to run -- confirm against the state-upload code */
+      .mesa = 0,
+      .brw = BRW_NEW_CURBE_OFFSETS,   /* recalculate_urb_fence() reads brw->curbe.total_size */
+      .cache = (CACHE_NEW_VS_PROG |
+		CACHE_NEW_SF_PROG)   /* ...and the vs/sf prog_data urb_entry_size values */
+   },
+   .prepare = recalculate_urb_fence
+};
+
+
+
+
+/* Builds a brw_urb_fence command from the region starts stored in brw->urb and hands it to BRW_BATCH_STRUCT. */
+void brw_upload_urb_fence(struct brw_context *brw)
+{
+   struct brw_urb_fence uf;
+   memset(&uf, 0, sizeof(uf));
+
+   uf.header.opcode = CMD_URB_FENCE;
+   uf.header.length = sizeof(uf)/4-2;   /* struct size in 4-byte units, minus 2 */
+   uf.header.vs_realloc = 1;
+   uf.header.gs_realloc = 1;
+   uf.header.clp_realloc = 1;
+   uf.header.sf_realloc = 1;
+   uf.header.vfe_realloc = 1;
+   uf.header.cs_realloc = 1;
+
+   /* The ordering below is correct, not the layout in the
+    * instruction.
+    *
+    * There are 256/384 urb reg pairs in total.
+    */
+   uf.bits0.vs_fence  = brw->urb.gs_start;   /* each fence is set to the start of the following region */
+   uf.bits0.gs_fence  = brw->urb.clip_start; 
+   uf.bits0.clp_fence = brw->urb.sf_start; 
+   uf.bits1.sf_fence  = brw->urb.cs_start; 
+   uf.bits1.cs_fence  = URB_SIZES(brw);   /* the last fence is the URB size itself */
+
+   BRW_BATCH_STRUCT(brw, &uf);
+}
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
new file mode 100644
index 0000000000..ce21aa4869
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -0,0 +1,104 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+         
+
+#include "main/mtypes.h"
+#include "shader/prog_parameter.h"
+#include "brw_util.h"
+#include "brw_defines.h"
+
+/* Return the number of set bits in val. */
+GLuint brw_count_bits( GLuint val )
+{
+   GLuint nbits = 0;
+   for (; val != 0; val >>= 1)
+      nbits += val & 1;
+   return nbits;
+}
+
+
+/* Map a GL blend equation enum onto the matching BRW_BLENDFUNCTION_x code. */
+GLuint brw_translate_blend_equation( GLenum mode )
+{
+   GLuint hw_func = BRW_BLENDFUNCTION_ADD;   /* also the fallback value */
+
+   switch (mode) {
+   case GL_FUNC_ADD:              break;
+   case GL_MIN:                   hw_func = BRW_BLENDFUNCTION_MIN; break;
+   case GL_MAX:                   hw_func = BRW_BLENDFUNCTION_MAX; break;
+   case GL_FUNC_SUBTRACT:         hw_func = BRW_BLENDFUNCTION_SUBTRACT; break;
+   case GL_FUNC_REVERSE_SUBTRACT: hw_func = BRW_BLENDFUNCTION_REVERSE_SUBTRACT; break;
+   default:
+      /* Unknown equation: assert, and fall back to ADD if asserts are off. */
+      assert(0);
+      break;
+   }
+   return hw_func;
+}
+
+/**
+ * Map a GL blend factor enum onto the matching BRW_BLENDFACTOR_x code.
+ *
+ * An unrecognised factor hits assert(0) and, if asserts are compiled
+ * out, yields BRW_BLENDFACTOR_ZERO.
+ */
+GLuint brw_translate_blend_factor( GLenum factor )
+{
+   switch (factor) {
+   /* fixed factors */
+   case GL_ZERO:                     return BRW_BLENDFACTOR_ZERO;
+   case GL_ONE:                      return BRW_BLENDFACTOR_ONE;
+
+   /* source colour / alpha */
+   case GL_SRC_COLOR:                return BRW_BLENDFACTOR_SRC_COLOR;
+   case GL_ONE_MINUS_SRC_COLOR:      return BRW_BLENDFACTOR_INV_SRC_COLOR;
+   case GL_SRC_ALPHA:                return BRW_BLENDFACTOR_SRC_ALPHA;
+   case GL_ONE_MINUS_SRC_ALPHA:      return BRW_BLENDFACTOR_INV_SRC_ALPHA;
+   case GL_SRC_ALPHA_SATURATE:       return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+
+   /* destination colour / alpha */
+   case GL_DST_COLOR:                return BRW_BLENDFACTOR_DST_COLOR;
+   case GL_ONE_MINUS_DST_COLOR:      return BRW_BLENDFACTOR_INV_DST_COLOR;
+   case GL_DST_ALPHA:                return BRW_BLENDFACTOR_DST_ALPHA;
+   case GL_ONE_MINUS_DST_ALPHA:      return BRW_BLENDFACTOR_INV_DST_ALPHA;
+
+   /* blend constant colour / alpha */
+   case GL_CONSTANT_COLOR:           return BRW_BLENDFACTOR_CONST_COLOR;
+   case GL_ONE_MINUS_CONSTANT_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
+   case GL_CONSTANT_ALPHA:           return BRW_BLENDFACTOR_CONST_ALPHA;
+   case GL_ONE_MINUS_CONSTANT_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+
+   default:
+      /* Unknown factor. */
+      assert(0);
+      return BRW_BLENDFACTOR_ZERO;
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
new file mode 100644
index 0000000000..33e7cd87e4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+          
+
+#ifndef BRW_UTIL_H
+#define BRW_UTIL_H
+
+#include "main/mtypes.h"
+
+/* Helpers implemented in brw_util.c. */
+extern GLuint brw_count_bits( GLuint val );
+/* NOTE(review): no definition of this one appears in brw_util.c as added by this patch -- confirm it exists elsewhere. */
+extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
+extern GLuint brw_translate_blend_factor( GLenum factor );
+extern GLuint brw_translate_blend_equation( GLenum mode );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
new file mode 100644
index 0000000000..f0c79efbd9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -0,0 +1,124 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+           
+
+#include "brw_context.h"
+#include "brw_vs.h"
+#include "brw_util.h"
+#include "brw_state.h"
+#include "shader/prog_print.h"
+
+
+
+/* Compile vp for the given key: run brw_vs_emit(), fetch the generated
+ * program and upload it to the cache, leaving the resulting buffer
+ * object in brw->vs.prog_bo.
+ */
+static void do_vs_prog( struct brw_context *brw, 
+			struct brw_vertex_program *vp,
+			struct brw_vs_prog_key *key )
+{
+   struct brw_vs_compile compile;
+   const GLuint *code;
+   GLuint code_size;
+
+   memset(&compile, 0, sizeof(compile));
+   memcpy(&compile.key, key, sizeof(*key));
+
+   brw_init_compile(brw, &compile.func);
+   compile.vp = vp;
+
+   /* Masks declared by the program, plus the edge flag pair if the key asks. */
+   compile.prog_data.outputs_written = vp->program.Base.OutputsWritten;
+   compile.prog_data.inputs_read = vp->program.Base.InputsRead;
+
+   if (compile.key.copy_edgeflag) {
+      compile.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+      compile.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
+   }
+
+   if (0)
+      _mesa_print_program(&compile.vp->program.Base);
+
+   /* Emit GEN4 code and fetch the assembled program. */
+   brw_vs_emit(&compile);
+   code = brw_get_program(&compile.func, &code_size);
+
+   /* Drop the reference held in vs.prog_bo before overwriting it. */
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+				       &compile.key, sizeof(compile.key),
+				       NULL, 0,
+				       code, code_size,
+				       &compile.prog_data,
+				       &brw->vs.prog_data );
+}
+
+
+/* Build the VS compile key from GL state, look it up in the program cache
+ * and compile via do_vs_prog() on a miss.  The program is uploaded verbatim
+ * for now: it always gets all the inputs it asks for, varying or not. */
+static void brw_upload_vs_prog(struct brw_context *brw)
+{
+   struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program;
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_vs_prog_key key;
+
+   memset(&key, 0, sizeof(key));
+   key.program_string_id = vp->id;
+   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+   key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
+			ctx->Polygon.BackMode != GL_FILL);
+
+   /* Early cache probe with the freshly built key. */
+   dri_bo_unreference(brw->vs.prog_bo);
+   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
+				      &key, sizeof(key),
+				      NULL, 0,
+				      &brw->vs.prog_data);
+   if (brw->vs.prog_bo != NULL)
+      return;
+
+   /* Miss: compile and upload. */
+   do_vs_prog(brw, vp, &key);
+}
+
+
+/* State atom for the VS program.  The .mesa flags presumably cover the
+ * ctx->Transform and ctx->Polygon state brw_upload_vs_prog reads -- confirm. */
+const struct brw_tracked_state brw_vs_prog = {
+   .prepare = brw_upload_vs_prog,
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM | _NEW_POLYGON,
+      .brw   = BRW_NEW_VERTEX_PROGRAM,
+      .cache = 0,
+   },
+};
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
new file mode 100644
index 0000000000..4a591365c9
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -0,0 +1,88 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#ifndef BRW_VS_H
+#define BRW_VS_H
+
+
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "shader/program.h"
+
+
+struct brw_vs_prog_key {      /* VS compile key; handed to the program cache as pointer + sizeof */
+   GLuint program_string_id;  /* filled from vp->id by brw_upload_vs_prog */
+   GLuint nr_userclip:4;      /* count of enabled user clip planes */
+   GLuint copy_edgeflag:1;    /* set when front or back polygon mode is not GL_FILL */
+   GLuint pad:27;             /* 4 + 1 + 27 = 32: names every remaining bit of the word (was 26) */
+};
+
+
+struct brw_vs_compile {	/* per-compile state for one vertex program */
+   struct brw_compile func;		/* emitter state handed to brw_init_compile() / brw_get_program() */
+   struct brw_vs_prog_key key;		/* copy of the key this compile is for */
+   struct brw_vs_prog_data prog_data;	/* results recorded for the cache (masks, total_grf, urb sizes...) */
+
+   struct brw_vertex_program *vp;	/* program being compiled */
+
+   GLuint nr_inputs;			/* number of bits set in prog_data.inputs_read */
+
+   GLuint first_output;			/* GRF number at which output allocation starts */
+   GLuint nr_outputs;			/* number of bits set in prog_data.outputs_written */
+   GLuint first_overflow_output; /**< VERT_RESULT_x of the first output placed in a GRF because the MRFs ran out; 0 if none */
+
+   GLuint first_tmp;			/* start of the scratch GRF range (release_tmps() rewinds to it) */
+   GLuint last_tmp;			/* next scratch GRF get_tmp() will hand out */
+
+   struct brw_reg r0;
+   struct brw_reg r1;
+   struct brw_reg regs[PROGRAM_ADDRESS+1][128];	/* indexed [register file][register index] */
+   struct brw_reg tmp;
+   struct brw_reg stack;
+
+   struct {	
+       GLboolean used_in_src;		/* when set, brw_vs_alloc_regs gives the entry its own GRF in .reg */
+       struct brw_reg reg;
+   } output_regs[128];
+
+   struct brw_reg userplane[6];		/* user clip planes read from the curbe */
+
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;			/* initialised to -1 by brw_vs_alloc_regs */
+      struct brw_reg reg;
+   } current_const[3];
+};
+
+void brw_vs_emit( struct brw_vs_compile *c );
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
new file mode 100644
index 0000000000..1638ef8111
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -0,0 +1,1667 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+#include "main/macros.h"
+#include "shader/program.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "brw_context.h"
+#include "brw_vs.h"
+
+
+/* Hand out the next scratch GRF and raise prog_data.total_grf if needed. */
+static struct brw_reg get_tmp( struct brw_vs_compile *c )
+{
+   const GLuint nr = c->last_tmp++;
+
+   if (c->prog_data.total_grf < c->last_tmp)
+      c->prog_data.total_grf = c->last_tmp;
+   return brw_vec8_grf(nr, 0);
+}
+
+/* Give tmp back, but only when it is the most recently allocated temporary. */
+static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
+{
+   c->last_tmp -= (tmp.nr == c->last_tmp-1) ? 1 : 0;
+}
+			       
+static void release_tmps( struct brw_vs_compile *c )
+{
+   c->last_tmp = c->first_tmp;	/* rewind: every outstanding temporary is dropped at once */
+}
+
+
+/**
+ * Preallocate GRF registers before code emit.  Do things as simply as
+ * possible: a single counter (reg) walks upwards and every register the
+ * program may need is assigned ahead of time, in the order seen below.
+ */
+static void brw_vs_alloc_regs( struct brw_vs_compile *c )
+{
+   GLuint i, reg = 0, mrf;
+   int attributes_in_vue;
+
+   /* Determine whether to use a real constant buffer or use a block
+    * of GRF registers for constants.  The latter is faster but only
+    * works if everything fits in the GRF.
+    * XXX this heuristic/check may need some fine tuning...
+    */
+   if (c->vp->program.Base.Parameters->NumParameters +
+       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+      c->vp->use_const_buffer = GL_TRUE;
+   else
+      c->vp->use_const_buffer = GL_FALSE;
+
+   /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
+
+   /* r0 -- reserved as usual
+    */
+   c->r0 = brw_vec8_grf(reg, 0);
+   reg++;
+
+   /* User clip planes from curbe: 
+    */
+   if (c->key.nr_userclip) {
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      }     
+
+      /* Deal with curbe alignment: 6 + nr_userclip planes, rounded up to a
+       * multiple of 4, at two vec4 planes per register: */
+      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+   }
+
+   /* Vertex program parameters from curbe:
+    */
+   if (c->vp->use_const_buffer) {
+      /* get constants from a real constant buffer */
+      c->prog_data.curb_read_length = 0;
+      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+   }
+   else {
+      /* use a section of the GRF for constants */
+      GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+      for (i = 0; i < nr_params; i++) {
+         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+      }
+      reg += (nr_params + 1) / 2;	/* two vec4 parameters per register, rounded up */
+      c->prog_data.curb_read_length = reg - 1;	/* everything allocated so far except r0 */
+
+      c->prog_data.nr_params = nr_params * 4;
+   }
+
+   /* Allocate input regs:  
+    */
+   c->nr_inputs = 0;
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (c->prog_data.inputs_read & (1 << i)) {
+	 c->nr_inputs++;
+	 c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
+      }
+   }
+   /* If there are no inputs, we'll still be reading one attribute's worth
+    * because it's required -- see urb_read_length setting.
+    */
+   if (c->nr_inputs == 0)
+      reg++;
+
+   /* Allocate outputs.  The non-position outputs go straight into message regs.
+    */
+   c->nr_outputs = 0;
+   c->first_output = reg;
+   c->first_overflow_output = 0;	/* 0 doubles as "no overflow yet", see the test below */
+
+   if (BRW_IS_IGDNG(c->func.brw))
+       mrf = 8;
+   else
+       mrf = 4;
+
+   for (i = 0; i < VERT_RESULT_MAX; i++) {
+      if (c->prog_data.outputs_written & (1 << i)) {
+	 c->nr_outputs++;
+         assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
+	 if (i == VERT_RESULT_HPOS) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	 }
+	 else if (i == VERT_RESULT_PSIZ) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	    reg++;
+	    mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
+	 }
+	 else {
+            if (mrf < 16) {
+               c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+               mrf++;
+            }
+            else {
+               /* too many vertex results to fit in MRF, use GRF for overflow */
+               if (!c->first_overflow_output)
+                  c->first_overflow_output = i;
+               c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+               reg++;
+            }
+	 }
+      }
+   }     
+
+   /* Allocate program temporaries:
+    */
+   for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
+      c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+      reg++;
+   }
+
+   /* Address reg(s).  Don't try to use the internal address reg until
+    * deref time.
+    */
+   for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
+      c->regs[PROGRAM_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
+					     reg,
+					     0,
+					     BRW_REGISTER_TYPE_D,
+					     BRW_VERTICAL_STRIDE_8,
+					     BRW_WIDTH_8,
+					     BRW_HORIZONTAL_STRIDE_1,
+					     BRW_SWIZZLE_XXXX,
+					     WRITEMASK_X);
+      reg++;
+   }
+
+   if (c->vp->use_const_buffer) {
+      for (i = 0; i < 3; i++) {
+         c->current_const[i].index = -1;	/* -1: slot holds no constant yet */
+         c->current_const[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   for (i = 0; i < 128; i++) {
+      if (c->output_regs[i].used_in_src) {
+         c->output_regs[i].reg = brw_vec8_grf(reg, 0);
+         reg++;
+      }
+   }
+
+   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+   reg += 2;	/* two registers are set aside for the stack */
+
+   /* Some opcodes need an internal temporary:
+    */
+   c->first_tmp = reg;
+   c->last_tmp = reg;		/* for allocation purposes */
+
+   /* Each input reg holds data from two vertices.  The
+    * urb_read_length is the number of registers read from *each*
+    * vertex urb, so is half the amount:
+    */
+   c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+   /* Setting this field to 0 leads to undefined behavior according to
+    * the VS_STATE docs.  Our VUEs will always have at least one attribute
+    * sitting in them, even if it's padding.
+    */
+   if (c->prog_data.urb_read_length == 0)
+      c->prog_data.urb_read_length = 1;
+
+   /* The VS VUEs are shared by VF (outputting our inputs) and VS, so size
+    * them to fit the biggest thing they need to.
+    */
+   attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
+
+   if (BRW_IS_IGDNG(c->func.brw))
+       c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+   else
+       c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+
+   c->prog_data.total_grf = reg;	/* get_tmp() raises this further when temporaries are handed out */
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+      _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+   }
+}
+
+
+/**
+ * Run a one-source emitter.  If dst and arg0 name the same register the
+ * result is produced in a temporary first and then copied to dst.
+ */
+static void unalias1( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   struct brw_reg tmp;
+
+   if (dst.file != arg0.file || dst.nr != arg0.nr) {
+      func(c, dst, arg0);
+      return;
+   }
+   tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, tmp, arg0);
+   brw_MOV(&c->func, dst, tmp);
+   release_tmp(c, tmp);
+}
+
+/**
+ * \sa unalias1
+ * Two-source variant: goes through a temporary when dst shares a register with arg0 or arg1.
+ */
+static void unalias2( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   struct brw_reg tmp;
+
+   if ((dst.file != arg0.file || dst.nr != arg0.nr) &&
+       (dst.file != arg1.file || dst.nr != arg1.nr)) {
+      func(c, dst, arg0, arg1);
+      return;
+   }
+   tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, tmp, arg0, arg1);
+   brw_MOV(&c->func, dst, tmp);
+   release_tmp(c, tmp);
+}
+
+/**
+ * \sa unalias2
+ * Three-source variant: goes through a temporary when dst shares a register with any source.
+ */
+static void unalias3( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1,
+		      struct brw_reg arg2,
+		      void (*func)( struct brw_vs_compile *,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg,
+				    struct brw_reg ))
+{
+   struct brw_reg tmp;
+
+   if ((dst.file != arg0.file || dst.nr != arg0.nr) &&
+       (dst.file != arg1.file || dst.nr != arg1.nr) &&
+       (dst.file != arg2.file || dst.nr != arg2.nr)) {
+      func(c, dst, arg0, arg1, arg2);
+      return;
+   }
+   tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
+   func(c, tmp, arg0, arg1, arg2);
+   brw_MOV(&c->func, dst, tmp);
+   release_tmp(c, tmp);
+}
+
+static void emit_sop( struct brw_compile *p,	/* set-on-condition helper shared by the emit_sXX wrappers */
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1, 
+		      GLuint cond)	/* BRW_CONDITIONAL_x code handed to brw_CMP */
+{
+   brw_MOV(p, dst, brw_imm_f(0.0f));	/* default result: 0.0 */
+   brw_CMP(p, brw_null_reg(), cond, arg0, arg1);	/* compare only: destination is the null reg */
+   brw_MOV(p, dst, brw_imm_f(1.0f));	/* presumably predicated on the CMP above -- TODO confirm in brw_CMP */
+   brw_set_predicate_control_flag_value(p, 0xff);	/* flag value set to 0xff once the pair is emitted */
+}
+
+static void emit_seq( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);	/* SEQ: emit_sop with the "equal" condition */
+}
+
+static void emit_sne( struct brw_compile *p,
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);	/* SNE: emit_sop with the "not equal" condition */
+}
+static void emit_slt( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);	/* SLT: emit_sop with the "less" condition */
+}
+
+static void emit_sle( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);	/* SLE: emit_sop with the "less or equal" condition */
+}
+
+static void emit_sgt( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);	/* SGT: emit_sop with the "greater" condition */
+}
+
+static void emit_sge( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+  emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);	/* SGE: emit_sop with the "greater or equal" condition */
+}
+
+static void emit_max( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);	/* test arg0 < arg1, result to the null reg */
+   brw_SEL(p, dst, arg1, arg0);	/* MAX: arg1 listed first; presumably taken where the test passed -- TODO confirm */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* predicate control back to NONE */
+}
+
+static void emit_min( struct brw_compile *p, 
+		      struct brw_reg dst,
+		      struct brw_reg arg0,
+		      struct brw_reg arg1 )
+{
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);	/* test arg0 < arg1, result to the null reg */
+   brw_SEL(p, dst, arg0, arg1);	/* MIN: arg0 listed first; presumably taken where the test passed -- TODO confirm */
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* predicate control back to NONE */
+}
+
+
+static void emit_math1( struct brw_vs_compile *c,	/* one-operand brw_math() call with a writemask workaround */
+			GLuint function,	/* BRW_MATH_FUNCTION_x, passed through to brw_math */
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			GLuint precision)	/* BRW_MATH_PRECISION_x, passed through to brw_math */
+{
+   /* There are various odd behaviours with SEND on the simulator.  In
+    * addition there are documented issues with the fact that the GEN4
+    * processor doesn't do dependency control properly on SEND
+    * results.  So, on balance, this kludge to get around failures
+    * with writemasked math results looks like it might be necessary
+    * whether that turns out to be a simulator bug or not:
+    */
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||	/* partial writemask, or ... */
+			 dst.file != BRW_GENERAL_REGISTER_FILE);	/* ... dst is not a GRF */
+
+   if (need_tmp) 
+      tmp = get_tmp(c);
+
+   brw_math(p, 
+	    tmp,
+	    function,
+	    BRW_MATH_SATURATE_NONE,
+	    2,		/* presumably the message register number -- confirm against brw_math() */
+	    arg0,
+	    BRW_MATH_DATA_SCALAR,
+	    precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);	/* dst's own writemask applies on this copy */
+      release_tmp(c, tmp);
+   }
+}
+
+
+static void emit_math2( struct brw_vs_compile *c, 	/* two-operand variant of emit_math1 */
+			GLuint function,
+			struct brw_reg dst,
+			struct brw_reg arg0,
+			struct brw_reg arg1,
+			GLuint precision)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||	/* same temporary rule as emit_math1 */
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp) 
+      tmp = get_tmp(c);
+
+   brw_MOV(p, brw_message_reg(3), arg1);	/* second operand is staged in message register 3 */
+   
+   brw_math(p, 
+	    tmp,
+	    function,
+	    BRW_MATH_SATURATE_NONE,
+	    2,		/* presumably the message register number, i.e. the one before m3 -- confirm */
+ 	    arg0,
+	    BRW_MATH_DATA_SCALAR,
+	    precision);
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+static void emit_exp_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   /* EXP, one piece per enabled writemask channel: x = 2^floor(arg0.x) built
+    * from exponent bits, y = FRC(arg0.x), z = mathbox EXP(arg0.x), w = 1.0. */
+   if (dst.dw1.bits.writemask & WRITEMASK_X) {
+      struct brw_reg tmp = get_tmp(c);
+      struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
+
+      /* tmp_d = floor(arg0.x) */
+      brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
+
+      /* result[0] = 2.0 ^ tmp */
+
+      /* Adjust exponent for floating point: 
+       * exp += 127 
+       */
+      brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+
+      /* Install exponent and sign.  
+       * Excess drops off the edge: 
+       */
+      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X), 
+	      tmp_d, brw_imm_d(23));
+
+      release_tmp(c, tmp);
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+      /* result[1] = arg0.x - floor(arg0.x) */
+      brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+   }
+   
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* As with the LOG instruction, we might be better off just
+       * doing a taylor expansion here, seeing as we have to do all
+       * the prep work.
+       *
+       * If mathbox partial precision is too low, consider also:
+       * result[2] = result[0] * EXP(result[1])
+       */
+      emit_math1(c, 
+		 BRW_MATH_FUNCTION_EXP, 
+		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_swizzle1(arg0, 0), 
+		 BRW_MATH_PRECISION_FULL);
+   }  
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+static void emit_log_noalias( struct brw_vs_compile *c,
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);	/* keep the UD view in step with the new tmp */
+   }
+   
+   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
+    * according to spec:
+    *
+    * These almost look like they could be joined up, but not really
+    * practical:
+    *
+    * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
+    * result[1].i = (x.i & ((1<<23)-1)) | (127<<23)
+    */
+   if (dst.dw1.bits.writemask & WRITEMASK_XZ) {	/* x is also an input of the z computation below */
+      brw_AND(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_X),
+	      brw_swizzle1(arg0_ud, 0),
+	      brw_imm_ud((1U<<31)-1));
+
+      brw_SHR(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_X), 
+	      tmp_ud,
+	      brw_imm_ud(23));
+
+      brw_ADD(p, 
+	      brw_writemask(tmp, WRITEMASK_X), 
+	      retype(tmp_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */
+	      brw_imm_d(-127));
+   }
+
+   if (dst.dw1.bits.writemask & WRITEMASK_YZ) {	/* y is also an input of the z computation below */
+      brw_AND(p, 
+	      brw_writemask(tmp_ud, WRITEMASK_Y),
+	      brw_swizzle1(arg0_ud, 0),
+	      brw_imm_ud((1<<23)-1));
+
+      brw_OR(p, 
+	     brw_writemask(tmp_ud, WRITEMASK_Y), 
+	     tmp_ud,
+	     brw_imm_ud(127<<23));
+   }
+   
+   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]); */
+
+      /* Why bother?  The above is just a hint how to do this with a
+       * taylor series.  Maybe we *should* use a taylor series as by
+       * the time all the above has been done it's almost certainly
+       * quicker than calling the mathbox, even with low precision.
+       * 
+       * Options are:
+       *    - result[0] + mathbox.LOG2(result[1])
+       *    - mathbox.LOG2(arg0.x)
+       *    - result[0] + inline_taylor_approx(result[1])
+       */
+      emit_math1(c, 
+		 BRW_MATH_FUNCTION_LOG, 
+		 brw_writemask(tmp, WRITEMASK_Z), 
+		 brw_swizzle1(tmp, 1), 
+		 BRW_MATH_PRECISION_FULL);
+      
+      brw_ADD(p, 
+	      brw_writemask(tmp, WRITEMASK_Z), 
+	      brw_swizzle1(tmp, 2), 
+	      brw_swizzle1(tmp, 0));
+   }  
+
+   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+   }
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);	/* dst's own writemask applies on this copy */
+      release_tmp(c, tmp);
+   }
+}
+
+
+/* DST.  Need to unalias first - consider swizzles:   r0 = DST r0.xxxx r1
+ * One instruction per enabled channel: x = 1.0, y = arg0 * arg1, z = arg0,
+ * w = arg1.  (There must be a better way to do this.) */
+static void emit_dst_noalias( struct brw_vs_compile *c, 
+			      struct brw_reg dst,
+			      struct brw_reg arg0,
+			      struct brw_reg arg1)
+{
+   struct brw_compile *p = &c->func;
+   const GLuint mask = dst.dw1.bits.writemask;
+
+   if (mask & WRITEMASK_X)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
+   if (mask & WRITEMASK_Y)
+      brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
+   if (mask & WRITEMASK_Z)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
+   if (mask & WRITEMASK_W)
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+}
+
+
+/* XPD: t.yzx * u.zxy goes to the null reg, then MAC adds (-t.zxy) * u.yzx into
+ * dst -- presumably picking the first product up from the accumulator. */
+static void emit_xpd( struct brw_compile *p, struct brw_reg dst,
+                      struct brw_reg t, struct brw_reg u)
+{
+   brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u, 2,0,1,3));
+   brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u, 1,2,0,3));
+}
+
+/* LIT: dst = (1, 0, 0, 1); if arg0.x > 0 then dst.y = arg0.x and dst.z = POW(tmp.z, arg0.w). */
+static void emit_lit_noalias( struct brw_vs_compile *c, 
+			      struct brw_reg dst,
+			      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_instruction *if_insn;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp) 
+      tmp = get_tmp(c);
+   
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); 
+   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); 
+
+   /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
+    * to get all channels active inside the IF.  In the clipping code
+    * we run with NoMask, so it's not an option and we can use
+    * BRW_EXECUTE_1 for all comparisons.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
+   if_insn = brw_IF(p, BRW_EXECUTE_8);
+   {
+      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z),  brw_swizzle1(arg0,1));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+      emit_math2(c, 
+		 BRW_MATH_FUNCTION_POW, 
+		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_swizzle1(tmp, 2),
+		 brw_swizzle1(arg0, 3),
+		 BRW_MATH_PRECISION_PARTIAL);      
+   }
+   brw_ENDIF(p, if_insn);
+   /* Only release a register that really came from get_tmp(). */
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+static void emit_lrp_noalias(struct brw_vs_compile *c,
+			     struct brw_reg dst,
+			     struct brw_reg arg0,
+			     struct brw_reg arg1,
+			     struct brw_reg arg2)
+{
+   struct brw_compile *p = &c->func;
+   /* dst = arg0 * arg1 + (1 - arg0) * arg2; MUL-to-null presumably seeds the accumulator for MAC */
+   brw_ADD(p, dst, negate(arg0), brw_imm_f(1.0));
+   brw_MUL(p, brw_null_reg(), dst, arg2);
+   brw_MAC(p, dst, arg0, arg1);
+}
+
+/** 3 or 4-component vector normalization: dst = arg0 * rsq(dot(arg0, arg0)) */
+static void emit_nrm( struct brw_vs_compile *c, 
+                      struct brw_reg dst,
+                      struct brw_reg arg0,
+                      int num_comps)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = get_tmp(c);
+
+   /* tmp = dot(arg0, arg0); any num_comps other than 3 takes the DP4 path */
+   if (num_comps == 3)
+      brw_DP3(p, tmp, arg0, arg0);
+   else
+      brw_DP4(p, tmp, arg0, arg0);
+
+   /* tmp = 1 / sqrt(tmp), at full math precision */
+   emit_math1(c, BRW_MATH_FUNCTION_RSQ, tmp, tmp, BRW_MATH_PRECISION_FULL);
+
+   /* dst = arg0 * tmp */
+   brw_MUL(p, dst, arg0, tmp);
+
+   release_tmp(c, tmp);
+}
+
+/* Return the brw reg holding constant operand [argIndex] of inst, fetching it from the constant buffer if not cached. */
+static struct brw_reg
+get_constant(struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex)
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   struct brw_reg const_reg;
+   struct brw_reg const2_reg;
+   const GLboolean relAddr = src->RelAddr;
+
+   assert(argIndex < 3);
+
+   if (c->current_const[argIndex].index != src->Index || relAddr) {
+      struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+
+      c->current_const[argIndex].index = src->Index;
+
+#if 0
+      printf("  fetch const[%d] for arg %d into reg %d\n",
+             src->Index, argIndex, c->current_const[argIndex].reg.nr);
+#endif
+      /* need to fetch the constant now (relative reads always refetch) */
+      brw_dp_READ_4_vs(p,
+                       c->current_const[argIndex].reg,/* writeback dest */
+                       0,                             /* oword */
+                       relAddr,                       /* relative indexing? */
+                       addrReg,                       /* address register */
+                       16 * src->Index,               /* byte offset */
+                       SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
+                       );
+
+      if (relAddr) {
+         /* second read, into a temporary that is merged in below */
+         const2_reg = get_tmp(c);
+
+         /* use upper half of address reg for second read */
+         addrReg = stride(addrReg, 0, 4, 0);
+         addrReg.subnr = 16;
+
+         brw_dp_READ_4_vs(p,
+                          const2_reg,              /* writeback dest */
+                          1,                       /* oword */
+                          relAddr,                 /* relative indexing? */
+                          addrReg,                 /* address register */
+                          16 * src->Index,         /* byte offset */
+                          SURF_INDEX_VERT_CONST_BUFFER /* binding table index */
+                          );
+      }
+   }
+
+   const_reg = c->current_const[argIndex].reg;
+
+   if (relAddr) {
+      /* merge the two Owords into the constant register */
+      /* const_reg[7..4] = const2_reg[7..4] */
+      brw_MOV(p,
+              suboffset(stride(const_reg, 0, 4, 1), 4),
+              suboffset(stride(const2_reg, 0, 4, 1), 4));
+      release_tmp(c, const2_reg);
+   }
+   else {
+      /* replicate lower four floats into upper half (to get XYZWXYZW) */
+      const_reg = stride(const_reg, 0, 4, 0);
+      const_reg.subnr = 0;
+   }
+
+   return const_reg;
+}
+
+
+
+/* Look up the register allocated for (file, index); constants and uniforms
+ * share the PROGRAM_STATE_VAR table.  TODO: relative addressing! */
+static struct brw_reg get_reg( struct brw_vs_compile *c,
+			       gl_register_file file,
+			       GLuint index )
+{
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[file][index].nr != 0);
+      return c->regs[file][index];
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+      assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+      return c->regs[PROGRAM_STATE_VAR][index];
+   case PROGRAM_ADDRESS:
+      assert(index == 0);
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED:			/* undef values map to the null register */
+      return brw_null_reg();
+
+   case PROGRAM_LOCAL_PARAM: 
+   case PROGRAM_ENV_PARAM: 
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg();
+   }
+}
+
+
+/**
+ * Indirect addressing:  get reg[[arg] + offset], copied into a fresh temp.
+ */
+static struct brw_reg deref( struct brw_vs_compile *c,
+			     struct brw_reg arg,
+			     GLint offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = vec4(get_tmp(c));
+   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
+   GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
+   struct brw_reg indirect = brw_vec4_indirect(0,0);
+
+   {
+      brw_push_insn_state(p);
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* This is pretty clunky - load the address register twice and
+       * fetch each 4-dword value in turn.  There must be a way to do
+       * this in a single pass, but I couldn't get it to work.
+       */
+      brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
+      brw_MOV(p, tmp, indirect);
+
+      brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
+      brw_MOV(p, suboffset(tmp, 4), indirect);
+
+      brw_pop_insn_state(p);
+   }
+   
+   /* NOTE: tmp not released - it is handed back to the caller as the operand */
+   return vec8(tmp);
+}
+
+
+/**
+ * Get brw reg corresponding to the instruction's [argIndex] src reg.
+ * RelAddr operands are routed through deref() or get_constant().
+ */
+static struct brw_reg
+get_src_reg( struct brw_vs_compile *c,
+             const struct prog_instruction *inst,
+             GLuint argIndex )
+{
+   const GLuint file = inst->SrcReg[argIndex].File;
+   const GLint index = inst->SrcReg[argIndex].Index;
+   const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+
+   switch (file) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_INPUT:
+   case PROGRAM_OUTPUT:
+      if (relAddr) {
+         return deref(c, c->regs[file][0], index);
+      }
+      else {
+         assert(c->regs[file][index].nr != 0);
+         return c->regs[file][index];
+      }
+
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_CONSTANT:
+   case PROGRAM_UNIFORM:
+   case PROGRAM_ENV_PARAM:
+      if (c->vp->use_const_buffer) {
+         return get_constant(c, inst, argIndex);
+      }
+      else if (relAddr) {
+         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+      }
+      else {
+         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
+         return c->regs[PROGRAM_STATE_VAR][index];
+      }
+   case PROGRAM_ADDRESS:
+      assert(index == 0);
+      return c->regs[file][index];
+
+   case PROGRAM_UNDEFINED:
+      /* this is a normal case since we loop over all three src args */
+      return brw_null_reg();
+
+   case PROGRAM_LOCAL_PARAM: 
+   case PROGRAM_WRITE_ONLY:
+   default:
+      assert(0);
+      return brw_null_reg();
+   }
+}
+
+/* ARL: dst = floor(arg0) * 16 (16 presumably being the byte size of one vec4 slot). */
+static void emit_arl( struct brw_vs_compile *c,
+		      struct brw_reg dst,
+		      struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   
+   if (need_tmp) 
+      tmp = get_tmp(c);
+
+   brw_RNDD(p, tmp, arg0);               /* tmp = floor(arg0) */
+   brw_MUL(p, dst, tmp, brw_imm_d(16));  /* dst = tmp * 16 */
+
+   if (need_tmp)
+      release_tmp(c, tmp);
+}
+
+
+/**
+ * Return the brw reg for the given instruction's src argument.
+ * Will return mangled results for SWZ op.  The emit_swz() function
+ * ignores this result and recalculates taking extended swizzles into
+ * account.
+ */
+static struct brw_reg get_arg( struct brw_vs_compile *c,
+                               const struct prog_instruction *inst,
+                               GLuint argIndex )
+{
+   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   struct brw_reg reg;
+
+   if (src->File == PROGRAM_UNDEFINED)
+      return brw_null_reg();
+
+   reg = get_src_reg(c, inst, argIndex);
+
+   /* Convert 3-bit swizzle to 2-bit; this overwrites any swizzle already
+    * set on reg. */
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+				       GET_SWZ(src->Swizzle, 1),
+				       GET_SWZ(src->Swizzle, 2),
+				       GET_SWZ(src->Swizzle, 3));
+
+   /* Any non-zero Negate value negates the whole operand.  Note this is
+    * ok for non-swizzle instructions: */
+   reg.negate = src->Negate ? 1 : 0;   
+
+   return reg;
+}
+
+
+/**
+ * Get brw register for the given program dest register (WriteMask applied).
+ */
+static struct brw_reg get_dst( struct brw_vs_compile *c,
+			       struct prog_dst_register dst )
+{
+   struct brw_reg reg;
+
+   switch (dst.File) {
+   case PROGRAM_TEMPORARY:
+   case PROGRAM_OUTPUT:
+      assert(c->regs[dst.File][dst.Index].nr != 0);
+      reg = c->regs[dst.File][dst.Index];
+      break;
+   case PROGRAM_ADDRESS:
+      assert(dst.Index == 0);
+      reg = c->regs[dst.File][dst.Index];
+      break;
+   case PROGRAM_UNDEFINED:
+      /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
+      reg = brw_null_reg();
+      break;
+   default:
+      assert(0);
+      reg = brw_null_reg();
+   }
+
+   reg.dw1.bits.writemask = dst.WriteMask;
+
+   return reg;
+}
+
+/* SWZ: extended swizzle of src arg 0 - each dst channel is a src component, 0 or 1 - plus per-channel negate. */
+static void emit_swz( struct brw_vs_compile *c, 
+		      struct brw_reg dst,
+                      const struct prog_instruction *inst)
+{
+   const GLuint argIndex = 0;
+   const struct prog_src_register src = inst->SrcReg[argIndex];
+   struct brw_compile *p = &c->func;
+   GLuint zeros_mask = 0;
+   GLuint ones_mask = 0;
+   GLuint src_mask = 0;
+   GLubyte src_swz[4] = { 0, 1, 2, 3 };	/* identity for channels not read from src */
+   GLboolean need_tmp = (src.Negate &&
+			 dst.file != BRW_GENERAL_REGISTER_FILE);
+   struct brw_reg tmp = dst;
+   GLuint i;
+
+   if (need_tmp)
+      tmp = get_tmp(c);
+
+   for (i = 0; i < 4; i++) {
+      if (dst.dw1.bits.writemask & (1<<i)) {
+	 GLubyte s = GET_SWZ(src.Swizzle, i);
+	 switch (s) {
+	 case SWIZZLE_X:
+	 case SWIZZLE_Y:
+	 case SWIZZLE_Z:
+	 case SWIZZLE_W:
+	    src_mask |= 1<<i;
+	    src_swz[i] = s;
+	    break;
+	 case SWIZZLE_ZERO:
+	    zeros_mask |= 1<<i;
+	    break;
+	 case SWIZZLE_ONE:
+	    ones_mask |= 1<<i;
+	    break;
+	 }
+      }
+   }
+   
+   /* Do src first, in case dst aliases src:
+    */
+   if (src_mask) {
+      struct brw_reg arg0;
+
+      arg0 = get_src_reg(c, inst, argIndex);
+
+      arg0 = brw_swizzle(arg0, 
+			 src_swz[0], src_swz[1], 
+			 src_swz[2], src_swz[3]);
+
+      brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
+   } 
+   
+   if (zeros_mask) 
+      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
+
+   if (ones_mask) 
+      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
+
+   if (src.Negate)
+      brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
+   
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
+   }
+}
+
+
+/**
+ * Post-vertex-program processing: build the vertex header and send the results to the URB.
+ */
+static void emit_vertex_write( struct brw_vs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg m0 = brw_message_reg(0);
+   struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+   struct brw_reg ndc;
+   int eot;
+   GLuint len_vertext_header = 2;
+
+   if (c->key.copy_edgeflag) {
+      brw_MOV(p, 
+	      get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
+	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+   }
+
+   /* Build ndc coords in a temporary */
+   ndc = get_tmp(c);
+   /* ndc = 1.0 / pos.w */
+   emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+   /* ndc.xyz = pos * ndc */
+   brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+
+   /* Update the header for point size, user clipping flags, and -ve rhw
+    * workaround.
+    */
+   if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip || BRW_IS_965(p->brw))
+   {
+      struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
+      GLuint i;
+
+      brw_MOV(p, header1, brw_imm_ud(0));
+
+      brw_set_access_mode(p, BRW_ALIGN_16);	
+
+      if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+	 brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+	 brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+      }
+
+      for (i = 0; i < c->key.nr_userclip; i++) {
+	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
+	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+      /* i965 clipping workaround: 
+       * 1) Test for -ve rhw
+       * 2) If set, 
+       *      set ndc = (0,0,0,0)
+       *      set ucp[6] = 1
+       *
+       * Later, clipping will detect ucp[6] and ensure the primitive is
+       * clipped against all fixed planes.
+       */
+      if (BRW_IS_965(p->brw)) {
+	 brw_CMP(p,
+		 vec8(brw_null_reg()),
+		 BRW_CONDITIONAL_L,
+		 brw_swizzle1(ndc, 3),
+		 brw_imm_f(0));
+   
+	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_MOV(p, ndc, brw_imm_f(0));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
+
+      brw_set_access_mode(p, BRW_ALIGN_1);	/* why? */
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
+      brw_set_access_mode(p, BRW_ALIGN_16);
+
+      release_tmp(c, header1);
+   }
+   else {
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+   }
+
+   /* Emit the (interleaved) headers for the two vertices - an 8-reg
+    * of zeros followed by two sets of NDC coordinates:
+    */
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_MOV(p, offset(m0, 2), ndc);
+
+   if (BRW_IS_IGDNG(p->brw)) {
+       /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
+       brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
+       /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
+        * Seems it is useless for us.
+        * m6 is used for aligning, so that the remainder of vertex element is 
+        * reg-aligned.
+        */
+       brw_MOV(p, offset(m0, 7), pos); /* the remainder of vertex element */
+       len_vertext_header = 6;
+   } else {
+       brw_MOV(p, offset(m0, 3), pos);
+       len_vertext_header = 2;
+   }
+
+   eot = (c->first_overflow_output == 0);	/* this write is the last one unless outputs overflowed the MRF */
+
+   brw_urb_WRITE(p, 
+		 brw_null_reg(), /* dest */
+		 0,		/* starting mrf reg nr */
+		 c->r0,		/* src */
+		 0,		/* allocate */
+		 1,		/* used */
+		 MIN2(c->nr_outputs + 1 + len_vertext_header, (BRW_MAX_MRF-1)), /* msg len */
+		 0,		/* response len */
+		 eot, 		/* eot */
+		 eot, 		/* writes complete */
+		 0, 		/* urb destination offset */
+		 BRW_URB_SWIZZLE_INTERLEAVE);
+
+   if (c->first_overflow_output > 0) {
+      /* Not all of the vertex outputs/results fit into the MRF.
+       * Move the overflowed attributes from the GRF to the MRF and
+       * issue another brw_urb_WRITE().
+       */
+      /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
+       * at mrf[4] atm...
+       */
+      GLuint i, mrf = 0;
+      for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
+         if (c->prog_data.outputs_written & (1 << i)) {
+            /* move from GRF to MRF */
+            brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+            mrf++;
+         }
+      }
+
+      brw_urb_WRITE(p,
+                    brw_null_reg(), /* dest */
+                    4,              /* starting mrf reg nr */
+                    c->r0,          /* src */
+                    0,              /* allocate */
+                    1,              /* used */
+                    mrf+1,          /* msg len */
+                    0,              /* response len */
+                    1,              /* eot */
+                    1,              /* writes complete */
+                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    BRW_URB_SWIZZLE_INTERLEAVE);
+   }
+}
+
+
+/**
+ * Called after code generation to resolve subroutine calls and the
+ * END instruction.
+ * \param end_inst  points to brw code for END instruction
+ * \param last_inst  points to last instruction emitted before vertex write
+ */
+static void 
+post_vs_emit( struct brw_vs_compile *c,
+              struct brw_instruction *end_inst,
+              struct brw_instruction *last_inst )
+{
+   GLint offset;
+
+   brw_resolve_cals(&c->func);
+
+   /* patch up the END code to jump past subroutines, etc; when there is nothing to skip it becomes a NOP */
+   offset = last_inst - end_inst;
+   if (offset > 1) {
+      brw_set_src1(end_inst, brw_imm_d(offset * 16));
+   } else {
+      end_inst->header.opcode = BRW_OPCODE_NOP;
+   }
+}
+/* Translate the instruction's condition mask/swizzle into a BRW_PREDICATE_* value. */
+static uint32_t
+get_predicate(const struct prog_instruction *inst)
+{
+   if (inst->DstReg.CondMask == COND_TR)
+      return BRW_PREDICATE_NONE;
+
+   /* All of GLSL only produces predicates for COND_NE and one channel per
+    * vector.  Fail badly if someone starts doing something else, as it might
+    * mean infinite looping or something.
+    *
+    * We'd like to support all the condition codes, but our hardware doesn't
+    * quite match the Mesa IR, which is modeled after the NV extensions.  For
+    * those, the instruction may update the condition codes or not, then any
+    * later instruction may use one of those condition codes.  For gen4, the
+    * instruction may update the flags register based on one of the condition
+    * codes output by the instruction, and then further instructions may
+    * predicate on that.  We can probably support this, but it won't
+    * necessarily be easy.
+    */
+   assert(inst->DstReg.CondMask == COND_NE);
+
+   switch (inst->DstReg.CondSwizzle) {
+   case SWIZZLE_XXXX:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_X;
+   case SWIZZLE_YYYY:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Y;
+   case SWIZZLE_ZZZZ:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_Z;
+   case SWIZZLE_WWWW:
+      return BRW_PREDICATE_ALIGN16_REPLICATE_W;
+   default:
+      _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
+		    inst->DstReg.CondSwizzle);	/* the value that failed to match */
+      return BRW_PREDICATE_NORMAL;
+   }
+}
+
+/* Emit the vertex program instructions here: translate each Mesa
+ * instruction to native code, then append the URB vertex write. */
+void brw_vs_emit(struct brw_vs_compile *c )
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   const GLuint nr_insns = c->vp->program.Base.NumInstructions;
+   GLuint insn, if_depth = 0, loop_depth = 0;
+   GLuint end_offset = 0;
+   struct brw_instruction *end_inst, *last_inst;
+   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+   const struct brw_indirect stack_index = brw_indirect(0, 0);   
+   GLuint index;
+   GLuint file;
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      _mesa_printf("vs-mesa:\n");
+      _mesa_print_program(&c->vp->program.Base); 
+      _mesa_printf("\n");
+   }
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_set_access_mode(p, BRW_ALIGN_16);
+   
+   /* Message registers can't be read, so copy the output into GRF register
+      if they are used in source registers */
+   for (insn = 0; insn < nr_insns; insn++) {
+       GLuint i;
+       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+       for (i = 0; i < 3; i++) {
+	   struct prog_src_register *src = &inst->SrcReg[i];
+	   GLuint index = src->Index;
+	   GLuint file = src->File;	
+	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
+	       c->output_regs[index].used_in_src = GL_TRUE;
+       }
+   }
+
+   /* Static register allocation
+    */
+   brw_vs_alloc_regs(c);
+   brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+   for (insn = 0; insn < nr_insns; insn++) {
+      const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
+      struct brw_reg args[3], dst;
+      GLuint i;
+      
+#if 0
+      printf("%d: ", insn);
+      _mesa_print_instruction(inst);
+#endif
+
+      /* Get argument regs.  SWZ is special and does this itself.
+       */
+      if (inst->Opcode != OPCODE_SWZ)
+	  for (i = 0; i < 3; i++) {
+	      const struct prog_src_register *src = &inst->SrcReg[i];
+	      index = src->Index;
+	      file = src->File;	
+	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+		  args[i] = c->output_regs[index].reg;
+	      else
+                  args[i] = get_arg(c, inst, i);
+	  }
+
+      /* Get dest regs.  Note that it is possible for a reg to be both
+       * dst and arg, given the static allocation of registers.  So
+       * care needs to be taken emitting multi-operation instructions.
+       */ 
+      index = inst->DstReg.Index;
+      file = inst->DstReg.File;
+      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
+	  dst = c->output_regs[index].reg;
+      else
+	  dst = get_dst(c, inst->DstReg);
+
+      if (inst->SaturateMode != SATURATE_OFF) {
+	 _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
+                       inst->SaturateMode);
+      }
+
+      switch (inst->Opcode) {
+      case OPCODE_ABS:
+	 brw_MOV(p, dst, brw_abs(args[0]));
+	 break;
+      case OPCODE_ADD:
+	 brw_ADD(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_COS:
+	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_DP3:
+	 brw_DP3(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DP4:
+	 brw_DP4(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_DPH:
+	 brw_DPH(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_NRM3:
+	 emit_nrm(c, dst, args[0], 3);
+	 break;
+      case OPCODE_NRM4:
+	 emit_nrm(c, dst, args[0], 4);
+	 break;
+      case OPCODE_DST:
+	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
+	 break;
+      case OPCODE_EXP:
+	 unalias1(c, dst, args[0], emit_exp_noalias);
+	 break;
+      case OPCODE_EX2:
+	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_ARL:
+	 emit_arl(c, dst, args[0]);
+	 break;
+      case OPCODE_FLR:
+	 brw_RNDD(p, dst, args[0]);
+	 break;
+      case OPCODE_FRC:
+	 brw_FRC(p, dst, args[0]);
+	 break;
+      case OPCODE_LOG:
+	 unalias1(c, dst, args[0], emit_log_noalias);
+	 break;
+      case OPCODE_LG2:
+	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_LIT:
+	 unalias1(c, dst, args[0], emit_lit_noalias);
+	 break;
+      case OPCODE_LRP:
+	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+	 break;
+      case OPCODE_MAD:
+	 brw_MOV(p, brw_acc_reg(), args[2]);
+	 brw_MAC(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MAX:
+	 emit_max(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MIN:
+	 emit_min(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_MOV:
+	 brw_MOV(p, dst, args[0]);
+	 break;
+      case OPCODE_MUL:
+	 brw_MUL(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_POW:
+	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); 
+	 break;
+      case OPCODE_RCP:
+	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_RSQ:
+	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_SEQ:
+         emit_seq(p, dst, args[0], args[1]);
+         break;
+      case OPCODE_SIN:
+	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+	 break;
+      case OPCODE_SNE:
+         emit_sne(p, dst, args[0], args[1]);
+         break;
+      case OPCODE_SGE:
+	 emit_sge(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_SGT:
+         emit_sgt(p, dst, args[0], args[1]);
+         break;
+      case OPCODE_SLT:
+	 emit_slt(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_SLE:
+         emit_sle(p, dst, args[0], args[1]);
+         break;
+      case OPCODE_SUB:
+	 brw_ADD(p, dst, args[0], negate(args[1]));
+	 break;
+      case OPCODE_SWZ:
+	 /* The args[0] value can't be used here as it won't have
+	  * correctly encoded the full swizzle:
+	  */
+	 emit_swz(c, dst, inst);
+	 break;
+      case OPCODE_TRUNC:
+         /* round toward zero */
+	 brw_RNDZ(p, dst, args[0]);
+	 break;
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, args[0], args[1]);
+	 break;
+      case OPCODE_IF:
+	 assert(if_depth < MAX_IF_DEPTH);
+	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+	 /* Note that brw_IF smashes the predicate_control field. */
+	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+	 if_depth++;
+	 break;
+      case OPCODE_ELSE:
+	 assert(if_depth > 0);	/* ELSE without IF would index if_inst[] out of bounds */
+	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+	 break;
+      case OPCODE_ENDIF:
+         assert(if_depth > 0);
+	 brw_ENDIF(p, if_inst[--if_depth]);
+	 break;			
+      case OPCODE_BGNLOOP:
+         assert(loop_depth < MAX_LOOP_DEPTH);	/* same bound check as OPCODE_IF */
+         loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+         break;
+      case OPCODE_BRK:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_BREAK(p);
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_CONT:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_CONT(p);
+         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_ENDLOOP: 
+         {
+            struct brw_instruction *inst0, *inst1;
+	    GLuint br = 1;
+
+            assert(loop_depth > 0);	/* ENDLOOP without BGNLOOP would underflow */
+            loop_depth--;
+	    if (BRW_IS_IGDNG(brw))
+	       br = 2;
+
+            inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+            /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+            while (inst0 > loop_inst[loop_depth]) {
+               inst0--;
+               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+                  inst0->bits3.if_else.pop_count = 0;
+               }
+               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+                  inst0->bits3.if_else.pop_count = 0;
+               }
+            }
+         }
+         break;
+      case OPCODE_BRA:
+	 brw_set_predicate_control(p, get_predicate(inst));
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+         break;
+      case OPCODE_CAL:
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(4));
+         brw_save_call(p, inst->Comment, p->nr_insn);
+	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+         break;
+      case OPCODE_RET:
+	 brw_ADD(p, get_addr_reg(stack_index),
+			 get_addr_reg(stack_index), brw_imm_d(-4));
+	 brw_set_access_mode(p, BRW_ALIGN_1);
+         brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+	 brw_set_access_mode(p, BRW_ALIGN_16);
+	 break;
+      case OPCODE_END:	
+         end_offset = p->nr_insn;
+         /* this instruction will get patched later to jump past subroutine
+          * code, etc.
+          */
+         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+         break;
+      case OPCODE_PRINT:
+         /* no-op */
+         break;
+      case OPCODE_BGNSUB:
+         brw_save_label(p, inst->Comment, p->nr_insn);
+         break;
+      case OPCODE_ENDSUB:
+         /* no-op */
+         break;
+      default:
+	 _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
+                       inst->Opcode, inst->Opcode < MAX_OPCODE ?
+				    _mesa_opcode_string(inst->Opcode) :
+				    "unknown");
+      }
+
+      /* Set the predication update on the last instruction of the native
+       * instruction sequence.
+       *
+       * This would be problematic if it was set on a math instruction,
+       * but that shouldn't be the case with the current GLSL compiler.
+       */
+      if (inst->CondUpdate) {
+	 struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+	 assert(hw_insn->header.destreg__conditionalmod == 0);
+	 hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+      }
+
+      if ((inst->DstReg.File == PROGRAM_OUTPUT)
+          && (inst->DstReg.Index != VERT_RESULT_HPOS)
+          && c->output_regs[inst->DstReg.Index].used_in_src) {
+         brw_MOV(p, get_dst(c, inst->DstReg), dst);
+      }
+
+      /* Result color clamping.
+       *
+       * When destination register is an output register and
+       * it's primary/secondary front/back color, we have to clamp
+       * the result to [0,1]. This is done by enabling the
+       * saturation bit for the last instruction.
+       *
+       * We don't use brw_set_saturate() as it modifies
+       * p->current->header.saturate, which affects all the subsequent
+       * instructions. Instead, we directly modify the header
+       * of the last (already stored) instruction.
+       */
+      if (inst->DstReg.File == PROGRAM_OUTPUT) {
+         if ((inst->DstReg.Index == VERT_RESULT_COL0)
+             || (inst->DstReg.Index == VERT_RESULT_COL1)
+             || (inst->DstReg.Index == VERT_RESULT_BFC0)
+             || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
+            p->store[p->nr_insn-1].header.saturate = 1;
+         }
+      }
+
+      release_tmps(c);
+   }
+
+   end_inst = &p->store[end_offset];
+   last_inst = &p->store[p->nr_insn];
+
+   /* The END instruction will be patched to jump to this code */
+   emit_vertex_write(c);
+
+   post_vs_emit(c, end_inst, last_inst);
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      int i;
+
+      _mesa_printf("vs-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stderr, &p->store[i]);
+      _mesa_printf("\n");
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
new file mode 100644
index 0000000000..d790ab6555
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -0,0 +1,185 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+            
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "main/macros.h"
+
+struct brw_vs_unit_key {   /* lookup key for the cached VS unit state */
+   unsigned int total_grf;               /* vs.prog_data->total_grf */
+   unsigned int urb_entry_read_length;   /* vs.prog_data->urb_read_length */
+   unsigned int curb_entry_read_length;  /* vs.prog_data->curb_read_length */
+
+   unsigned int curbe_offset;   /* curbe.clip_start or curbe.vs_start */
+
+   unsigned int nr_urb_entries, urb_size;   /* urb.nr_vs_entries, urb.vsize */
+
+   unsigned int nr_surfaces;    /* vs.nr_surfaces */
+};
+
+/* Fill in the VS unit cache key from the current context state.
+ *
+ * The key is zeroed as a whole before any field is written.  The tags
+ * below name the dirty flag each group of fields is associated with.
+ */
+static void
+vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   GLcontext *gl = &brw->intel.ctx;
+
+   memset(key, 0, sizeof(*key));
+
+   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM: with user clip planes enabled
+    * those planes are read in as well, hence clip_start.
+    */
+   key->curbe_offset = gl->Transform.ClipPlanesEnabled ? brw->curbe.clip_start
+                                                       : brw->curbe.vs_start;
+
+   /* BRW_NEW_NR_VS_SURFACES */
+   key->nr_surfaces = brw->vs.nr_surfaces;
+
+   /* BRW_NEW_URB_FENCE */
+   key->urb_size = brw->urb.vsize;
+   key->nr_urb_entries = brw->urb.nr_vs_entries;
+
+   /* CACHE_NEW_VS_PROG */
+   key->curb_entry_read_length = brw->vs.prog_data->curb_read_length;
+   key->urb_entry_read_length = brw->vs.prog_data->urb_read_length;
+   key->total_grf = brw->vs.prog_data->total_grf;
+}
+
+static dri_bo *
+vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+{
+   struct brw_vs_unit_state vs;
+   dri_bo *bo;
+   int chipset_max_threads;
+   /* Build the unit state locally from the key, then upload it via the cache. */
+   memset(&vs, 0, sizeof(vs));
+
+   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc emitted below */
+   vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; /* blocks of 16, minus one */
+   vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+   /* Choosing multiple program flow means that we may get 2-vertex threads,
+    * which will have the channel mask for dwords 4-7 enabled in the thread,
+    * and those dwords will be written to the second URB handle when we
+    * brw_urb_WRITE() results.
+    */
+   vs.thread1.single_program_flow = 0;
+
+   if (BRW_IS_IGDNG(brw))
+      vs.thread1.binding_table_entry_count = 0; /* hardware requirement */
+   else
+      vs.thread1.binding_table_entry_count = key->nr_surfaces;
+
+   vs.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   vs.thread3.dispatch_grf_start_reg = 1;
+   vs.thread3.urb_entry_read_offset = 0;
+   vs.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+   if (BRW_IS_IGDNG(brw))
+       vs.thread4.nr_urb_entries = key->nr_urb_entries >> 2;
+   else
+       vs.thread4.nr_urb_entries = key->nr_urb_entries;
+
+   vs.thread4.urb_entry_allocation_size = key->urb_size - 1;
+
+   if (BRW_IS_IGDNG(brw))
+      chipset_max_threads = 72;
+   else if (BRW_IS_G4X(brw))
+      chipset_max_threads = 32;
+   else
+      chipset_max_threads = 16;
+   vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
+				  1, chipset_max_threads) - 1;
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      vs.thread4.max_threads = 0;
+
+   /* No samplers for ARB_vp programs:
+    */
+   /* The sampler count also has to be set to 0 for IGDNG
+    */
+   vs.vs5.sampler_count = 0;
+
+   if (INTEL_DEBUG & DEBUG_STATS)
+      vs.thread4.stats_enable = 1;
+
+   /* Vertex program always enabled:
+    */
+   vs.vs6.vs_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+			 key, sizeof(*key),
+			 &brw->vs.prog_bo, 1,
+			 &vs, sizeof(vs),
+			 NULL, NULL);
+
+   /* Emit VS program relocation (at the thread0 dword, targeting vs.prog_bo) */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0,
+		     vs.thread0.grf_reg_count << 1,
+		     offsetof(struct brw_vs_unit_state, thread0),
+		     brw->vs.prog_bo);
+
+   return bo;
+}
+
+/* State-atom hook: point brw->vs.state_bo at the VS unit state matching
+ * the current key, creating (and caching) it when the lookup misses.
+ */
+static void prepare_vs_unit(struct brw_context *brw)
+{
+   struct brw_vs_unit_key unit_key;
+
+   vs_unit_populate_key(brw, &unit_key);
+   dri_bo_unreference(brw->vs.state_bo);
+   brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT, &unit_key,
+                                       sizeof(unit_key), &brw->vs.prog_bo, 1,
+                                       NULL);
+   if (!brw->vs.state_bo)
+      brw->vs.state_bo = vs_unit_create_from_key(brw, &unit_key);
+}
+
+const struct brw_tracked_state brw_vs_unit = {   /* VS unit state atom */
+   .dirty = {
+      .mesa  = _NEW_TRANSFORM,               /* Transform.ClipPlanesEnabled */
+      .brw   = (BRW_NEW_CURBE_OFFSETS |      /* curbe.clip_start / vs_start */
+                BRW_NEW_NR_VS_SURFACES |     /* vs.nr_surfaces */
+		BRW_NEW_URB_FENCE),          /* urb.nr_vs_entries, urb.vsize */
+      .cache = CACHE_NEW_VS_PROG             /* vs.prog_data */
+   },
+   .prepare = prepare_vs_unit,
+};
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
new file mode 100644
index 0000000000..89f47522a1
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -0,0 +1,226 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Creates a new VS constant buffer reflecting the current VS program's
+ * constants, if needed by the VS program.  Returns NULL when the program
+ * does not use a constant buffer (constants then go through the CURBEs
+ * using the brw_constant_buffer state atom) or when the buffer cannot be
+ * allocated; otherwise the caller receives the new buffer reference.
+ */
+static drm_intel_bo *
+brw_vs_update_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+   const int size = params->NumParameters * 4 * sizeof(GLfloat);
+   drm_intel_bo *const_buffer;
+
+   /* BRW_NEW_VERTEX_PROGRAM */
+   if (!vp->use_const_buffer)
+      return NULL;
+
+   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+				     size, 64);
+   if (const_buffer == NULL)
+      return NULL;   /* allocation failed: nothing to upload into */
+   /* _NEW_PROGRAM_CONSTANTS */
+   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+   return const_buffer;
+}
+
+/**
+ * Update the surface state for a VS constant buffer.
+ *
+ * Sets brw->vs.surf_bo[surf] and the current vertex program's const_buffer.
+ */
+static void
+brw_update_vs_constant_surface( GLcontext *ctx,
+                                GLuint surf)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_surface_key key;
+   struct brw_vertex_program *vp =
+      (struct brw_vertex_program *) brw->vertex_program;
+   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+
+   assert(surf == 0);
+
+   /* If we're in this state update atom, we need to update VS constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   dri_bo_unreference(vp->const_buffer);
+   vp->const_buffer = brw_vs_update_constant_buffer(brw);
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it, so the surface slot is released and cleared.
+    */
+   if (vp->const_buffer == 0) {
+      drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+      brw->vs.surf_bo[surf] = NULL;
+      return;
+   }
+
+   memset(&key, 0, sizeof(key));
+
+   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.internal_format = GL_RGBA;
+   key.bo = vp->const_buffer;
+   key.depthmode = GL_NONE;
+   key.pitch = params->NumParameters;
+   key.width = params->NumParameters;   /* one element per program parameter */
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
+   brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
+   if (brw->vs.surf_bo[surf] == NULL) {   /* cache miss: build the surface state */
+      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   }
+}
+
+
+/**
+ * Constructs the binding table for the VS surface state: looks up a cached
+ * table keyed on brw->vs.surf_bo and uploads a new one on a cache miss.
+ */
+static dri_bo *
+brw_vs_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->vs.surf_bo, BRW_VS_MAX_SURF,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      /* Staged on the stack: no heap allocation that could fail or leak. */
+      uint32_t data[BRW_VS_MAX_SURF];
+      int i;
+
+      /* One presumed surface offset per slot; unused slots are 0. */
+      for (i = 0; i < BRW_VS_MAX_SURF; i++)
+         if (brw->vs.surf_bo[i])
+            data[i] = brw->vs.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+
+      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->vs.surf_bo, BRW_VS_MAX_SURF,
+				  data, sizeof(data),
+				  NULL, NULL);
+
+      /* Emit binding table relocations to surface state */
+      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+	 if (brw->vs.surf_bo[i] != NULL) {
+	    /* The presumed offsets were set in the data values for
+	     * brw_upload_cache.
+	     */
+	    drm_intel_bo_emit_reloc(bind_bo, i * 4,
+				    brw->vs.surf_bo[i], 0,
+				    I915_GEM_DOMAIN_INSTRUCTION, 0);
+	 }
+      }
+   }
+
+   return bind_bo;
+}
+
+/**
+ * State-atom hook for the vertex shader surfaces (constant buffer).
+ *
+ * Refreshes the VS constant surface, recounts the bound surfaces (raising
+ * BRW_NEW_NR_VS_SURFACES for the VS unit when the count changes) and
+ * re-fetches the binding table, which produces CACHE_NEW_SURF_BIND.
+ */
+static void prepare_vs_surfaces(struct brw_context *brw )
+{
+   int count = BRW_VS_MAX_SURF;
+
+   /* Update the constant buffer surface first; it binds or clears its
+    * slot in brw->vs.surf_bo[]. */
+   brw_update_vs_constant_surface(&brw->intel.ctx,
+                                  SURF_INDEX_VERT_CONST_BUFFER);
+
+   /* Number of surfaces = index of the last bound slot, plus one. */
+   while (count > 0 && brw->vs.surf_bo[count - 1] == NULL)
+      count--;
+
+   if (brw->vs.nr_surfaces != count) {
+      brw->vs.nr_surfaces = count;
+      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
+   }
+
+   /* With no surface to point at, the old bind_bo is deliberately kept.
+    * That should be relatively harmless: it only makes the working set
+    * slightly larger.
+    */
+   if (count == 0)
+      return;
+
+   dri_bo_unreference(brw->vs.bind_bo);
+   brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+}
+
+const struct brw_tracked_state brw_vs_surfaces = {   /* VS surfaces state atom */
+   .dirty = {
+      .mesa = (_NEW_PROGRAM_CONSTANTS),   /* parameter values uploaded to the buffer */
+      .brw = (BRW_NEW_VERTEX_PROGRAM),    /* vp->use_const_buffer */
+      .cache = 0
+   },
+   .prepare = prepare_vs_surfaces,
+};
+
+
+
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
new file mode 100644
index 0000000000..2292de94c4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -0,0 +1,375 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+             
+#include "main/texformat.h"
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+
+
+/**
+ * Number of source arguments taken by the given opcode.  The WM-internal
+ * opcodes are answered here; anything else must be a core Mesa opcode and
+ * is looked up with _mesa_num_inst_src_regs().
+ */
+GLuint brw_wm_nr_args( GLuint opcode )
+{
+   if (opcode == WM_PIXELXY || opcode == WM_FRONTFACING)
+      return 0;
+
+   if (opcode == WM_DELTAXY || opcode == WM_WPOSXY || opcode == WM_CINTERP)
+      return 1;
+
+   if (opcode == WM_PIXELW || opcode == WM_LINTERP)
+      return 2;
+
+   if (opcode == WM_PINTERP || opcode == WM_FB_WRITE)
+      return 3;
+
+   assert(opcode < MAX_OPCODE);
+   return _mesa_num_inst_src_regs(opcode);
+}
+
+
+/**
+ * Returns 1 if the opcode is one of those listed here as producing a
+ * scalar result, 0 otherwise.
+ */
+GLuint brw_wm_is_scalar_result( GLuint opcode )
+{
+   GLuint is_scalar = 0;
+
+   switch (opcode) {
+   case OPCODE_DP3: case OPCODE_DP4: case OPCODE_DPH: case OPCODE_DST:
+   case OPCODE_COS: case OPCODE_SIN:
+   case OPCODE_EX2: case OPCODE_LG2: case OPCODE_POW:
+   case OPCODE_RCP: case OPCODE_RSQ:
+      is_scalar = 1;
+      break;
+   default:
+      break;
+   }
+   return is_scalar;
+}
+
+
+/**
+ * GPU code generation for non-GLSL shaders.  These contain no flow
+ * control instructions, which makes SSA-style optimization much more
+ * straightforward.
+ */
+static void
+brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   /* Pre-pass on the fragment program itself: adds the instructions for
+    * pre- and post-fragment-program work (interpolation, fogging, ...).
+    */
+   brw_wm_pass_fp(c);
+
+   /* Pass 0: translate to the intermediate representation and build
+    * the register usage chains.
+    */
+   brw_wm_pass0(c);
+
+   /* Pass 1: dead code removal.
+    */
+   brw_wm_pass1(c);
+
+   /* Pass 2: register allocation.  Only half of the GRF file is offered,
+    * since we operate on 16 pixels at a time and each logical shader
+    * register needs two GRF entries.
+    */
+   c->grf_limit = BRW_WM_MAX_GRF / 2;
+
+   brw_wm_pass2(c);
+
+   /* Record how many general-purpose registers ended up in use.
+    */
+   c->prog_data.total_grf = c->max_wm_grf;
+
+   /* Scratch space is used for register spilling; none is requested
+    * while last_scratch is zero.
+    */
+   c->prog_data.total_scratch = 0;
+   if (c->last_scratch)
+      c->prog_data.total_scratch = c->last_scratch + 0x40;
+
+   /* Finally, emit the GEN4 code.
+    */
+   brw_wm_emit(c);
+}
+
+
+/**
+ * All Mesa program -> GPU code generation goes through this function.
+ * Depending on the instructions used (i.e. flow control instructions)
+ * we'll use one of two code generators.
+ */
+static void do_wm_prog( struct brw_context *brw,
+			struct brw_fragment_program *fp, 
+			struct brw_wm_prog_key *key)
+{
+   struct brw_wm_compile *c;
+   const GLuint *program;
+   GLuint program_size;
+
+   c = brw->wm.compile_data;   /* compile scratch area, allocated on first use */
+   if (c == NULL) {
+      brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
+      c = brw->wm.compile_data;
+      if (c == NULL) {
+         /* Ouch - big out of memory problem.  Can't continue
+          * without triggering a segfault, no way to signal,
+          * so just return.
+          */
+         return;
+      }
+   } else {
+      memset(c, 0, sizeof(*brw->wm.compile_data));   /* reuse the earlier allocation */
+   }
+   memcpy(&c->key, key, sizeof(*key));
+
+   c->fp = fp;
+   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+
+   brw_init_compile(brw, &c->func);
+
+   /* temporary sanity check assertion */
+   ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+
+   /*
+    * Shaders which use GLSL features such as flow control are handled
+    * differently from "simple" shaders.
+    */
+   if (fp->isGLSL) {
+      c->dispatch_width = 8;
+      brw_wm_glsl_emit(brw, c);
+   }
+   else {
+      c->dispatch_width = 16;
+      brw_wm_non_glsl_emit(brw, c);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM)
+      fprintf(stderr, "\n");
+
+   /* Fetch the generated program and its size.
+    */
+   program = brw_get_program(&c->func, &program_size);
+
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+				       &c->key, sizeof(c->key),
+				       NULL, 0,
+				       program, program_size,
+				       &c->prog_data,
+				       &brw->wm.prog_data );
+}
+
+
+
+static void brw_wm_populate_key( struct brw_context *brw,
+				 struct brw_wm_prog_key *key )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   const struct brw_fragment_program *fp = 
+      (struct brw_fragment_program *)brw->fragment_program;
+   GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+   GLuint lookup = 0;
+   GLuint line_aa;
+   GLuint i;
+   /* Start from an all-zero key; each section below is tagged with its dirty flag. */
+   memset(key, 0, sizeof(*key));
+
+   /* Build the IZ_* bit index handed to brw_wm_lookup_iz() below
+    */
+   /* _NEW_COLOR */
+   if (fp->program.UsesKill ||
+       ctx->Color.AlphaEnabled)
+      lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
+      lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   /* _NEW_DEPTH */
+   if (ctx->Depth.Test)
+      lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+   if (ctx->Depth.Test &&  
+       ctx->Depth.Mask) /* ?? */
+      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+   /* _NEW_STENCIL */
+   if (ctx->Stencil._Enabled) {
+      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+      if (ctx->Stencil.WriteMask[0] ||
+	  ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
+	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+   }
+
+   line_aa = AA_NEVER;
+
+   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+   if (ctx->Line.SmoothFlag) {
+      if (brw->intel.reduced_primitive == GL_LINES) {
+	 line_aa = AA_ALWAYS;
+      }
+      else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
+	 if (ctx->Polygon.FrontMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if (ctx->Polygon.BackMode == GL_LINE ||
+		(ctx->Polygon.CullFlag &&
+		 ctx->Polygon.CullFaceMode == GL_BACK))
+	       line_aa = AA_ALWAYS;
+	 }
+	 else if (ctx->Polygon.BackMode == GL_LINE) {
+	    line_aa = AA_SOMETIMES;
+
+	    if ((ctx->Polygon.CullFlag &&
+		 ctx->Polygon.CullFaceMode == GL_FRONT))
+	       line_aa = AA_ALWAYS;
+	 }
+      }
+   }
+	 
+   brw_wm_lookup_iz(line_aa,
+		    lookup,
+		    uses_depth,
+		    key);
+
+
+   /* BRW_NEW_WM_INPUT_DIMENSIONS */
+   key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
+
+   /* _NEW_LIGHT */
+   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+
+   /* _NEW_HINT */
+   key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+
+   /* _NEW_TEXTURE */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
+
+      if (unit->_ReallyEnabled) {
+         const struct gl_texture_object *t = unit->_Current;
+         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+	 if (img->InternalFormat == GL_YCBCR_MESA) {
+	    key->yuvtex_mask |= 1 << i;
+	    if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
+		key->yuvtex_swap_mask |= 1 << i;
+	 }
+
+         key->tex_swizzles[i] = t->_Swizzle;
+      }
+      else {
+         key->tex_swizzles[i] = SWIZZLE_NOOP;
+      }
+   }
+
+   /* Shadow samplers used by the fragment program */
+   key->shadowtex_mask = fp->program.Base.ShadowSamplers;
+
+   /* _NEW_BUFFERS */
+   /*
+    * Include the draw buffer origin and height so that we can calculate
+    * fragment position values relative to the bottom left of the drawable,
+    * from the incoming screen origin relative position we get as part of our
+    * payload.
+    *
+    * We could avoid recompiling by including this as a constant referenced by
+    * our program, but if we were to do that it would also be nice to handle
+    * getting that constant updated at batchbuffer submit time (when we
+    * hold the lock and know where the buffer really is) rather than at emit
+    * time when we don't hold the lock and are just guessing.  We could also
+    * just avoid using this as key data if the program doesn't use
+    * fragment.position.
+    *
+    * This pretty much becomes moot with DRI2 and redirected buffers anyway,
+    * as our origins will always be zero then.
+    */
+   if (brw->intel.driDrawable != NULL) {
+      key->origin_x = brw->intel.driDrawable->x;
+      key->origin_y = brw->intel.driDrawable->y;
+      key->drawable_height = brw->intel.driDrawable->h;
+   }
+
+   /* CACHE_NEW_VS_PROG */
+   key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+
+   /* The unique fragment program ID */
+   key->program_string_id = fp->id;
+}
+
+
+/* State-atom hook: look the WM program up in the cache under the current
+ * key and compile it with do_wm_prog() only when the lookup misses.
+ */
+static void brw_prepare_wm_prog(struct brw_context *brw)
+{
+   struct brw_wm_prog_key wm_key;
+
+   brw_wm_populate_key(brw, &wm_key);
+
+   /* Drop the old program reference, then try the cache before compiling. */
+   dri_bo_unreference(brw->wm.prog_bo);
+   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, &wm_key,
+                                      sizeof(wm_key), NULL, 0,
+                                      &brw->wm.prog_data);
+   if (brw->wm.prog_bo != NULL)
+      return;
+   do_wm_prog(brw, (struct brw_fragment_program *) brw->fragment_program, &wm_key);
+}
+
+
+const struct brw_tracked_state brw_wm_prog = {   /* WM program state atom */
+   .dirty = {
+      .mesa  = (_NEW_COLOR |      /* Color.AlphaEnabled */
+		_NEW_DEPTH |      /* Depth.Test, Depth.Mask */
+                _NEW_HINT |       /* Hint.PerspectiveCorrection */
+		_NEW_STENCIL |    /* Stencil._Enabled, Stencil.WriteMask */
+		_NEW_POLYGON |    /* Polygon front/back mode and culling */
+		_NEW_LINE |       /* Line.SmoothFlag */
+		_NEW_LIGHT |      /* Light.ShadeModel */
+		_NEW_BUFFERS |    /* drawable origin and height */
+		_NEW_TEXTURE),    /* per-unit YUV flags and swizzles */
+      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
+		BRW_NEW_WM_INPUT_DIMENSIONS |
+		BRW_NEW_REDUCED_PRIMITIVE),
+      .cache = CACHE_NEW_VS_PROG,   /* vs.prog_data->outputs_written */
+   },
+   .prepare = brw_prepare_wm_prog
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
new file mode 100644
index 0000000000..872b1f3ecf
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -0,0 +1,309 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+              
+
+#ifndef BRW_WM_H
+#define BRW_WM_H
+
+
+#include "shader/prog_instruction.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+
+#define SATURATE (1<<5)
+
+/* A big lookup table is used to figure out which and how many
+ * additional regs will be inserted before the main payload in the WM
+ * program execution.  These mainly relate to depth and stencil
+ * processing and the early-depth-test optimization.
+ */
+#define IZ_PS_KILL_ALPHATEST_BIT    0x1
+#define IZ_PS_COMPUTES_DEPTH_BIT    0x2
+#define IZ_DEPTH_WRITE_ENABLE_BIT   0x4
+#define IZ_DEPTH_TEST_ENABLE_BIT    0x8
+#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10
+#define IZ_STENCIL_TEST_ENABLE_BIT  0x20
+#define IZ_BIT_MAX                  0x40
+
+#define AA_NEVER     0
+#define AA_SOMETIMES 1
+#define AA_ALWAYS    2
+
+struct brw_wm_prog_key {   /* cache key for compiled WM programs */
+   GLuint source_depth_reg:3;
+   GLuint aa_dest_stencil_reg:3;
+   GLuint dest_depth_reg:3;
+   GLuint nr_depth_regs:3;
+   GLuint computes_depth:1;	/* could be derived from program string */
+   GLuint source_depth_to_render_target:1;
+   GLuint flat_shade:1;    /**< Light.ShadeModel == GL_FLAT */
+   GLuint linear_color:1;  /**< linear interpolation vs perspective interp */
+   GLuint runtime_check_aads_emit:1;
+   
+   GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
+   GLuint shadowtex_mask:16;    /**< the program's ShadowSamplers */
+   GLuint yuvtex_mask:16;       /**< one bit per unit bound to a GL_YCBCR_MESA texture */
+   GLuint yuvtex_swap_mask:16;	/* UV swapped */
+
+   GLuint tex_swizzles[BRW_MAX_TEX_UNIT];   /**< SWIZZLE_NOOP for disabled units */
+
+   GLuint program_string_id:32;   /**< unique fragment program ID */
+   GLuint origin_x, origin_y;     /**< draw buffer origin */
+   GLuint drawable_height;
+   GLuint vp_outputs_written;     /**< VS outputs_written & DO_SETUP_BITS */
+};
+
+
+/* A bit of a glossary:
+ *
+ * brw_wm_value: A computed value or program input.  Values are
+ * constant, they are created once and are never modified.  When a
+ * fragment program register is written or overwritten, new values are
+ * created fresh, preserving the rule that values are constant.
+ *
+ * brw_wm_ref: A reference to a value.  Wherever a value is used by an
+ * instruction or as a program output, that is tracked with an
+ * instance of this struct.  All references to a value occur after it
+ * is created.  After the last reference, a value is dead and can be
+ * discarded.
+ *
+ * brw_wm_grf: Represents a physical hardware register.  May be either
+ * empty or hold a value.  Register allocation is the process of
+ * assigning values to grf registers.  This occurs in pass2 and the
+ * brw_wm_grf struct is not used before that.
+ *
+ * Fragment program registers: These are time-varying constructs that
+ * are hard to reason about and which we translate away in pass0.  A
+ * single fragment program register element (eg. temp[0].x) will be
+ * translated to one or more brw_wm_value structs, one for each time
+ * that temp[0].x is written to during the program. 
+ */
+
+
+
+/* Used in pass2 to track register allocation.  Each entry is either
+ * empty or holds a value. */
+struct brw_wm_grf {
+   struct brw_wm_value *value;   /* value currently held, if any */
+   GLuint nextuse;   /* NOTE(review): presumably where the held value is next used -- confirm in pass2 */
+};
+
+struct brw_wm_value {   /* a computed value or program input; never modified once created */
+   struct brw_reg hw_reg;	/* emitted to this reg, may not always be there */
+   struct brw_wm_ref *lastuse;   /* last reference; the value is dead after it */
+   struct brw_wm_grf *resident; /* NOTE(review): presumably the grf entry holding this value -- confirm */
+   GLuint contributes_to_output:1;
+   GLuint spill_slot:16;	/* if non-zero, spill immediately after calculation */
+};
+
+struct brw_wm_ref {   /* one use of a value by an instruction or program output */
+   struct brw_reg hw_reg;	/* nr filled in in pass2, everything else, pass0 */
+   struct brw_wm_value *value;   /* the value being referenced */
+   struct brw_wm_ref *prevuse;   /* NOTE(review): presumably the preceding reference to the same value -- confirm */
+   GLuint unspill_reg:7;	/* unspill to reg */
+   GLuint emitted:1;
+   GLuint insn:24;   /* NOTE(review): presumably the index of the referencing instruction -- confirm */
+};
+
+struct brw_wm_constref {   /* NOTE(review): appears to pair a float constant with its ref -- confirm in pass0 */
+   const struct brw_wm_ref *ref;
+   GLfloat constval;
+};
+
+
+struct brw_wm_instruction {
+   struct brw_wm_value *dst[4];     /* up to four destination values */
+   struct brw_wm_ref *src[3][4];    /* up to three source args, four refs each */
+   GLuint opcode:8;
+   GLuint saturate:1;
+   GLuint writemask:4;
+   GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
+   GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
+   GLuint tex_shadow:1; /* do shadow comparison? */
+   GLuint eot:1;    	/* End of thread indicator for FB_WRITE */
+   GLuint target:10;    /* target binding table index for FB_WRITE */
+};
+
+
+#define BRW_WM_MAX_INSN  (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_GRF   128		/* hardware limit */
+#define BRW_WM_MAX_VREG  (BRW_WM_MAX_INSN * 4)   /* 4 dst values per instruction */
+#define BRW_WM_MAX_REF   (BRW_WM_MAX_INSN * 12)  /* 3 x 4 src refs per instruction */
+#define BRW_WM_MAX_PARAM 256   /* size of brw_wm_compile.creg[] */
+#define BRW_WM_MAX_CONST 256   /* size of brw_wm_compile.constref[] */
+#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS
+#define BRW_WM_MAX_SUBROUTINE 16   /* size of brw_wm_compile.subroutines[] */
+
+
+
+/* New opcodes to track internal operations required for the WM unit.
+ * These are added early so that the registers used can be tracked,
+ * freed and reused like those of other instructions.  The source-argument
+ * counts noted below are the ones brw_wm_nr_args() reports. */
+#define WM_PIXELXY        (MAX_OPCODE)       /* 0 src args */
+#define WM_DELTAXY        (MAX_OPCODE + 1)   /* 1 src arg */
+#define WM_PIXELW         (MAX_OPCODE + 2)   /* 2 src args */
+#define WM_LINTERP        (MAX_OPCODE + 3)   /* 2 src args */
+#define WM_PINTERP        (MAX_OPCODE + 4)   /* 3 src args */
+#define WM_CINTERP        (MAX_OPCODE + 5)   /* 1 src arg */
+#define WM_WPOSXY         (MAX_OPCODE + 6)   /* 1 src arg */
+#define WM_FB_WRITE       (MAX_OPCODE + 7)   /* 3 src args */
+#define WM_FRONTFACING    (MAX_OPCODE + 8)   /* 0 src args */
+#define MAX_WM_OPCODE     (MAX_OPCODE + 9)   /* one past the last WM opcode */
+
+#define PROGRAM_PAYLOAD   (PROGRAM_FILE_MAX)
+#define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)
+
+struct brw_wm_compile {   /* working state for one WM program compile */
+   struct brw_compile func;
+   struct brw_wm_prog_key key;          /* copy of the key being compiled for */
+   struct brw_wm_prog_data prog_data;
+
+   struct brw_fragment_program *fp;
+
+   GLfloat (*env_param)[4];   /* ctx.FragmentProgram.Parameters */
+
+   enum {
+      START,
+      PASS2_DONE
+   } state;
+
+   /* Initial pass - translate fp instructions to fp instructions,
+    * simplifying and adding instructions for interpolation and
+    * framebuffer writes.
+    */
+   struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+   GLuint nr_fp_insns;
+   GLuint fp_temp;
+   GLuint fp_interp_emitted;
+   GLuint fp_fragcolor_emitted;
+
+   struct prog_src_register pixel_xy;
+   struct prog_src_register delta_xy;
+   struct prog_src_register pixel_w;
+
+
+   struct brw_wm_value vreg[BRW_WM_MAX_VREG];
+   GLuint nr_vreg;
+
+   struct brw_wm_value creg[BRW_WM_MAX_PARAM];
+   GLuint nr_creg;
+
+   struct {
+      struct brw_wm_value depth[4]; /* includes r0/r1 */
+      struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
+   } payload;
+
+
+   const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+
+   struct brw_wm_ref undef_ref;
+   struct brw_wm_value undef_value;
+
+   struct brw_wm_ref refs[BRW_WM_MAX_REF];
+   GLuint nr_refs;
+
+   struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
+   GLuint nr_insns;
+
+   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
+   GLuint nr_constrefs;
+
+   struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
+
+   GLuint grf_limit;      /* BRW_WM_MAX_GRF / 2 on the non-GLSL path */
+   GLuint max_wm_grf;     /* copied to prog_data.total_grf on the non-GLSL path */
+   GLuint last_scratch;   /* non-zero: total_scratch = last_scratch + 0x40 */
+
+   GLuint cur_inst;  /**< index of current instruction */
+
+   GLboolean out_of_regs;  /**< ran out of GRF registers? */
+
+   /** Mapping from Mesa registers to hardware registers */
+   struct {
+      GLboolean inited;
+      struct brw_reg reg;
+   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+
+   GLboolean used_grf[BRW_WM_MAX_GRF];
+   GLuint first_free_grf;
+   struct brw_reg stack;
+   struct brw_reg emit_mask_reg;
+   GLuint tmp_regs[BRW_WM_MAX_GRF];
+   GLuint tmp_index;
+   GLuint tmp_max;
+   GLuint subroutines[BRW_WM_MAX_SUBROUTINE];
+   GLuint dispatch_width;   /* 8 for GLSL programs, 16 otherwise */
+
+   /** we may need up to 3 constants per instruction (if use_const_buffer) */
+   struct {
+      GLint index;
+      struct brw_reg reg;
+   } current_const[3];
+};
+
+
+GLuint brw_wm_nr_args( GLuint opcode );
+GLuint brw_wm_is_scalar_result( GLuint opcode );
+
+void brw_wm_pass_fp( struct brw_wm_compile *c );
+void brw_wm_pass0( struct brw_wm_compile *c );
+void brw_wm_pass1( struct brw_wm_compile *c );
+void brw_wm_pass2( struct brw_wm_compile *c );
+void brw_wm_emit( struct brw_wm_compile *c );
+
+void brw_wm_print_value( struct brw_wm_compile *c,
+			 struct brw_wm_value *value );
+
+void brw_wm_print_ref( struct brw_wm_compile *c,
+		       struct brw_wm_ref *ref );
+
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst );
+
+void brw_wm_print_program( struct brw_wm_compile *c,
+			   const char *stage );
+
+void brw_wm_lookup_iz( GLuint line_aa,
+		       GLuint lookup,
+		       GLboolean ps_uses_depth,
+		       struct brw_wm_prog_key *key );
+
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+
+void emit_ddxy(struct brw_compile *p,
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       GLboolean is_ddx,
+	       const struct brw_reg *arg0);
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
new file mode 100644
index 0000000000..220821087c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -0,0 +1,174 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* Debug helper: print a short name for 'value' using _mesa_printf(). */
+void brw_wm_print_value( struct brw_wm_compile *c,
+		       struct brw_wm_value *value )
+{
+   assert(value);
+   if (c->state >= PASS2_DONE) 	/* from PASS2_DONE on, print value->hw_reg instead */
+      brw_print_reg(value->hw_reg);
+   else if( value == &c->undef_value )
+      _mesa_printf("undef");
+   else if( value - c->vreg >= 0 &&	/* otherwise: find the array in 'c' holding it */
+	    value - c->vreg < BRW_WM_MAX_VREG)
+      _mesa_printf("r%d", (int)(value - c->vreg));	/* ptrdiff_t -> int to match %d */
+   else if (value - c->creg >= 0 &&
+	    value - c->creg < BRW_WM_MAX_PARAM)
+      _mesa_printf("c%d", (int)(value - c->creg));
+   else if (value - c->payload.input_interp >= 0 &&
+	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+      _mesa_printf("i%d", (int)(value - c->payload.input_interp));
+   else if (value - c->payload.depth >= 0 &&
+	    value - c->payload.depth < FRAG_ATTRIB_MAX)	/* NOTE(review): confirm depth[] really has FRAG_ATTRIB_MAX entries */
+      _mesa_printf("d%d", (int)(value - c->payload.depth));
+   else 
+      _mesa_printf("?");	/* not in any of the arrays checked above */
+}
+/* Debug helper: print source reference 'ref' using _mesa_printf(). */
+void brw_wm_print_ref( struct brw_wm_compile *c,
+		       struct brw_wm_ref *ref )
+{
+   struct brw_reg hw_reg = ref->hw_reg;
+
+   if (ref->unspill_reg)	/* prefix with the spill slot of the referenced value */
+      _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+
+   if (c->state >= PASS2_DONE)	/* from PASS2_DONE on, print the register itself */
+      brw_print_reg(ref->hw_reg);
+   else {
+      _mesa_printf("%s", hw_reg.negate ? "-" : "");
+      _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+      brw_wm_print_value(c, ref->value);
+      if ((hw_reg.nr&1) || hw_reg.subnr) {	/* suffix only when either is nonzero */
+	 _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+      }
+   }
+}
+/* Debug helper: print one instruction as "[dst x4].mask = OPCODE [src x4]...". */
+void brw_wm_print_insn( struct brw_wm_compile *c,
+			struct brw_wm_instruction *inst )
+{
+   GLuint i, arg;
+   GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+   _mesa_printf("[");
+   for (i = 0; i < 4; i++) {	/* the four destination slots; "#" marks an empty one */
+      if (inst->dst[i]) {
+	 brw_wm_print_value(c, inst->dst[i]);
+	 if (inst->dst[i]->spill_slot)
+	    _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+      }
+      else
+	 _mesa_printf("#");
+      if (i < 3)      
+	 _mesa_printf(",");
+   }
+   _mesa_printf("]");
+
+   if (inst->writemask != WRITEMASK_XYZW)	/* a full writemask is left implicit */
+      _mesa_printf(".%s%s%s%s", 
+		   GET_BIT(inst->writemask, 0) ? "x" : "",
+		   GET_BIT(inst->writemask, 1) ? "y" : "",
+		   GET_BIT(inst->writemask, 2) ? "z" : "",
+		   GET_BIT(inst->writemask, 3) ? "w" : "");
+
+   switch (inst->opcode) {	/* WM_* opcodes are named here; the rest go to _mesa_opcode_string() */
+   case WM_PIXELXY:
+      _mesa_printf(" = PIXELXY");
+      break;
+   case WM_DELTAXY:
+      _mesa_printf(" = DELTAXY");
+      break;
+   case WM_PIXELW:
+      _mesa_printf(" = PIXELW");
+      break;
+   case WM_WPOSXY:
+      _mesa_printf(" = WPOSXY");
+      break;
+   case WM_PINTERP:
+      _mesa_printf(" = PINTERP");
+      break;
+   case WM_LINTERP:
+      _mesa_printf(" = LINTERP");
+      break;
+   case WM_CINTERP:
+      _mesa_printf(" = CINTERP");
+      break;
+   case WM_FB_WRITE:
+      _mesa_printf(" = FB_WRITE");
+      break;
+   case WM_FRONTFACING:
+      _mesa_printf(" = FRONTFACING");
+      break;
+   default:
+      _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      break;
+   }
+
+   if (inst->saturate)
+      _mesa_printf("_SAT");
+
+   for (arg = 0; arg < nr_args; arg++) {	/* one bracketed group per source argument */
+
+      _mesa_printf(" [");
+
+      for (i = 0; i < 4; i++) {
+	 if (inst->src[arg][i]) {
+	    brw_wm_print_ref(c, inst->src[arg][i]);
+	 }
+	 else
+	    _mesa_printf("%%");	/* a literal '%' marks an empty source slot */
+
+	 if (i < 3) 
+	    _mesa_printf(",");
+	 else
+	    _mesa_printf("]");
+      }
+   }
+   _mesa_printf("\n");
+}
+/* Debug helper: print the 'stage' label followed by every instruction in 'c'. */
+void brw_wm_print_program( struct brw_wm_compile *c,
+			   const char *stage )
+{
+   GLuint insn;
+
+   _mesa_printf("%s:\n", stage);
+   for (insn = 0; insn < c->nr_insns; insn++)	/* one line per instruction */
+      brw_wm_print_insn(c, &c->instruction[insn]);
+   _mesa_printf("\n");
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
new file mode 100644
index 0000000000..bf80a2942a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -0,0 +1,1509 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "main/macros.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* Not quite sure how correct this is - need to understand horiz
+ * vs. vertical strides a little better.
+ */
+static INLINE struct brw_reg sechalf( struct brw_reg reg )
+{
+   if (reg.vstride)	/* with a zero vstride the register is returned unchanged */
+      reg.nr++;		/* otherwise step to the next register number */
+   return reg;
+}
+
+/* Payload R0:
+ *
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ *         corresponding to each of the 16 execution channels.
+ * R0.1..8 -- ?
+ * R1.0 -- triangle vertex 0.X
+ * R1.1 -- triangle vertex 0.Y
+ * R1.2 -- tile 0 x,y coords (2 packed uwords)
+ * R1.3 -- tile 1 x,y coords (2 packed uwords)
+ * R1.4 -- tile 2 x,y coords (2 packed uwords)
+ * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.6 -- ?
+ * R1.7 -- ?
+ * R1.8 -- ?
+ */
+
+/* Write per-pixel X to dst[0] and Y to dst[1] (as UW) from the coords in r1. */
+static void emit_pixel_xy(struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask)
+{
+   struct brw_reg r1 = brw_vec1_grf(1, 0);
+   struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+   /* Calculate pixel centers by adding 1 or 0 to each of the
+    * micro-tile coordinates passed in r1.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_ADD(p,
+	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
+	      stride(suboffset(r1_uw, 4), 2, 4, 0),
+	      brw_imm_v(0x10101010));	/* immediate vector of per-pixel X offsets */
+   }
+
+   if (mask & WRITEMASK_Y) {
+      brw_ADD(p,
+	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
+	      stride(suboffset(r1_uw,5), 2, 4, 0),
+	      brw_imm_v(0x11001100));	/* immediate vector of per-pixel Y offsets */
+   }
+
+   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);	/* NOTE(review): sets COMPRESSED rather than restoring the previous mode */
+}
+
+
+/* dst[0] = arg0[0] - r1.0 and dst[1] = arg0[1] - r1.1 for the channels in mask. */
+static void emit_delta_xy(struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0)
+{
+   struct brw_reg r1 = brw_vec1_grf(1, 0);
+
+   /* Calc delta X,Y by subtracting origin in r1 from the pixel
+    * centers.
+    */
+   if (mask & WRITEMASK_X) {
+      brw_ADD(p,
+	      dst[0],
+	      retype(arg0[0], BRW_REGISTER_TYPE_UW),	/* pixel X is read as UW */
+	      negate(r1));
+   }
+
+   if (mask & WRITEMASK_Y) {
+      brw_ADD(p,
+	      dst[1],
+	      retype(arg0[1], BRW_REGISTER_TYPE_UW),	/* pixel Y is read as UW */
+	      negate(suboffset(r1,1)));
+
+   }
+}
+/* Offset pixel X/Y (arg0, read as W) by the origin/height stored in c->key. */
+static void emit_wpos_xy(struct brw_wm_compile *c,
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+
+   /* Calculate the pixel offset from window bottom left into destination
+    * X and Y channels.
+    */
+   if (mask & WRITEMASK_X) {
+      /* X' = X - origin */
+      brw_ADD(p,
+	      dst[0],
+	      retype(arg0[0], BRW_REGISTER_TYPE_W),
+	      brw_imm_d(0 - c->key.origin_x));
+   }
+
+   if (mask & WRITEMASK_Y) {
+      /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+      brw_ADD(p,
+	      dst[1],
+	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
+	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));	/* note the extra -1 vs. the formula above */
+   }
+}
+
+/* If W is in mask: interpolate 1/w into message reg 2, then dst[3] = INV of it. */
+static void emit_pixel_w( struct brw_compile *p,
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0,
+			  const struct brw_reg *deltas)
+{
+   /* Don't need this if all you are doing is interpolating color, for
+    * instance.
+    */
+   if (mask & WRITEMASK_W) {      
+      struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);	/* GRF after arg0[0], element 4 */
+
+      /* Calc 1/w - just linterp wpos[3] optimized by putting the
+       * result straight into a message reg.
+       */
+      brw_LINE(p, brw_null_reg(), interp3, deltas[0]);
+      brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), deltas[1]);
+
+      /* Calc w */
+      brw_math_16( p, dst[3],
+		   BRW_MATH_FUNCTION_INV,
+		   BRW_MATH_SATURATE_NONE,
+		   2, brw_null_reg(),	/* 2 = the message reg written by the MAC above */
+		   BRW_MATH_PRECISION_FULL);
+   }
+}
+
+
+/* Per enabled channel: LINE with deltas[0], then MAC with deltas[1] into dst[i]. */
+static void emit_linterp( struct brw_compile *p, 
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0,
+			 const struct brw_reg *deltas )
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+
+   interp[0] = brw_vec1_grf(nr, 0);	/* one source per channel: two GRFs (nr, nr+1), */
+   interp[1] = brw_vec1_grf(nr, 4);	/* each split at elements 0 and 4 */
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+      }
+   }
+}
+
+/* Same LINE/MAC sequence as emit_linterp, then each result is multiplied by w[3]. */
+static void emit_pinterp( struct brw_compile *p, 
+			  const struct brw_reg *dst,
+			  GLuint mask,
+			  const struct brw_reg *arg0,
+			  const struct brw_reg *deltas,
+			  const struct brw_reg *w)
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+
+   interp[0] = brw_vec1_grf(nr, 0);	/* one source per channel: two GRFs (nr, nr+1), */
+   interp[1] = brw_vec1_grf(nr, 4);	/* each split at elements 0 and 4 */
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 brw_LINE(p, brw_null_reg(), interp[i], deltas[0]);
+	 brw_MAC(p, dst[i], suboffset(interp[i],1), deltas[1]);
+      }
+   }
+   for (i = 0; i < 4; i++) {	/* second pass, after all LINE/MAC pairs are emitted */
+      if (mask & (1<<i)) {
+	 brw_MUL(p, dst[i], dst[i], w[3]);
+      }
+   }
+}
+
+/* Per enabled channel: MOV dst[i] from interp[i] at suboffset 3 (no deltas used). */
+static void emit_cinterp( struct brw_compile *p, 
+			 const struct brw_reg *dst,
+			 GLuint mask,
+			 const struct brw_reg *arg0 )
+{
+   struct brw_reg interp[4];
+   GLuint nr = arg0[0].nr;
+   GLuint i;
+
+   interp[0] = brw_vec1_grf(nr, 0);	/* one source per channel: two GRFs (nr, nr+1), */
+   interp[1] = brw_vec1_grf(nr, 4);	/* each split at elements 0 and 4 */
+   interp[2] = brw_vec1_grf(nr+1, 0);
+   interp[3] = brw_vec1_grf(nr+1, 4);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+         brw_MOV(p, dst[i], suboffset(interp[i],3));	/* TODO: optimize away like other moves */
+      }
+   }
+}
+
+/* Sets the destination channels to 1.0 or 0.0 according to gl_FrontFacing. */
+static void emit_frontfacing( struct brw_compile *p,
+			      const struct brw_reg *dst,
+			      GLuint mask )
+{
+   struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+   GLuint i;
+
+   if (!(mask & WRITEMASK_XYZW))	/* nothing to write */
+      return;
+
+   for (i = 0; i < 4; i++) {	/* default every enabled channel to 0.0 */
+      if (mask & (1<<i)) {
+	 brw_MOV(p, dst[i], brw_imm_f(0.0));
+      }
+   }
+
+   /* bit 31 is "primitive is back face", so checking < (1u << 31) gives
+    * us front face.  The constant is unsigned: 1 << 31 overflows a signed int.
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1u << 31));
+   for (i = 0; i < 4; i++) {	/* presumably predicated on the CMP above -- confirm in brw_CMP */
+      if (mask & (1<<i)) {
+	 brw_MOV(p, dst[i], brw_imm_f(1.0));
+      }
+   }
+   brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
+ * looking like:
+ *
+ * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ *
+ * and we're trying to produce:
+ *
+ *           DDX                     DDY
+ * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
+ *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
+ *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
+ *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
+ *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
+ *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
+ *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
+ *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ *
+ * and add another set of two more subspans if in 16-pixel dispatch mode.
+ *
+ * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
+ * for each pair, and vertstride = 2 jumps us 2 elements after processing a
+ * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
+ * between each other.  We could probably do it like ddx and swizzle the right
+ * order later, but bail for now and just produce
+ * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ */
+void emit_ddxy(struct brw_compile *p,
+	       const struct brw_reg *dst,
+	       GLuint mask,
+	       GLboolean is_ddx,
+	       const struct brw_reg *arg0)
+{
+   int i;
+   struct brw_reg src0, src1;
+
+   if (mask & SATURATE)	/* saturate stays on for every ADD emitted below */
+      brw_set_saturate(p, 1);
+   for (i = 0; i < 4; i++ ) {
+      if (mask & (1<<i)) {
+	 if (is_ddx) {
+	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 1,	/* DDX minuend: element 1 */
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_2,
+			   BRW_WIDTH_2,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,	/* DDX subtrahend: element 0 */
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_2,
+			   BRW_WIDTH_2,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	 } else {
+	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,	/* DDY minuend: element 0 */
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_4,
+			   BRW_WIDTH_4,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,	/* DDY subtrahend: element 2 */
+			   BRW_REGISTER_TYPE_F,
+			   BRW_VERTICAL_STRIDE_4,
+			   BRW_WIDTH_4,
+			   BRW_HORIZONTAL_STRIDE_0,
+			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+	 }
+	 brw_ADD(p, dst[i], src0, negate(src1));	/* dst = src0 - src1 */
+      }
+   }
+   if (mask & SATURATE)
+      brw_set_saturate(p, 0);
+}
+/* Call the one-source emitter 'func' once per channel enabled in mask. */
+static void emit_alu1( struct brw_compile *p, 
+		       struct brw_instruction *(*func)(struct brw_compile *, 
+						       struct brw_reg, 
+						       struct brw_reg),
+		       const struct brw_reg *dst,
+		       GLuint mask,
+		       const struct brw_reg *arg0 )
+{
+   GLuint i;
+
+   if (mask & SATURATE)	/* SATURATE in mask turns saturation on for all calls below */
+      brw_set_saturate(p, 1);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 func(p, dst[i], arg0[i]);
+      }
+   }
+
+   if (mask & SATURATE)	/* and it is switched off again afterwards */
+      brw_set_saturate(p, 0);
+}
+
+/* Call the two-source emitter 'func' once per channel enabled in mask. */
+static void emit_alu2( struct brw_compile *p, 
+		       struct brw_instruction *(*func)(struct brw_compile *, 
+						       struct brw_reg, 
+						       struct brw_reg, 
+						       struct brw_reg),
+		       const struct brw_reg *dst,
+		       GLuint mask,
+		       const struct brw_reg *arg0,
+		       const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   if (mask & SATURATE)	/* SATURATE in mask turns saturation on for all calls below */
+      brw_set_saturate(p, 1);
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 func(p, dst[i], arg0[i], arg1[i]);
+      }
+   }
+
+   if (mask & SATURATE)	/* and it is switched off again afterwards */
+      brw_set_saturate(p, 0);
+}
+
+/* dst = arg0 * arg1 + arg2 per enabled channel, emitted as a MUL then an ADD. */
+static void emit_mad( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1,
+		      const struct brw_reg *arg2 )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 brw_MUL(p, dst[i], arg0[i], arg1[i]);	/* dst is used as the temporary */
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final ADD */
+	 brw_ADD(p, dst[i], dst[i], arg2[i]);
+	 brw_set_saturate(p, 0);
+      }
+   }
+}
+/* dst = RNDZ(arg0) per enabled channel; SATURATE in mask is not consulted. */
+static void emit_trunc( struct brw_compile *p,
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0)
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {
+	 brw_RNDZ(p, dst[i], arg0[i]);
+      }
+   }
+}
+/* LRP per enabled channel; intended dst = arg0*arg1 + (1 - arg0)*arg2. */
+static void emit_lrp( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1,
+		      const struct brw_reg *arg2 )
+{
+   GLuint i;
+
+   /* Uses dst as a temporary:
+    */
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 /* Can I use the LINE instruction for this? 
+	  */
+	 brw_ADD(p, dst[i], negate(arg0[i]), brw_imm_f(1.0));	/* dst = 1 - arg0 */
+	 brw_MUL(p, brw_null_reg(), dst[i], arg2[i]);	/* presumably leaves the product in the accumulator -- confirm */
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final MAC */
+	 brw_MAC(p, dst[i], arg0[i], arg1[i]);
+	 brw_set_saturate(p, 0);
+      }
+   }
+}
+/* Set-on-compare per enabled channel: MOV 0.0, CMP arg0 'cond' arg1, MOV 1.0. */
+static void emit_sop( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      GLuint cond,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 brw_MOV(p, dst[i], brw_imm_f(0));
+	 brw_CMP(p, brw_null_reg(), cond, arg0[i], arg1[i]);
+	 brw_MOV(p, dst[i], brw_imm_f(1.0));	/* presumably predicated on the CMP result -- confirm in brw_CMP */
+	 brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+}
+/* Set-on-less-than: emit_sop() with BRW_CONDITIONAL_L. */
+static void emit_slt( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_L, arg0, arg1);
+}
+/* Set-on-less-or-equal: emit_sop() with BRW_CONDITIONAL_LE. */
+static void emit_sle( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_LE, arg0, arg1);
+}
+/* Set-on-greater-than: emit_sop() with BRW_CONDITIONAL_G. */
+static void emit_sgt( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_G, arg0, arg1);
+}
+/* Set-on-greater-or-equal: emit_sop() with BRW_CONDITIONAL_GE. */
+static void emit_sge( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_GE, arg0, arg1);
+}
+/* Set-on-equal: emit_sop() with BRW_CONDITIONAL_EQ. */
+static void emit_seq( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_EQ, arg0, arg1);
+}
+/* Set-on-not-equal: emit_sop() with BRW_CONDITIONAL_NEQ. */
+static void emit_sne( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   emit_sop(p, dst, mask, BRW_CONDITIONAL_NEQ, arg0, arg1);
+}
+/* CMP per enabled channel; intended dst = (arg0 < 0) ? arg1 : arg2. */
+static void emit_cmp( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1,
+		      const struct brw_reg *arg2 )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg2[i]);	/* default result */
+	 brw_set_saturate(p, 0);
+
+	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg1[i]);	/* presumably predicated on the CMP result -- confirm in brw_CMP */
+	 brw_set_saturate(p, 0);
+	 brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+}
+/* MAX per enabled channel: dst = arg0, then dst = arg1 where arg0 < arg1. */
+static void emit_max( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg0[i]);	/* default result */
+	 brw_set_saturate(p, 0);
+
+	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg1[i]);	/* presumably predicated on the CMP result -- confirm in brw_CMP */
+	 brw_set_saturate(p, 0);
+	 brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+}
+/* MIN per enabled channel: dst = arg1, then dst = arg0 where arg0 < arg1. */
+static void emit_min( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   for (i = 0; i < 4; i++) {
+      if (mask & (1<<i)) {	
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg1[i]);	/* default result */
+	 brw_set_saturate(p, 0);
+
+	 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], arg1[i]);
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+	 brw_MOV(p, dst[i], arg0[i]);	/* presumably predicated on the CMP result -- confirm in brw_CMP */
+	 brw_set_saturate(p, 0);
+	 brw_set_predicate_control_flag_value(p, 0xff);
+      }
+   }
+}
+
+/* 3-component dot product of arg0 and arg1 into the one channel set in mask. */
+static void emit_dp3( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;	/* -1 if no channel; handled by the return below */
+
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));	/* exactly one destination channel */
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);	/* null dst: presumably accumulator-only -- confirm */
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final MAC */
+   brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);
+   brw_set_saturate(p, 0);
+}
+
+/* 4-component dot product of arg0 and arg1 into the one channel set in mask. */
+static void emit_dp4( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;	/* -1 if no channel; handled by the return below */
+
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));	/* exactly one destination channel */
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);	/* null dst: presumably accumulator-only -- confirm */
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, brw_null_reg(), arg0[2], arg1[2]);
+
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final MAC */
+   brw_MAC(p, dst[dst_chan], arg0[3], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+/* DPH: 3-component dot product plus arg1[3], into the one channel set in mask. */
+static void emit_dph( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;	/* -1 if no channel; handled by the return below */
+
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));	/* exactly one destination channel */
+
+   brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);	/* null dst: presumably accumulator-only -- confirm */
+   brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
+   brw_MAC(p, dst[dst_chan], arg0[2], arg1[2]);	/* unsaturated 3-component sum */
+
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final ADD */
+   brw_ADD(p, dst[dst_chan], dst[dst_chan], arg1[3]);
+   brw_set_saturate(p, 0);
+}
+
+/* Cross product: dst[i] = arg0[i1]*arg1[i2] - arg0[i2]*arg1[i1] for X, Y, Z. */
+static void emit_xpd( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0,
+		      const struct brw_reg *arg1 )
+{
+   GLuint i;
+
+   assert((mask & WRITEMASK_W) == 0);	/* W must not be enabled; only X, Y, Z are written */
+   
+   for (i = 0 ; i < 3; i++) {
+      if (mask & (1<<i)) {
+	 GLuint i2 = (i+2)%3;	/* the two other components, in cyclic order */
+	 GLuint i1 = (i+1)%3;
+
+	 brw_MUL(p, brw_null_reg(), negate(arg0[i2]), arg1[i1]);	/* null dst: presumably accumulator-only -- confirm */
+
+	 brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);	/* saturate only the final MAC */
+	 brw_MAC(p, dst[i], arg0[i1], arg1[i2]);
+	 brw_set_saturate(p, 0);
+      }
+   }
+}
+
+/* One-operand math: arg0[0] goes to message reg 2, result to the channel in mask. */
+static void emit_math1( struct brw_compile *p, 
+			GLuint function,
+			const struct brw_reg *dst,
+			GLuint mask,
+			const struct brw_reg *arg0 )
+{
+   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;	/* -1 if no channel; handled by the return below */
+
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));	/* exactly one destination channel */
+
+   brw_MOV(p, brw_message_reg(2), arg0[0]);
+
+   /* Send two messages to perform all 16 operations:
+    */
+   brw_math_16(p, 
+	       dst[dst_chan],
+	       function,
+	       (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+	       2,	/* the message reg written by the MOV above */
+	       brw_null_reg(),
+	       BRW_MATH_PRECISION_FULL);
+}
+
+/* Two-operand math: operands in m2/m3 and m4/m5, one brw_math() per half. */
+static void emit_math2( struct brw_compile *p, 
+			GLuint function,
+			const struct brw_reg *dst,
+			GLuint mask,
+			const struct brw_reg *arg0,
+			const struct brw_reg *arg1)
+{
+   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;	/* -1 if no channel; handled by the return below */
+
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));	/* exactly one destination channel */
+
+   brw_push_insn_state(p);	/* compression control is changed below; popped at the end */
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(2), arg0[0]);	/* first operand, first half */
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));	/* first operand, second half */
+
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p, brw_message_reg(3), arg1[0]);	/* second operand, first half */
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));	/* second operand, second half */
+
+   
+   /* Send two messages to perform all 16 operations:
+    */
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_math(p, 
+	    dst[dst_chan],
+	    function,
+	    (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+	    2,	/* message starting at m2 */
+	    brw_null_reg(),
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+   brw_math(p, 
+	    offset(dst[dst_chan],1),
+	    function,
+	    (mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : BRW_MATH_SATURATE_NONE,
+	    4,	/* message starting at m4 */
+	    brw_null_reg(),
+	    BRW_MATH_DATA_VECTOR,
+	    BRW_MATH_PRECISION_FULL);
+   
+   brw_pop_insn_state(p);
+}
+		     
+
+/* Load texture coordinates into message regs and emit a SIMD16 brw_SAMPLE. */
+static void emit_tex( struct brw_wm_compile *c,
+		      const struct brw_wm_instruction *inst,
+		      struct brw_reg *dst,
+		      GLuint dst_flags,
+		      struct brw_reg *arg )
+{
+   struct brw_compile *p = &c->func;
+   GLuint msgLength, responseLength;
+   GLuint i, nr;
+   GLuint emit;
+   GLuint msg_type;
+
+   /* How many input regs are there?
+    */
+   switch (inst->tex_idx) {
+   case TEXTURE_1D_INDEX:
+      emit = WRITEMASK_X;
+      nr = 1;
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      emit = WRITEMASK_XY;
+      nr = 2;
+      break;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      emit = WRITEMASK_XYZ;
+      nr = 3;
+      break;
+   default:
+      /* unexpected target */
+      abort();
+   }
+
+   if (inst->tex_shadow) {	/* shadow lookups always fill four slots */
+      nr = 4;
+      emit |= WRITEMASK_W;
+   }
+
+   msgLength = 1;
+
+   for (i = 0; i < nr; i++) {	/* slot i goes to message reg 2 + 2*i */
+      static const GLuint swz[4] = {0,1,2,2};	/* slot 3 reads arg[2] */
+      if (emit & (1<<i)) 
+	 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+      else
+	 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));	/* slot not in 'emit': send 0.0 */
+      msgLength += 2;
+   }
+
+   responseLength = 8;		/* always */
+
+   if (BRW_IS_IGDNG(p->brw)) {	/* IGDNG uses its own message-type constants */
+       if (inst->tex_shadow)
+           msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
+       else
+           msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
+   } else {
+       if (inst->tex_shadow)
+           msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+       else
+           msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+   }
+
+   brw_SAMPLE(p, 
+	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+	      1,
+	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+              SURF_INDEX_TEXTURE(inst->tex_unit),
+	      inst->tex_unit,	  /* sampler */
+	      inst->writemask,	/* the dst_flags parameter is not used in this function */
+	      msg_type, 
+	      responseLength,
+	      msgLength,
+	      0,	
+	      1,
+	      BRW_SAMPLER_SIMD_MODE_SIMD16);	
+}
+
+/* Like emit_tex but with the *_SAMPLE_BIAS message; arg[3] is sent in m8. */
+static void emit_txb( struct brw_wm_compile *c,
+		      const struct brw_wm_instruction *inst,
+		      struct brw_reg *dst,
+		      GLuint dst_flags,
+		      struct brw_reg *arg )
+{
+   struct brw_compile *p = &c->func;
+   GLuint msgLength;
+   GLuint msg_type;
+   /* Shadow ignored for txb.
+    */
+   switch (inst->tex_idx) {	/* m2/m4/m6 always written; unused coords get 0.0 */
+   case TEXTURE_1D_INDEX:
+      brw_MOV(p, brw_message_reg(2), arg[0]);
+      brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
+      brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+      break;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      brw_MOV(p, brw_message_reg(2), arg[0]);
+      brw_MOV(p, brw_message_reg(4), arg[1]);
+      brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
+      break;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      brw_MOV(p, brw_message_reg(2), arg[0]);
+      brw_MOV(p, brw_message_reg(4), arg[1]);
+      brw_MOV(p, brw_message_reg(6), arg[2]);
+      break;
+   default:
+      /* unexpected target */
+      abort();
+   }
+
+   brw_MOV(p, brw_message_reg(8), arg[3]);	/* presumably the LOD bias -- confirm against the sampler message layout */
+   msgLength = 9;
+
+   if (BRW_IS_IGDNG(p->brw))	/* IGDNG uses its own message-type constant */
+       msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS_IGDNG;
+   else
+       msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+
+   brw_SAMPLE(p, 
+	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
+	      1,
+	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
+              SURF_INDEX_TEXTURE(inst->tex_unit),
+	      inst->tex_unit,	  /* sampler */
+	      inst->writemask,	/* the dst_flags parameter is not used in this function */
+	      msg_type,
+	      8,		/* responseLength */
+	      msgLength,
+	      0,	
+	      1,
+	      BRW_SAMPLER_SIMD_MODE_SIMD16);	
+}
+
+/* LIT: writes only Y and Z; both are then set to 0.0 where arg0[0] <= 0. */
+static void emit_lit( struct brw_compile *p, 
+		      const struct brw_reg *dst,
+		      GLuint mask,
+		      const struct brw_reg *arg0 )
+{
+   assert((mask & WRITEMASK_XW) == 0);	/* X and W are not handled here */
+
+   if (mask & WRITEMASK_Y) {
+      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+      brw_MOV(p, dst[1], arg0[0]);	/* Y = arg0.x */
+      brw_set_saturate(p, 0);
+   }
+
+   if (mask & WRITEMASK_Z) {
+      emit_math2(p, BRW_MATH_FUNCTION_POW,	/* Z = POW with operands arg0[1] and arg0[3] */
+		 &dst[2],
+		 WRITEMASK_X | (mask & SATURATE),	/* X here means element 0 of &dst[2] */
+		 &arg0[1],
+		 &arg0[3]);
+   }
+
+   /* Ordinarily you'd use an iff statement to skip or shortcircuit
+    * some of the POW calculations above, but 16-wide iff statements
+    * seem to lock c1 hardware, so this is a nasty workaround:
+    */
+   brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
+   {
+      if (mask & WRITEMASK_Y) 
+	 brw_MOV(p, dst[1], brw_imm_f(0));	/* presumably predicated on the CMP result -- confirm in brw_CMP */
+
+      if (mask & WRITEMASK_Z) 
+	 brw_MOV(p, dst[2], brw_imm_f(0)); 
+   }
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);	/* stop predicating later instructions */
+}
+
+
+/* Kill pixel - set execution mask to zero for those pixels which
+ * fail.
+ */
+static void emit_kil( struct brw_wm_compile *c,
+		      struct brw_reg *arg0)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);	/* r0.0 as UW: the pixel mask (see payload notes) */
+   GLuint i;
+   
+   /* XXX - usually won't need 4 compares!
+    */
+   for (i = 0; i < 4; i++) {
+      brw_push_insn_state(p);
+      brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_GE, arg0[i], brw_imm_f(0));   	/* test arg0[i] >= 0 */
+      brw_set_predicate_control_flag_value(p, 0xff);
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_AND(p, r0uw, brw_flag_reg(), r0uw);	/* AND the flag register into r0.0 */
+      brw_pop_insn_state(p);
+   }
+}
+
+/* KIL_NV kills the pixels that are currently executing, not based on a test
+ * of the arguments: r0uw is ANDed with the inverse of brw_mask_reg(1).
+ */
+static void emit_kil_nv( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+   brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
+   brw_pop_insn_state(p);
+}
+/* Copy g1 into message register base_reg+1, then emit the framebuffer write message. */
+static void fire_fb_write( struct brw_wm_compile *c,
+			   GLuint base_reg,
+			   GLuint nr,
+			   GLuint target,
+			   GLuint eot )
+{
+   struct brw_compile *p = &c->func;
+   
+   /* Pass through control information:
+    */
+/*  mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask } */
+   {
+      brw_push_insn_state(p);
+      brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_MOV(p, 
+	       brw_message_reg(base_reg + 1),
+	       brw_vec8_grf(1, 0));
+      brw_pop_insn_state(p);
+   }
+
+   /* Send framebuffer write message: */
+/*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
+   brw_fb_WRITE(p,
+		retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+		base_reg,
+		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+		target,		
+		nr,
+		0, 
+		eot);
+}
+
+/* Copy the arg1 register selected by c->key.aa_dest_stencil_reg into message register 'reg'. */
+static void emit_aa( struct brw_wm_compile *c,
+		     struct brw_reg *arg1,
+		     GLuint reg )
+{
+   struct brw_compile *p = &c->func;
+   GLuint comp = c->key.aa_dest_stencil_reg / 2;
+   GLuint off = c->key.aa_dest_stencil_reg % 2;
+   struct brw_reg aa = offset(arg1[comp], off);
+
+   brw_push_insn_state(p);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE); /* ?? */
+   brw_MOV(p, brw_message_reg(reg), aa);
+   brw_pop_insn_state(p);
+}
+
+
+/* Post-fragment-program processing.  Send the results to the
+ * framebuffer.  'target' and 'eot' are passed through to fire_fb_write().
+ * \param arg0  the fragment color
+ * \param arg1  the pass-through depth value (also the source for emit_aa())
+ * \param arg2  the shader-computed depth value
+ */
+static void emit_fb_write( struct brw_wm_compile *c,
+			   struct brw_reg *arg0,
+			   struct brw_reg *arg1,
+			   struct brw_reg *arg2,
+			   GLuint target,
+			   GLuint eot)
+{
+   struct brw_compile *p = &c->func;
+   GLuint nr = 2;   /* index of the next message register to fill */
+   GLuint channel;
+
+   /* Reserve a space for AA - may not be needed:
+    */
+   if (c->key.aa_dest_stencil_reg)
+      nr += 1;
+
+   /* I don't really understand how this achieves the color interleave
+    * (ie RGBARGBA) in the result:  [Do the saturation here]
+    */
+   {
+      brw_push_insn_state(p);
+      
+      for (channel = 0; channel < 4; channel++) {
+	 /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+	 /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+
+	 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel),
+		 arg0[channel]);
+       
+	 brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+	 brw_MOV(p,
+		 brw_message_reg(nr + channel + 4),
+		 sechalf(arg0[channel]));
+      }
+
+      /* skip over the regs populated above:
+       */
+      nr += 8;
+   
+      brw_pop_insn_state(p);
+   }
+
+   if (c->key.source_depth_to_render_target)
+   {
+      if (c->key.computes_depth) 
+	 brw_MOV(p, brw_message_reg(nr), arg2[2]);
+      else 
+	 brw_MOV(p, brw_message_reg(nr), arg1[1]); /* ? */
+
+      nr += 2;
+   }
+
+   if (c->key.dest_depth_reg)
+   {
+      GLuint comp = c->key.dest_depth_reg / 2;
+      GLuint off = c->key.dest_depth_reg % 2;
+
+      if (off != 0) {
+         brw_push_insn_state(p);
+         brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+         brw_MOV(p, brw_message_reg(nr), offset(arg1[comp],1));
+         /* 2nd half? */
+         brw_MOV(p, brw_message_reg(nr+1), arg1[comp+1]);
+         brw_pop_insn_state(p);
+      }
+      else {
+         brw_MOV(p, brw_message_reg(nr), arg1[comp]);
+      }
+      nr += 2;
+   }
+
+   if (!c->key.runtime_check_aads_emit) {
+      if (c->key.aa_dest_stencil_reg)
+	 emit_aa(c, arg1, 2);
+
+      fire_fb_write(c, 0, nr, target, eot);
+   }
+   else {
+      struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+      struct brw_reg ip = brw_ip_reg();
+      struct brw_instruction *jmp;
+      
+      brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+      brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
+      brw_AND(p, 
+	      v1_null_ud, 
+	      get_element_ud(brw_vec8_grf(1,0), 6), 
+	      brw_imm_ud(1<<26)); 
+
+      jmp = brw_JMPI(p, ip, ip, brw_imm_d(0));
+      {
+	 emit_aa(c, arg1, 2);
+	 fire_fb_write(c, 0, nr, target, eot);
+	 /* note - thread killed in subroutine */
+      }
+      brw_land_fwd_jump(p, jmp);
+
+      /* ELSE: Shuffle up one register to fill in the hole left for AA:
+       */
+      fire_fb_write(c, 1, nr-1, target, eot);
+   }
+}
+
+
+/**
+ * Move GPR 'reg' to scratch memory slot 'slot', staging it in message reg 2.
+ */
+static void emit_spill( struct brw_wm_compile *c,
+			struct brw_reg reg,
+			GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+
+   /*
+     mov (16) m2.0<1>:ud   r2.0<8;8,1>:ud   { Align1 Compr }
+   */
+   brw_MOV(p, brw_message_reg(2), reg);
+
+   /*
+     mov (1) r0.2<1>:d    0x00000080:d     { Align1 NoMask }
+     send (16) null.0<1>:uw m1               r0.0<8;8,1>:uw   0x053003ff:ud    { Align1 }
+   */
+   brw_dp_WRITE_16(p, 
+		   retype(vec16(brw_vec8_grf(0, 0)), BRW_REGISTER_TYPE_UW),
+		   slot);
+}
+
+
+/**
+ * Load GPR 'reg' from scratch slot 'slot'; slot 0 instead emits a MOV of 0.0.
+ */
+static void emit_unspill( struct brw_wm_compile *c,
+			  struct brw_reg reg,
+			  GLuint slot )
+{
+   struct brw_compile *p = &c->func;
+
+   /* Slot 0 is the undef value.
+    */
+   if (slot == 0) {
+      brw_MOV(p, reg, brw_imm_f(0));
+      return;
+   }
+
+   /*
+     mov (1) r0.2<1>:d    0x000000c0:d     { Align1 NoMask }
+     send (16) r110.0<1>:uw m1               r0.0<8;8,1>:uw   0x041243ff:ud    { Align1 }
+   */
+
+   brw_dp_READ_16(p,
+		  retype(vec16(reg), BRW_REGISTER_TYPE_UW),
+		  slot);
+}
+
+
+/**
+ * Fill regs[0..3] with the hardware register of each reference in arg[]
+ * (the null register for missing ones), reloading from scratch if needed.
+ */
+static void get_argument_regs( struct brw_wm_compile *c,
+			       struct brw_wm_ref *arg[],
+			       struct brw_reg *regs )
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      struct brw_wm_ref *ref = arg[chan];
+
+      if (!ref) {
+	 regs[chan] = brw_null_reg();
+	 continue;
+      }
+
+      if (ref->unspill_reg)
+	 emit_unspill(c, brw_vec8_grf(ref->unspill_reg, 0),
+		      ref->value->spill_slot);
+      regs[chan] = ref->hw_reg;
+   }
+}
+
+
+/**
+ * Write every one of the 'nr' values with a nonzero spill_slot to scratch.
+ */
+static void spill_values( struct brw_wm_compile *c,
+			  struct brw_wm_value *values,
+			  GLuint nr )
+{
+   const struct brw_wm_value *v, *end = values + nr;
+
+   for (v = values; v != end; v++)
+      if (v->spill_slot != 0)
+	 emit_spill(c, v->hw_reg, v->spill_slot);
+}
+
+
+/* Emit native code for each of the c->nr_insns entries of c->instruction[],
+ * spilling payload and destination values that have a spill slot. */
+void brw_wm_emit( struct brw_wm_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   GLuint insn;
+
+   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
+
+   /* Check if any of the payload regs need to be spilled:
+    */
+   spill_values(c, c->payload.depth, 4);
+   spill_values(c, c->creg, c->nr_creg);
+   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   
+
+   for (insn = 0; insn < c->nr_insns; insn++) {
+
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      struct brw_reg args[3][4], dst[4];
+      GLuint i, dst_flags;
+      
+      /* Get argument regs:
+       */
+      for (i = 0; i < 3; i++) 
+	 get_argument_regs(c, inst->src[i], args[i]);
+
+      /* Get dest regs:
+       */
+      for (i = 0; i < 4; i++)
+	 if (inst->dst[i])
+	    dst[i] = inst->dst[i]->hw_reg;
+	 else
+	    dst[i] = brw_null_reg();
+      
+      /* Flags: the writemask, plus the SATURATE bit when requested
+       */
+      dst_flags = inst->writemask;
+      if (inst->saturate) 
+	 dst_flags |= SATURATE;
+
+      switch (inst->opcode) {
+	 /* Generated instructions for calculating triangle interpolants:
+	  */
+      case WM_PIXELXY:
+	 emit_pixel_xy(p, dst, dst_flags);
+	 break;
+
+      case WM_DELTAXY:
+	 emit_delta_xy(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_WPOSXY:
+	 emit_wpos_xy(c, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_PIXELW:
+	 emit_pixel_w(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_LINTERP:
+	 emit_linterp(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case WM_PINTERP:
+	 emit_pinterp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case WM_CINTERP:
+	 emit_cinterp(p, dst, dst_flags, args[0]);
+	 break;
+
+      case WM_FB_WRITE:
+	 emit_fb_write(c, args[0], args[1], args[2], inst->target, inst->eot);
+	 break;
+
+      case WM_FRONTFACING:
+	 emit_frontfacing(p, dst, dst_flags);
+	 break;
+
+	 /* Straightforward arithmetic:
+	  */
+      case OPCODE_ADD:
+	 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_FRC:
+	 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_FLR:
+	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_DDX:
+	 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+	 break;
+
+      case OPCODE_DDY:
+	 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+	 break;
+
+      case OPCODE_DP3:
+	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DP4:
+	 emit_dp4(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_DPH:
+	 emit_dph(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_TRUNC:
+	 emit_trunc(p, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_LRP:
+	 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAD:	
+	 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MOV:
+      case OPCODE_SWZ:
+	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_MUL:
+	 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_XPD:
+	 emit_xpd(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Higher math functions:
+	  */
+      case OPCODE_RCP:
+	 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_RSQ:
+	 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SIN:
+	 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_COS:
+	 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_EX2:
+	 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_LG2:
+	 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_SCS:
+	 /* There is an scs math function, but it would need some
+	  * fixup for 16-element execution.
+	  */
+	 if (dst_flags & WRITEMASK_X)
+	    emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 if (dst_flags & WRITEMASK_Y)
+	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 break;
+
+      case OPCODE_POW:
+	 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
+	 break;
+
+	 /* Comparisons:
+	  */
+      case OPCODE_CMP:
+	 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
+	 break;
+
+      case OPCODE_MAX:
+	 emit_max(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_MIN:
+	 emit_min(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SLT:
+	 emit_slt(p, dst, dst_flags, args[0], args[1]);
+	 break;
+
+      case OPCODE_SLE:
+	 emit_sle(p, dst, dst_flags, args[0], args[1]);
+	break;
+      case OPCODE_SGT:
+	 emit_sgt(p, dst, dst_flags, args[0], args[1]);
+	break;
+      case OPCODE_SGE:
+	 emit_sge(p, dst, dst_flags, args[0], args[1]);
+	 break;
+      case OPCODE_SEQ:
+	 emit_seq(p, dst, dst_flags, args[0], args[1]);
+	break;
+      case OPCODE_SNE:
+	 emit_sne(p, dst, dst_flags, args[0], args[1]);
+	break;
+
+      case OPCODE_LIT:
+	 emit_lit(p, dst, dst_flags, args[0]);
+	 break;
+
+	 /* Texturing operations:
+	  */
+      case OPCODE_TEX:
+	 emit_tex(c, inst, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_TXB:
+	 emit_txb(c, inst, dst, dst_flags, args[0]);
+	 break;
+
+      case OPCODE_KIL:
+	 emit_kil(c, args[0]);
+	 break;
+
+      case OPCODE_KIL_NV:
+	 emit_kil_nv(c);
+	 break;
+
+      default:
+	 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
+		      inst->opcode, inst->opcode < MAX_OPCODE ?
+				    _mesa_opcode_string(inst->opcode) :
+				    "unknown");
+      }
+      /* Write back any destination value that has a spill slot: */
+      for (i = 0; i < 4; i++)
+	if (inst->dst[i] && inst->dst[i]->spill_slot) 
+	   emit_spill(c, 
+		      inst->dst[i]->hw_reg, 
+		      inst->dst[i]->spill_slot);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      int i;
+
+      _mesa_printf("wm-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stderr, &p->store[i]);
+      _mesa_printf("\n");
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
new file mode 100644
index 0000000000..4e3edfbbff
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -0,0 +1,1177 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+               
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "brw_util.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+
+
+/** An invalid texture target */
+#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
+
+/** An invalid texture unit */
+#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
+/** Index of the first PROGRAM_TEMPORARY handed out by get_temp() */
+#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
+/* Channel indices, used as swizzle components (see GET_SWZ / src_swizzle1 uses) */
+#define X    0
+#define Y    1
+#define Z    2
+#define W    3
+
+/* Names of the internal WM_* opcodes. NOTE(review): order presumably mirrors the WM_* enum - confirm. */
+static const char *wm_opcode_strings[] = {   
+   "PIXELXY",
+   "DELTAXY",
+   "PIXELW",
+   "LINTERP",
+   "PINTERP",
+   "CINTERP",
+   "WPOSXY",
+   "FB_WRITE",
+   "FRONTFACING",
+};
+
+#if 0
+static const char *wm_file_strings[] = {   
+   "PAYLOAD"
+};
+#endif
+
+
+/***********************************************************************
+ * Source regs
+ */
+/* Build a source register for (file, idx): SWIZZLE_NOOP, NEGATE_NONE, Abs = 0, RelAddr = 0. */
+static struct prog_src_register src_reg(GLuint file, GLuint idx)
+{
+   struct prog_src_register src;
+   src.Abs = 0;
+   src.Negate = NEGATE_NONE;
+   src.RelAddr = 0;
+   src.Swizzle = SWIZZLE_NOOP;
+   src.Index = idx;
+   src.File = file;
+   return src;
+}
+/* Source register naming the same file and index as 'dst', built via src_reg(). */
+static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
+{
+   return src_reg(dst.File, dst.Index);
+}
+/* The "undefined" source: file PROGRAM_UNDEFINED, index 0. */
+static struct prog_src_register src_undef( void )
+{
+   return src_reg(PROGRAM_UNDEFINED, 0);
+}
+/* True if 'src' is in file PROGRAM_UNDEFINED (as produced by src_undef()). */
+static GLboolean src_is_undef(struct prog_src_register src)
+{
+   return src.File == PROGRAM_UNDEFINED;
+}
+/* Return a copy of 'reg' whose swizzle is replaced by MAKE_SWIZZLE4(x, y, z, w). */
+static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
+{
+   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
+   return reg;
+}
+/* Return a copy of 'reg' with all four swizzle components set to 'x'. */
+static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
+{
+   return src_swizzle(reg, x, x, x, x);
+}
+/* Return a copy of 'reg' whose swizzle is replaced by the already-packed value 'swizzle'. */
+static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
+{
+   reg.Swizzle = swizzle;
+   return reg;
+}
+
+
+/***********************************************************************
+ * Dest regs
+ */
+/* Build a destination register for (file, idx): full XYZW writemask, COND_TR, rest zeroed. */
+static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
+{
+   struct prog_dst_register dst;
+   dst.pad = 0;
+   dst.CondSrc = 0;
+   dst.CondSwizzle = 0;
+   dst.CondMask = COND_TR;
+   dst.RelAddr = 0;
+   dst.WriteMask = WRITEMASK_XYZW;
+   dst.Index = idx;
+   dst.File = file;
+   return dst;
+}
+/* Return a copy of 'reg' whose writemask is ANDed with 'mask'. */
+static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
+{
+   reg.WriteMask &= mask;
+   return reg;
+}
+/* A destination register in file PROGRAM_UNDEFINED, index 0. */
+static struct prog_dst_register dst_undef( void )
+{
+   return dst_reg(PROGRAM_UNDEFINED, 0);
+}
+
+
+/* Claim the lowest clear bit of c->fp_temp and return the matching temporary; exits if none is free. */
+static struct prog_dst_register get_temp( struct brw_wm_compile *c )
+{
+   int bit = _mesa_ffs( ~c->fp_temp );
+
+   if (!bit) {
+      _mesa_printf("%s: out of temporaries\n", __FILE__);
+      exit(1);
+   }
+
+   c->fp_temp |= 1u << (bit - 1);   /* unsigned: a signed 1 << 31 would be undefined */
+   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+}
+
+/* Clear the c->fp_temp bit for 'temp' (bit number = temp.Index - FIRST_INTERNAL_TEMP). */
+static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
+{
+   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
+}
+
+
+/***********************************************************************
+ * Instructions 
+ */
+/* Return the next slot of c->prog_instructions and bump c->nr_fp_insns (no bounds check here). */
+static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   return &c->prog_instructions[c->nr_fp_insns++];
+}
+/* Append a verbatim copy of *inst0 to the instruction list and return the copy. */
+static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
+					const struct prog_instruction *inst0)
+{
+   struct prog_instruction *inst = get_fp_inst(c);
+   *inst = *inst0;
+   return inst;
+}
+/* Append a zero-initialised instruction filled in from the arguments below; returns it. */
+static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
+				       GLuint op,
+				       struct prog_dst_register dest,
+				       GLuint saturate,
+				       GLuint tex_src_unit,
+				       GLuint tex_src_target,
+				       GLuint tex_shadow,
+				       struct prog_src_register src0,
+				       struct prog_src_register src1,
+				       struct prog_src_register src2 )
+{
+   struct prog_instruction *inst = get_fp_inst(c);
+      
+   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
+          tex_src_unit == TEX_UNIT_NONE);
+   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
+          tex_src_target == TEX_TARGET_NONE);
+
+   /* update mask of which texture units are referenced by this program */
+   if (tex_src_unit != TEX_UNIT_NONE)
+      c->fp->tex_units_used |= (1 << tex_src_unit);
+
+   memset(inst, 0, sizeof(*inst));
+
+   inst->Opcode = op;
+   inst->DstReg = dest;
+   inst->SaturateMode = saturate;   
+   inst->TexSrcUnit = tex_src_unit;
+   inst->TexSrcTarget = tex_src_target;
+   inst->TexShadow = tex_shadow;
+   inst->SrcReg[0] = src0;
+   inst->SrcReg[1] = src1;
+   inst->SrcReg[2] = src2;
+   return inst;
+}
+   
+/* emit_tex_op() for non-texture opcodes: TEX_UNIT_NONE, TEX_TARGET_NONE, shadow = 0. */
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+				       GLuint op,
+				       struct prog_dst_register dest,
+				       GLuint saturate,
+				       struct prog_src_register src0,
+				       struct prog_src_register src1,
+				       struct prog_src_register src2 )
+{
+   return emit_tex_op(c, op, dest, saturate,
+                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
+                      src0, src1, src2);
+}
+
+
+/* Many Mesa opcodes write one and the same value to every enabled result
+ * channel.  Rather than teach each opcode implementation to splat, emit the
+ * opcode for the lowest enabled channel only and follow it with a MOV that
+ * copies that channel to the remaining ones; brw_wm_pass*.c already knows
+ * how to optimize such MOVs.  Returns the last instruction emitted, or NULL.
+ */
+static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
+						 const struct prog_instruction *inst0)
+{
+   struct prog_instruction *last;
+   unsigned int chan;
+   unsigned int rest;
+
+   /* Nothing is written: emit nothing. */
+   if (inst0->DstReg.WriteMask == 0)
+      return NULL;
+
+   /* Copy of inst0 restricted to the lowest enabled channel. */
+   chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
+   last = get_fp_inst(c);
+   *last = *inst0;
+   last->DstReg.WriteMask = 1 << chan;
+
+   rest = inst0->DstReg.WriteMask & ~(1 << chan);
+   if (rest == 0)
+      return last;
+
+   /* Splat that channel into the remaining enabled channels. */
+   return emit_op(c, OPCODE_MOV,
+		  dst_mask(inst0->DstReg, rest), 0,
+		  src_swizzle1(src_reg_from_dst(inst0->DstReg), chan),
+		  src_undef(), src_undef());
+}
+
+
+/***********************************************************************
+ * Special instructions for interpolation and other tasks
+ */
+/* Return the stored pixel XY source, emitting WM_PIXELXY into a new temporary on first use. */
+static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register tmp;
+
+   /* Already emitted: reuse the stored register. */
+   if (!src_is_undef(c->pixel_xy))
+      return c->pixel_xy;
+
+   tmp = get_temp(c);
+
+   /* tmp.xy = PIXELXY payload[PAYLOAD_DEPTH];
+    */
+   emit_op(c,
+	   WM_PIXELXY,
+	   dst_mask(tmp, WRITEMASK_XY),
+	   0,
+	   src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH),
+	   src_undef(),
+	   src_undef());
+
+   /* Store the result so later calls return the same temporary. */
+   c->pixel_xy = src_reg_from_dst(tmp);
+
+   return c->pixel_xy;
+}
+/* Return the stored delta XY source, emitting WM_DELTAXY into a new temporary on first use. */
+static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+{
+   struct prog_dst_register tmp;
+   struct prog_src_register xy;
+
+   if (!src_is_undef(c->delta_xy))
+      return c->delta_xy;
+
+   /* Take the temporary first, then pixel_xy (which may emit code). */
+   tmp = get_temp(c);
+   xy = get_pixel_xy(c);
+
+   /* tmp.xy = DELTAXY pixel_xy, payload[PAYLOAD_DEPTH]
+    */
+   emit_op(c, WM_DELTAXY, dst_mask(tmp, WRITEMASK_XY), 0,
+	   xy,
+	   src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH),
+	   src_undef());
+
+   c->delta_xy = src_reg_from_dst(tmp);
+   return c->delta_xy;
+}
+/* Return the stored pixel W source, emitting WM_PIXELW into a new temporary on first use. */
+static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+{
+   struct prog_dst_register tmp;
+   struct prog_src_register deltas;
+
+   if (!src_is_undef(c->pixel_w))
+      return c->pixel_w;
+
+   /* Take the temporary first, then the deltas (which may emit code). */
+   tmp = get_temp(c);
+   deltas = get_delta_xy(c);
+
+   /* tmp.w = PIXELW payload[FRAG_ATTRIB_WPOS], deltas
+    */
+   emit_op(c, WM_PIXELW, dst_mask(tmp, WRITEMASK_W), 0,
+	   src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS),
+	   deltas,
+	   src_undef());
+
+   c->pixel_w = src_reg_from_dst(tmp);
+
+   return c->pixel_w;
+}
+/* Emit the instruction(s) that compute PROGRAM_INPUT[idx] from the payload; sets its fp_interp_emitted bit. */
+static void emit_interp( struct brw_wm_compile *c,
+			 GLuint idx )
+{
+   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
+   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
+   struct prog_src_register deltas = get_delta_xy(c);
+
+   /* Need to use PINTERP on attributes which have been
+    * multiplied by 1/W in the SF program, and LINTERP on those
+    * which have not:
+    */
+   switch (idx) {
+   case FRAG_ATTRIB_WPOS:
+      /* Have to treat wpos.xy specially:
+       */
+      emit_op(c,
+	      WM_WPOSXY,
+	      dst_mask(dst, WRITEMASK_XY),
+	      0,
+	      get_pixel_xy(c),
+	      src_undef(),
+	      src_undef());
+      
+      dst = dst_mask(dst, WRITEMASK_ZW);
+
+      /* PROGRAM_INPUT.attr.zw = LINTERP payload.interp[attr], deltas
+       */
+      emit_op(c,
+	      WM_LINTERP,
+	      dst,
+	      0,
+	      interp,
+	      deltas,
+	      src_undef());
+      break;
+   case FRAG_ATTRIB_COL0:
+   case FRAG_ATTRIB_COL1:
+      if (c->key.flat_shade) {
+	 emit_op(c,
+		 WM_CINTERP,
+		 dst,
+		 0,
+		 interp,
+		 src_undef(),
+		 src_undef());
+      }
+      else {
+         if (c->key.linear_color) {
+            emit_op(c,
+                    WM_LINTERP,
+                    dst,
+                    0,
+                    interp,
+                    deltas,
+                    src_undef());
+         }
+         else {
+            /* perspective-corrected color interpolation */
+            emit_op(c,
+                    WM_PINTERP,
+                    dst,
+                    0,
+                    interp,
+                    deltas,
+                    get_pixel_w(c));
+         }
+      }
+      break;
+   case FRAG_ATTRIB_FOGC:
+      /* Interpolate the fog coordinate */
+      emit_op(c,
+	      WM_PINTERP,
+	      dst_mask(dst, WRITEMASK_X),
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_YZW),
+	      0,
+	      src_swizzle(interp,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ONE),
+	      src_undef(),
+	      src_undef());
+      break;
+
+   case FRAG_ATTRIB_FACE:
+      /* XXX review/test this case */
+      emit_op(c,
+              WM_FRONTFACING,
+              dst_mask(dst, WRITEMASK_X),
+              0,
+              src_undef(),
+              src_undef(),
+              src_undef());
+      break;
+
+   case FRAG_ATTRIB_PNTC:
+      /* XXX review/test this case */
+      emit_op(c,
+	      WM_PINTERP,
+	      dst_mask(dst, WRITEMASK_XY),
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_ZW),
+	      0,
+	      src_swizzle(interp,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ZERO,
+			  SWIZZLE_ONE),
+	      src_undef(),
+	      src_undef());
+      break;
+
+   default:
+      emit_op(c,
+	      WM_PINTERP,
+	      dst,
+	      0,
+	      interp,
+	      deltas,
+	      get_pixel_w(c));
+      break;
+   }
+
+   c->fp_interp_emitted |= 1<<idx;
+}
+
+/***********************************************************************
+ * Hacks to extend the program parameter and constant lists.
+ */
+
+/* Find or add the state reference (s0..s4) in the parameter list of the
+ * original program, rather than creating a new list.  Doesn't really do
+ * any harm and it's not as if the parameter handling isn't a big hack
+ * anyway.  Returns a PROGRAM_STATE_VAR source for the parameter's index.
+ */
+static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
+                                                     GLint s0,
+                                                     GLint s1,
+                                                     GLint s2,
+                                                     GLint s3,
+                                                     GLint s4)
+{
+   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
+   gl_state_index tokens[STATE_LENGTH];
+   GLuint idx;
+   memset(tokens, 0, sizeof(tokens));  /* memcmp() below reads every entry of tokens[] */
+   tokens[0] = s0;
+   tokens[1] = s1;
+   tokens[2] = s2;
+   tokens[3] = s3;
+   tokens[4] = s4;
+   
+   for (idx = 0; idx < paramList->NumParameters; idx++) {
+      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
+	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
+	 return src_reg(PROGRAM_STATE_VAR, idx);
+   }
+
+   idx = _mesa_add_state_reference( paramList, tokens );
+   return src_reg(PROGRAM_STATE_VAR, idx);
+}
+
+/* Find or add the constant {s0,s1,s2,s3} in the program's parameter list; returns its source reg. */
+static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
+						     GLfloat s0,
+						     GLfloat s1,
+						     GLfloat s2,
+						     GLfloat s3)
+{
+   struct gl_program_parameter_list *params = c->fp->program.Base.Parameters;
+   GLfloat vec[4];
+   GLuint swz;
+   GLuint i;
+
+   vec[0] = s0;
+   vec[1] = s1;
+   vec[2] = s2;
+   vec[3] = s3;
+
+   /* Search first: otherwise every recompilation would append another
+    * copy of the same constant to the parameter list.
+    */
+   for (i = 0; i < params->NumParameters; i++) {
+      if (params->Parameters[i].Type != PROGRAM_CONSTANT)
+	 continue;
+      /* XXX: constants are reported as PROGRAM_STATE_VAR, mimicking the
+       * mesa bug which puts all constants and parameters in that file.
+       */
+      if (memcmp(params->ParameterValues[i], vec, sizeof(vec)) == 0)
+	 return src_reg(PROGRAM_STATE_VAR, i);
+   }
+
+   i = _mesa_add_unnamed_constant( params, vec, 4, &swz );
+   assert(swz == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
+   return src_reg(PROGRAM_STATE_VAR, i);
+}
+
+
+
+/***********************************************************************
+ * Expand various instructions here to simpler forms.  
+ */
+static void precalc_dst( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   struct prog_src_register src0 = inst->SrcReg[0];
+   struct prog_src_register src1 = inst->SrcReg[1];
+   struct prog_dst_register dst = inst->DstReg;
+   /* Expand into up to three ops: MUL for .y, SWZ for .xz, MOV for .w (per dst's writemask). */
+   if (dst.WriteMask & WRITEMASK_Y) {      
+      /* dst.y = mul src0.y, src1.y
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(dst, WRITEMASK_Y),
+	      inst->SaturateMode,
+	      src0,
+	      src1,
+	      src_undef());
+   }
+
+   if (dst.WriteMask & WRITEMASK_XZ) {
+      struct prog_instruction *swz;
+      GLuint z = GET_SWZ(src0.Swizzle, Z);
+
+      /* dst.xz = swz src0.1zzz
+       */
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XZ),
+		    inst->SaturateMode,
+		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].Negate &= ~NEGATE_X;
+   }
+   if (dst.WriteMask & WRITEMASK_W) {
+      /* dst.w = mov src1.w
+       */
+      emit_op(c,
+	      OPCODE_MOV,
+	      dst_mask(dst, WRITEMASK_W),
+	      inst->SaturateMode,
+	      src1,
+	      src_undef(),
+	      src_undef());
+   }
+}
+
+/* Expand LIT: .xw become the constant 1 via SWZ; .yz are left to a LIT op masked to YZ. */
+static void precalc_lit( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+   struct prog_src_register src0 = inst->SrcReg[0];
+   struct prog_dst_register dst = inst->DstReg;
+   
+   if (dst.WriteMask & WRITEMASK_XW) {
+      struct prog_instruction *swz;
+
+      /* dst.xw = swz src0.1111
+       */
+      swz = emit_op(c,
+		    OPCODE_SWZ,
+		    dst_mask(dst, WRITEMASK_XW),
+		    0,
+		    src_swizzle1(src0, SWIZZLE_ONE),
+		    src_undef(),
+		    src_undef());
+      /* Avoid letting the negation flag of src0 affect our 1 constant. */
+      swz->SrcReg[0].Negate = NEGATE_NONE;
+   }
+
+   if (dst.WriteMask & WRITEMASK_YZ) {
+      emit_op(c,
+	      OPCODE_LIT,
+	      dst_mask(dst, WRITEMASK_YZ),
+	      inst->SaturateMode,
+	      src0,
+	      src_undef(),
+	      src_undef());
+   }
+}
+
+
+/**
+ * Emit a TEX instruction plus whatever extra code its target needs:
+ * cube map coordinates are scaled by 1 / max(|x|,|y|,|z|), RECT coordinates
+ * are multiplied by the STATE_TEXRECT_SCALE parameter, YUV units get a
+ * by-hand YCbCr conversion and non-NOOP tex_swizzles get a trailing SWZ.
+ */
+static void precalc_tex( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+   struct prog_src_register coord;
+   struct prog_dst_register tmpcoord;
+   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+
+   assert(unit < BRW_MAX_TEX_UNIT);
+
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
+       struct prog_instruction *out;
+       struct prog_dst_register tmp0 = get_temp(c);
+       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
+       struct prog_dst_register tmp1 = get_temp(c);
+       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
+       struct prog_src_register src0 = inst->SrcReg[0];
+
+       /* scale the coord vector by 1 / (its largest-magnitude component) */
+       tmpcoord = get_temp(c);
+       coord = src_reg_from_dst(tmpcoord);
+
+       /* tmpcoord = |src0|  (Abs set and Negate cleared on the MOV below) */
+       out = emit_op(c, OPCODE_MOV,
+                     tmpcoord,
+                     0,
+                     src0,
+                     src_undef(),
+                     src_undef());
+       out->SrcReg[0].Negate = NEGATE_NONE;
+       out->SrcReg[0].Abs = 1;
+
+       /* tmp0 = MAX(coord.X, coord.Y) */
+       emit_op(c, OPCODE_MAX,
+               tmp0,
+               0,
+               src_swizzle1(coord, X),
+               src_swizzle1(coord, Y),
+               src_undef());
+
+       /* tmp1 = MAX(tmp0, coord.Z) */
+       emit_op(c, OPCODE_MAX,
+               tmp1,
+               0,
+               tmp0src,
+               src_swizzle1(coord, Z),
+               src_undef());
+
+       /* tmp0.x = 1 / tmp1 */
+       emit_op(c, OPCODE_RCP,
+               dst_mask(tmp0, WRITEMASK_X),
+               0,
+               tmp1src,
+               src_undef(),
+               src_undef());
+
+       /* tmpcoord = src0 * tmp0.x  (the original, signed coordinate) */
+       emit_op(c, OPCODE_MUL,
+               tmpcoord,
+               0,
+               src0,
+               src_swizzle1(tmp0src, SWIZZLE_X),
+               src_undef());
+
+       release_temp(c, tmp0);
+       release_temp(c, tmp1);
+   }
+   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+      struct prog_src_register scale = 
+	 search_or_add_param5( c, 
+			       STATE_INTERNAL, 
+			       STATE_TEXRECT_SCALE,
+			       unit,
+			       0,0 );
+
+      tmpcoord = get_temp(c);
+
+      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+       */
+      emit_op(c,
+	      OPCODE_MUL,
+	      tmpcoord,
+	      0,
+	      inst->SrcReg[0],
+	      src_swizzle(scale,
+			  SWIZZLE_X,
+			  SWIZZLE_Y,
+			  SWIZZLE_ONE,
+			  SWIZZLE_ONE),
+	      src_undef());
+
+      coord = src_reg_from_dst(tmpcoord);
+   }
+   else {
+      coord = inst->SrcReg[0];
+   }
+
+   /* Need to emit YUV texture conversions by hand.  Probably need to
+    * do this here - the alternative is in brw_wm_emit.c, but the
+    * conversion requires allocating a temporary variable which we
+    * don't have the facility to do that late in the compilation.
+    */
+   if (c->key.yuvtex_mask & (1 << unit)) {
+      /* convert ycbcr to RGBA ("!= 0": don't narrow bit 'unit' to a byte) */
+      const GLboolean swap_uv = (c->key.yuvtex_swap_mask & (1 << unit)) != 0;
+
+      /* 
+	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
+	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+	 UYV     = TEX ...
+	 UYV.xyz = ADD UYV,     C0
+	 UYV.y   = MUL UYV.y,   C0.w
+ 	 if (UV swapped)
+	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
+	 else
+	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
+	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
+      */
+      struct prog_dst_register dst = inst->DstReg;
+      struct prog_dst_register tmp = get_temp(c);
+      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
+      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+     
+      /* tmp     = TEX ...
+       */
+      emit_tex_op(c, 
+                  OPCODE_TEX,
+                  tmp,
+                  inst->SaturateMode,
+                  unit,
+                  inst->TexSrcTarget,
+                  inst->TexShadow,
+                  coord,
+                  src_undef(),
+                  src_undef());
+
+      /* tmp.xyz =  ADD TMP, C0
+       */
+      emit_op(c,
+	      OPCODE_ADD,
+	      dst_mask(tmp, WRITEMASK_XYZ),
+	      0,
+	      tmpsrc,
+	      C0,
+	      src_undef());
+
+      /* YUV.y   = MUL YUV.y, C0.w
+       */
+
+      emit_op(c,
+	      OPCODE_MUL,
+	      dst_mask(tmp, WRITEMASK_Y),
+	      0,
+	      tmpsrc,
+	      src_swizzle1(C0, W),
+	      src_undef());
+
+      /* 
+       * if (UV swapped)
+       *     RGB.xyz = MAD YUV.zzx, C1, YUV.y
+       * else
+       *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
+       */
+
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_XYZ),
+	      0,
+	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
+	      C1,
+	      src_swizzle1(tmpsrc, Y));
+
+      /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
+       */
+      emit_op(c,
+	      OPCODE_MAD,
+	      dst_mask(dst, WRITEMASK_Y),
+	      0,
+	      src_swizzle1(tmpsrc, Z),
+	      src_swizzle1(C1, W),
+	      src_swizzle1(src_reg_from_dst(dst), Y));
+
+      release_temp(c, tmp);
+   }
+   else {
+      /* ordinary RGBA tex instruction */
+      emit_tex_op(c, 
+                  OPCODE_TEX,
+                  inst->DstReg,
+                  inst->SaturateMode,
+                  unit,
+                  inst->TexSrcTarget,
+                  inst->TexShadow,
+                  coord,
+                  src_undef(),
+                  src_undef());
+   }
+
+   /* For GL_EXT_texture_swizzle: */
+   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
+      /* swizzle the result of the TEX instruction, in place */
+      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
+      emit_op(c, OPCODE_SWZ,
+              inst->DstReg,
+              SATURATE_OFF, /* saturate already done above */
+              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
+              src_undef(),
+              src_undef());
+   }
+
+   /* tmpcoord was only allocated on the RECT and CUBE paths */
+   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
+       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
+      release_temp(c, tmpcoord);
+}
+
+
+/**
+ * Decide whether a TXP instruction really needs the divide-by-W step.
+ * Returns GL_FALSE when the divide can be skipped, GL_TRUE otherwise.
+ */
+static GLboolean projtex( struct brw_wm_compile *c,
+			  const struct prog_instruction *inst )
+{
+   const struct prog_src_register coord = inst->SrcReg[0];
+
+   assert(inst->Opcode == OPCODE_TXP);
+
+   /* Cube map targets never get the divide (ut2004 gun rendering !?!). */
+   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
+      return GL_FALSE;
+
+   /* Only the simplest case is detected: a shader input whose W
+    * channel is read unswizzled and whose bit in proj_attrib_mask is
+    * clear.  Fancier patterns (RCP {1.0}, MUL x, {1.0}, ...) typically
+    * only arise from user-provided fragment programs and are not
+    * recognised here.
+    */
+   if (coord.File == PROGRAM_INPUT &&
+       GET_SWZ(coord.Swizzle, W) == W &&
+       (c->key.proj_attrib_mask & (1 << coord.Index)) == 0)
+      return GL_FALSE;
+
+   /* Everything else gets the explicit divide. */
+   return GL_TRUE;
+}
+
+
+/**
+ * Emit code for TXP.  If projtex() asks for it the coordinate is divided
+ * by W into a temporary first; the lookup itself goes via precalc_tex().
+ */
+static void precalc_txp( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const struct prog_src_register src0 = inst->SrcReg[0];
+   struct prog_dst_register proj;
+   struct prog_instruction tex_inst;
+
+   if (!projtex(c, inst)) {
+      /* no divide needed: dst = precalc(TEX src0) */
+      precalc_tex(c, inst);
+      return;
+   }
+
+   /* temporary that receives the projected coordinate */
+   proj = get_temp(c);
+
+   /* proj.w = RCP of the src0 channel that is swizzled into W */
+   emit_op(c,
+	   OPCODE_RCP,
+	   dst_mask(proj, WRITEMASK_W),
+	   0,
+	   src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
+	   src_undef(),
+	   src_undef());
+
+   /* proj.xyz = MUL src0, proj.wwww */
+   emit_op(c,
+	   OPCODE_MUL,
+	   dst_mask(proj, WRITEMASK_XYZ),
+	   0,
+	   src0,
+	   src_swizzle1(src_reg_from_dst(proj), W),
+	   src_undef());
+
+   /* dst = precalc(TEX proj): run the TEX path on a copy of the
+    * instruction whose coordinate source is the temporary.
+    */
+   tex_inst = *inst;
+   tex_inst.SrcReg[0] = src_reg_from_dst(proj);
+   precalc_tex(c, &tex_inst);
+
+   release_temp(c, proj);
+}
+
+
+
+/**
+ * Emit the WM_FB_WRITE instruction(s) that end the program.  inst->Aux
+ * holds the FB write target in bits 1 and up and the EOT marker in bit 0.
+ */
+static void emit_fb_write( struct brw_wm_compile *c )
+{
+   const struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+   const struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
+   const struct brw_context *brw = c->func.brw;
+   struct prog_src_register outcolor;
+   struct prog_instruction *wr, *final_wr;
+   GLuint target;
+
+   if (brw->state.nr_color_regions <= 1) {
+      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
+      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
+         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
+      else
+         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+      wr = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+                   0, outcolor, payload_r0_depth, outdepth);
+      wr->Aux = 1|(0<<1);   /* target 0, EOT */
+      return;
+   }
+
+   /* several color regions: write each one, EOT goes on the last write */
+   for (target = 0 ; target < brw->state.nr_color_regions; target++) {
+      outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + target);
+      final_wr = wr = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
+                              outcolor, payload_r0_depth, outdepth);
+      wr->Aux = (target<<1);
+      if (c->fp_fragcolor_emitted) {
+         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
+         final_wr = wr = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
+                                 0, outcolor, payload_r0_depth, outdepth);
+         wr->Aux = (target<<1);
+      }
+   }
+   final_wr->Aux |= 1;   /* eot */
+}
+
+
+
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
+/* Call emit_interp() for each PROGRAM_INPUT source whose fp_interp_emitted bit is clear. */
+static void validate_src_regs( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const GLuint count = brw_wm_nr_args( inst->Opcode );
+   GLuint arg;
+
+   for (arg = 0; arg < count; arg++) {
+      const GLuint idx = inst->SrcReg[arg].Index;
+      if (inst->SrcReg[arg].File != PROGRAM_INPUT)
+	 continue;
+      if ((c->fp_interp_emitted & (1<<idx)) == 0)
+	 emit_interp(c, idx);
+   }
+}
+	 
+/* Set fp_fragcolor_emitted when the instruction writes output FRAG_RESULT_COLOR. */
+static void validate_dst_regs( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const struct prog_dst_register *dst = &inst->DstReg;
+
+   if (dst->File == PROGRAM_OUTPUT && dst->Index == FRAG_RESULT_COLOR)
+      c->fp_fragcolor_emitted = 1;
+}
+
+/**
+ * Debug dump of \p nr instructions starting at \p insn.  Opcodes below
+ * MAX_OPCODE go to _mesa_print_instruction(); opcodes below MAX_WM_OPCODE
+ * are printed as 3-operand ALU ops named from wm_opcode_strings[].
+ */
+static void print_insns( const struct prog_instruction *insn,
+			 GLuint nr )
+{
+   GLuint i;
+   for (i = 0; i < nr; i++, insn++) {
+      _mesa_printf("%3u: ", i);   /* i is unsigned, so %u rather than %d */
+      if (insn->Opcode < MAX_OPCODE)
+	 _mesa_print_instruction(insn);
+      else if (insn->Opcode < MAX_WM_OPCODE)
+	 _mesa_print_alu_instruction(insn, wm_opcode_strings[insn->Opcode - MAX_OPCODE], 3);
+      else 
+	 _mesa_printf("965 Opcode %d\n", insn->Opcode);
+   }
+}
+
+
+/**
+ * Initial fragment program pass, run for both the GLSL and the non-GLSL
+ * code generation paths.
+ */
+void brw_wm_pass_fp( struct brw_wm_compile *c )
+{
+   struct brw_fragment_program *fp = c->fp;
+   GLuint pc;
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      _mesa_printf("pre-fp:\n");
+      _mesa_print_program(&fp->program.Base);
+      _mesa_printf("\n");
+   }
+
+   c->pixel_xy = src_undef();
+   c->delta_xy = src_undef();
+   c->pixel_w = src_undef();
+   c->nr_fp_insns = 0;
+   fp->tex_units_used = 0x0;
+
+   /* Preamble: visit every instruction once up front.  This is where
+    * special instructions such as WM_CINTERP, WM_LINTERP, WM_PINTERP and
+    * WM_WPOSXY are emitted to compute shader inputs from varying vars.
+    */
+   for (pc = 0; pc < fp->program.Base.NumInstructions; pc++) {
+      const struct prog_instruction *cur = &fp->program.Base.Instructions[pc];
+      validate_src_regs(c, cur);
+      validate_dst_regs(c, cur);
+   }
+
+   /* Main loop: visit every instruction again, doing assorted
+    * simplifications and transformations.
+    */
+   for (pc = 0; pc < fp->program.Base.NumInstructions; pc++) {
+      const struct prog_instruction *cur = &fp->program.Base.Instructions[pc];
+      struct prog_instruction *copy;
+
+      /* Opcodes with a case below are rewritten or expanded; all others
+       * go to emit_scalar_insn() or emit_insn() in the default case.
+       */
+
+      switch (cur->Opcode) {
+      case OPCODE_SWZ:
+	 copy = emit_insn(c, cur);
+	 copy->Opcode = OPCODE_MOV;
+	 break;
+
+      case OPCODE_ABS:
+	 copy = emit_insn(c, cur);
+	 copy->Opcode = OPCODE_MOV;
+	 copy->SrcReg[0].Negate = NEGATE_NONE;
+	 copy->SrcReg[0].Abs = 1;
+	 break;
+
+      case OPCODE_SUB:
+	 copy = emit_insn(c, cur);
+	 copy->Opcode = OPCODE_ADD;
+	 copy->SrcReg[1].Negate ^= NEGATE_XYZW;
+	 break;
+
+      case OPCODE_SCS:
+	 copy = emit_insn(c, cur);
+	 /* Limit the write mask to XY (this should probably be done in
+	  * the parser). */
+	 copy->DstReg.WriteMask &= WRITEMASK_XY;
+	 break;
+
+      case OPCODE_DST:
+	 precalc_dst(c, cur);
+	 break;
+
+      case OPCODE_LIT:
+	 precalc_lit(c, cur);
+	 break;
+
+      case OPCODE_TEX:
+	 precalc_tex(c, cur);
+	 break;
+
+      case OPCODE_TXP:
+	 precalc_txp(c, cur);
+	 break;
+
+      case OPCODE_TXB:
+	 copy = emit_insn(c, cur);
+	 copy->TexSrcUnit = fp->program.Base.SamplerUnits[cur->TexSrcUnit];
+         assert(copy->TexSrcUnit < BRW_MAX_TEX_UNIT);
+	 break;
+
+      case OPCODE_XPD:
+	 copy = emit_insn(c, cur);
+	 /* Limit the write mask to XYZ (this should probably be done in
+	  * the parser). */
+	 copy->DstReg.WriteMask &= WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_KIL:
+	 copy = emit_insn(c, cur);
+	 /* KIL writes nothing (this should probably be done in the
+	  * parser). */
+	 copy->DstReg.WriteMask = 0;
+	 break;
+      case OPCODE_END:
+	 emit_fb_write(c);
+	 break;
+      case OPCODE_PRINT:
+	 break;
+      default:
+	 if (brw_wm_is_scalar_result(cur->Opcode))
+	    emit_scalar_insn(c, cur);
+	 else
+	    emit_insn(c, cur);
+	 break;
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      _mesa_printf("pass_fp:\n");
+      print_insns( c->prog_instructions, c->nr_fp_insns );
+      _mesa_printf("\n");
+   }
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
new file mode 100644
index 0000000000..c9fe1dd8ad
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -0,0 +1,3046 @@
+#include "main/macros.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_optimize.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+
+enum _subroutine {   /* indexes c->subroutines[] in invoke_subroutine() */
+    SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
+};
+
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+                                  const struct prog_instruction *inst,
+                                  GLuint component);
+
+/**
+ * Report whether the given fragment program uses any of the GLSL-style
+ * features tested below: ARL, IF/ENDIF, CAL/RET, BRK, BGNLOOP or one of
+ * the NOISE opcodes.  Some GLSL shaders use these, others do not.
+ */
+GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
+{
+    const struct prog_instruction *insts = fp->Base.Instructions;
+    int n;
+
+    for (n = 0; n < fp->Base.NumInstructions; n++) {
+	switch (insts[n].Opcode) {
+	    case OPCODE_ARL:
+	    case OPCODE_IF:
+	    case OPCODE_ENDIF:
+	    case OPCODE_BGNLOOP:
+	    case OPCODE_BRK:
+	    case OPCODE_CAL:
+	    case OPCODE_RET:
+	    case OPCODE_NOISE1:
+	    case OPCODE_NOISE2:
+	    case OPCODE_NOISE3:
+	    case OPCODE_NOISE4:
+		return GL_TRUE;
+	    default:
+		break;
+	}
+    }
+    return GL_FALSE;
+}
+
+
+
+static void
+reclaim_temps(struct brw_wm_compile *c);
+
+
+/** Mark GRF register \p r as used, so that alloc_grf() will not hand it out. */
+static void
+prealloc_grf(struct brw_wm_compile *c, int r)
+{
+   c->used_grf[r] = GL_TRUE;
+}
+
+
+/** Mark GRF \p r as not in use and lower the first-free hint to it if needed. */
+static void
+release_grf(struct brw_wm_compile *c, int r)
+{
+   /* note: r is not required to be marked as used on entry */
+   c->first_free_grf = MIN2(c->first_free_grf, r);
+   c->used_grf[r] = GL_FALSE;
+}
+
+
+/**
+ * Return the index of a free GRF and mark it as used, or -1 if none is
+ * free even after reclaim_temps() has been given a chance to release some.
+ */
+static int
+alloc_grf(struct brw_wm_compile *c)
+{
+   GLuint r;
+   int attempt;
+
+   for (attempt = 0; attempt < 2; attempt++) {
+      if (attempt == 1) {
+         /* no free temps, try to reclaim some and rescan from GRF 0 */
+         reclaim_temps(c);
+         c->first_free_grf = 0;
+      }
+
+      /* scan upward from the hint for an unused register */
+      for (r = c->first_free_grf; r < BRW_WM_MAX_GRF; r++) {
+         if (c->used_grf[r])
+            continue;
+         c->used_grf[r] = GL_TRUE;
+         c->first_free_grf = r + 1;  /* a guess */
+         return r;
+      }
+   }
+
+   /* both scans failed, so every register must be marked as used */
+   for (r = 0; r < BRW_WM_MAX_GRF; r++)
+      assert(c->used_grf[r]);
+
+   /* really, no free GRF regs found: warn once per compilation */
+   if (!c->out_of_regs) {
+      _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+      c->out_of_regs = GL_TRUE;
+   }
+
+   return -1;
+}
+
+
+/** Return one more than the highest GRF index marked as used (0 if none is). */
+static int
+num_grf_used(const struct brw_wm_compile *c)
+{
+   int count = BRW_WM_MAX_GRF;
+
+   while (count > 0 && !c->used_grf[count - 1])
+      count--;
+   return count;
+}
+
+
+
+/**
+ * Record that Mesa register file/index/component maps to hardware register \p reg.
+ */
+static void set_reg(struct brw_wm_compile *c, int file, int index,
+                    int component, struct brw_reg reg)
+{
+    c->wm_regs[file][index][component].inited = GL_TRUE;
+    c->wm_regs[file][index][component].reg = reg;
+}
+
+/**
+ * Hand out the next scratch register from tmp_regs[].  When every slot
+ * below tmp_max is already handed out, one more GRF is appended first.
+ */
+static struct brw_reg alloc_tmp(struct brw_wm_compile *c)
+{
+    struct brw_reg tmp;
+
+    if (c->tmp_index == c->tmp_max) {
+       /* need another temp: grow the tmp_regs[] array by one GRF */
+       int grf = alloc_grf(c);
+       if (grf < 0)
+          grf = 50; /* XXX random register! (alloc_grf found nothing free) */
+       c->tmp_regs[ c->tmp_max++ ] = grf;
+    }
+
+    tmp = brw_vec8_grf(c->tmp_regs[ c->tmp_index ], 0);
+    c->tmp_index++;
+    assert(tmp.nr < BRW_WM_MAX_GRF);
+    return tmp;
+}
+
+/**
+ * Return the current temp allocation position (c->tmp_index).
+ * Pass the value to release_tmps() to give back temps allocated after it.
+ */
+static int mark_tmps(struct brw_wm_compile *c)
+{
+    return c->tmp_index;
+}
+
+static struct brw_reg lookup_tmp( struct brw_wm_compile *c, int index )
+{
+    return brw_vec8_grf( c->tmp_regs[ index ], 0 ); /* GRF stored in slot 'index' of tmp_regs[] */
+}
+
+static void release_tmps(struct brw_wm_compile *c, int mark)
+{
+    c->tmp_index = mark; /* rewind to a position obtained from mark_tmps() */
+}
+
+/**
+ * Map one component of a Mesa register to a brw (hardware) register.
+ *
+ * Since we're running in SOA mode each Mesa register corresponds to four
+ * hardware registers, one per component.  A GRF is allocated the first
+ * time a given (file, index, component) is requested and the same GRF is
+ * returned on later requests.
+ *
+ * \param file  register file, one of PROGRAM_x; STATE_VAR, CONSTANT and
+ *              UNIFORM are all looked up as PROGRAM_STATE_VAR
+ * \param index  register number
+ * \param component  src component (X=0, Y=1, Z=2, W=3)
+ * \param nr  unused
+ * \param neg  negate mask; only the bit for \p component is tested
+ * \param abs  non-zero to take the absolute value
+ * \return the register, or the null register for PROGRAM_UNDEFINED and
+ *         for unexpected files
+ */
+static struct brw_reg
+get_reg(struct brw_wm_compile *c, int file, int index, int component,
+        int nr, GLuint neg, GLuint abs)
+{
+    struct brw_reg hw;
+
+    switch (file) {
+	case PROGRAM_UNDEFINED:
+	    return brw_null_reg();
+	case PROGRAM_TEMPORARY:
+	case PROGRAM_INPUT:
+	case PROGRAM_OUTPUT:
+	case PROGRAM_PAYLOAD:
+	    break;
+	case PROGRAM_STATE_VAR:
+	case PROGRAM_CONSTANT:
+	case PROGRAM_UNIFORM:
+	    /* these three share one table */
+	    file = PROGRAM_STATE_VAR;
+	    break;
+	default:
+	    _mesa_problem(NULL, "Unexpected file in get_reg()");
+	    return brw_null_reg();
+    }
+
+    assert(index < 256);
+    assert(component < 4);
+
+    if (!c->wm_regs[file][index][component].inited) {
+       /* no HW register for this Mesa register yet: allocate one and
+        * record the mapping
+        */
+       int grf = alloc_grf(c);
+       if (grf < 0)
+          grf = 51; /* XXX random register! (totally out of temps) */
+       set_reg(c, file, index, component, brw_vec8_grf(grf, 0));
+    }
+    /* (re-)use the recorded register */
+    hw = c->wm_regs[file][index][component].reg;
+
+    /* source modifiers are applied to the returned copy only */
+    if (neg & (1 << component))
+       hw = negate(hw);
+    if (abs)
+       hw = brw_abs(hw);
+
+    return hw;
+}
+
+
+
+/**
+ * This is called if we run out of GRF registers.  It asks
+ * _mesa_find_temp_intervals() for the live interval of each program temp
+ * and releases the GRFs of every temp whose interval ended before
+ * c->cur_inst, i.e. temps which won't be used again.
+ */
+static void
+reclaim_temps(struct brw_wm_compile *c)
+{
+   GLint first_use[MAX_PROGRAM_TEMPS];
+   GLint last_use[MAX_PROGRAM_TEMPS];
+   int temp;
+
+   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+                             first_use, last_use);
+
+   for (temp = 0; temp < MAX_PROGRAM_TEMPS; temp++) {
+      int chan;
+
+      /* skip temps whose end is -1 and temps that are still needed */
+      if (last_use[temp] == -1 || last_use[temp] >= c->cur_inst)
+         continue;
+
+      /* temp is dead: free the GRF behind each of its mapped channels */
+      for (chan = 0; chan < 4; chan++) {
+         if (!c->wm_regs[PROGRAM_TEMPORARY][temp][chan].inited)
+            continue;
+
+         release_grf(c, c->wm_regs[PROGRAM_TEMPORARY][temp][chan].reg.nr);
+         /* forget the mapping so that get_reg() allocates afresh */
+         c->wm_regs[PROGRAM_TEMPORARY][temp][chan].inited = GL_FALSE;
+      }
+   }
+}
+
+
+
+
+
+/**
+ * Preallocate registers: payload depth, constants (uniforms/state vars,
+ * unless a real constant buffer is chosen), shader inputs, the emit mask
+ * and stack registers, and the GRFs for TEX/TXB results.
+ */
+static void prealloc_reg(struct brw_wm_compile *c)
+{
+    int i, j;
+    struct brw_reg reg;
+    int urb_read_length = 0;
+    GLuint inputs = FRAG_BIT_WPOS | c->fp_interp_emitted;
+    GLuint reg_index = 0;
+
+    memset(c->used_grf, GL_FALSE, sizeof(c->used_grf));
+    c->first_free_grf = 0;
+
+    for (i = 0; i < 4; i++) {
+        if (i < c->key.nr_depth_regs) 
+            reg = brw_vec8_grf(i * 2, 0);
+        else
+            reg = brw_vec8_grf(0, 0);
+	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+    }
+    reg_index += 2 * c->key.nr_depth_regs;
+
+    /* constants */
+    {
+        const GLuint nr_params = c->fp->program.Base.Parameters->NumParameters;
+        const GLuint nr_temps = c->fp->program.Base.NumTemporaries;
+
+        /* use a real constant buffer, or just use a section of the GRF? */
+        /* XXX this heuristic may need adjustment... */
+        if ((nr_params + nr_temps) * 4 + reg_index > 80)
+           c->fp->use_const_buffer = GL_TRUE;
+        else
+           c->fp->use_const_buffer = GL_FALSE;
+        /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
+
+        if (c->fp->use_const_buffer) {
+           /* We'll use a real constant buffer and fetch constants from
+            * it with a dataport read message.
+            */
+
+           /* number of float constants in CURBE */
+           c->prog_data.nr_params = 0;
+        }
+        else {
+           const struct gl_program_parameter_list *plist = 
+              c->fp->program.Base.Parameters;
+           int index = 0;
+
+           /* number of float constants in CURBE */
+           c->prog_data.nr_params = 4 * nr_params;
+
+           /* loop over program constants (float[4]) */
+           for (i = 0; i < nr_params; i++) {
+              /* loop over XYZW channels */
+              for (j = 0; j < 4; j++, index++) {
+                 reg = brw_vec1_grf(reg_index + index / 8, index % 8);
+                 /* Save pointer to parameter/constant value.
+                  * Constants will be copied in prepare_constant_buffer()
+                  */
+                 c->prog_data.param[index] = &plist->ParameterValues[i][j];
+                 set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+              }
+           }
+           /* number of constant regs used (each reg is float[8]) */
+           c->nr_creg = 2 * ((4 * nr_params + 15) / 16);
+           reg_index += c->nr_creg;
+        }
+    }
+
+    /* fragment shader inputs */
+    for (i = 0; i < VERT_RESULT_MAX; i++) {
+       int fp_input;
+
+       if (i >= VERT_RESULT_VAR0)
+	  fp_input = i - VERT_RESULT_VAR0 + FRAG_ATTRIB_VAR0;
+       else if (i <= VERT_RESULT_TEX7)
+	  fp_input = i;
+       else
+	  fp_input = -1;
+
+       if (fp_input >= 0 && inputs & (1 << fp_input)) {
+	  urb_read_length = reg_index;
+	  reg = brw_vec8_grf(reg_index, 0);
+	  for (j = 0; j < 4; j++)
+	     set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+       }
+       if (c->key.vp_outputs_written & (1 << i)) {
+	  reg_index += 2;
+       }
+    }
+
+    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+    c->prog_data.urb_read_length = urb_read_length;
+    c->prog_data.curb_read_length = c->nr_creg; /* NOTE(review): nr_creg is only assigned on the non-const-buffer path above */
+    c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index++;
+    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
+    reg_index += 2;
+
+    /* mark GRF regs [0..reg_index-1] as in-use */
+    for (i = 0; i < reg_index; i++)
+       prealloc_grf(c, i);
+
+    /* Don't use GRF 126, 127.  Using them seems to lead to GPU lock-ups */
+    prealloc_grf(c, 126);
+    prealloc_grf(c, 127);
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+	const struct prog_instruction *inst = &c->prog_instructions[i];
+	struct brw_reg dst[4];
+
+	switch (inst->Opcode) {
+	case OPCODE_TEX:
+	case OPCODE_TXB:
+	    /* Allocate the channels of texture results contiguously,
+	     * since they are written out that way by the sampler unit.
+	     */
+	    for (j = 0; j < 4; j++) {
+		dst[j] = get_dst_reg(c, inst, j);
+		if (j != 0)
+		    assert(dst[j].nr == dst[j - 1].nr + 1);
+	    }
+	    break;
+	default:
+	    break;
+	}
+    }
+
+    /* An instruction may reference up to three constants; they'll be found
+     * in these registers.  XXX alloc these on demand!
+     * NOTE(review): alloc_grf() can return -1; that is not checked here.
+     */
+    if (c->fp->use_const_buffer) {
+       for (i = 0; i < 3; i++) {
+          c->current_const[i].index = -1;
+          c->current_const[i].reg = brw_vec8_grf(alloc_grf(c), 0);
+       }
+    }
+#if 0
+    printf("USE CONST BUFFER? %d\n", c->fp->use_const_buffer);
+    printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index);
+#endif
+}
+
+
+/**
+ * For each of the instruction's three source slots that refers to a state
+ * var, constant or uniform, record the constant's index in
+ * c->current_const[slot] and emit a read of it from the constant buffer
+ * into that slot's register.  The read is emitted unconditionally.
+ */
+static void fetch_constants(struct brw_wm_compile *c,
+                            const struct prog_instruction *inst)
+{
+   struct brw_compile *p = &c->func;
+   GLuint slot;
+
+   for (slot = 0; slot < 3; slot++) {
+      const struct prog_src_register *src = &inst->SrcReg[slot];
+      switch (src->File) {
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_UNIFORM:
+	 break;
+      default:
+	 continue;   /* this slot does not name a constant */
+      }
+
+      c->current_const[slot].index = src->Index;
+
+      /* need to fetch the constant now */
+      brw_dp_READ_4(p,
+		    c->current_const[slot].reg,   /* writeback dest */
+		    src->RelAddr,                 /* relative indexing? */
+		    16 * src->Index,              /* byte offset */
+		    SURF_INDEX_FRAG_CONST_BUFFER  /* binding table index */
+		    );
+   }
+}
+
+
+/**
+ * Convert one component of the instruction's Mesa dst register to a brw
+ * register via get_reg(), with no negate/abs modifiers.
+ */
+static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
+                                  const struct prog_instruction *inst,
+                                  GLuint component)
+{
+    const struct prog_dst_register *dst = &inst->DstReg;
+    return get_reg(c, dst->File, dst->Index, component, 1 /* nr */, 0, 0);
+}
+
+
+/**
+ * Form the operand for one component of a constant-buffer source.
+ *
+ * We should have already fetched the constant from the constant buffer
+ * in fetch_constants().  All that is left is to return a register
+ * description that extracts the needed component and smears it across
+ * all eight vector components.
+ *
+ * \param srcRegIndex  which of the instruction's sources (asserted < 3)
+ * \param component  already-swizzled component to extract (asserted < 4)
+ */
+static struct brw_reg
+get_src_reg_const(struct brw_wm_compile *c,
+                  const struct prog_instruction *inst,
+                  GLuint srcRegIndex, GLuint component)
+{
+   const struct prog_src_register *src;
+   struct brw_reg operand;
+
+   assert(component < 4);
+   assert(srcRegIndex < 3);
+   assert(c->current_const[srcRegIndex].index != -1);
+
+   src = &inst->SrcReg[srcRegIndex];
+
+   /* extract the desired float from the fetched register, and smear:
+    * a (0, 1, 0) stride starting at that float's offset */
+   operand = stride(c->current_const[srcRegIndex].reg, 0, 1, 0);
+   operand.subnr = component * 4;
+
+   /* source modifiers: negate first, then abs */
+   if (src->Negate & (1 << component))
+      operand = negate(operand);
+   if (src->Abs)
+      operand = brw_abs(operand);
+
+   return operand;
+}
+
+
+/**
+ * Convert one channel of a Mesa src register to a brw register.  The
+ * channel is mapped through the operand's swizzle first; the ZERO and
+ * ONE swizzle terms are returned as float immediates, to which the
+ * operand's negate/abs modifiers are not applied.
+ */
+static struct brw_reg get_src_reg(struct brw_wm_compile *c,
+                                  const struct prog_instruction *inst,
+                                  GLuint srcRegIndex, GLuint channel)
+{
+    const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+    const GLuint component = GET_SWZ(src->Swizzle, channel);
+
+    /* Extended swizzle terms */
+    if (component == SWIZZLE_ZERO)
+       return brw_imm_f(0.0F);
+    if (component == SWIZZLE_ONE)
+       return brw_imm_f(1.0F);
+
+    /* state vars, constants and uniforms come from the constant buffer
+     * when one is in use */
+    if (c->fp->use_const_buffer &&
+        (src->File == PROGRAM_STATE_VAR ||
+         src->File == PROGRAM_CONSTANT ||
+         src->File == PROGRAM_UNIFORM))
+       return get_src_reg_const(c, inst, srcRegIndex, component);
+
+    /* other type of source register (nr is always passed as 1) */
+    return get_reg(c, src->File, src->Index, component, 1,
+                   src->Negate, src->Abs);
+}
+
+
+/**
+ * Same as get_src_reg(), but a PROGRAM_CONSTANT source is returned as a
+ * float immediate holding the literal value, with the negate and abs
+ * modifiers already folded in.  Extended swizzle terms (SWIZZLE_ZERO/ONE)
+ * do not index the literal and are left to get_src_reg().
+ * Note that a brw instruction only allows one src operand to be a literal.
+ * For instructions with more than one operand, only the second can be a
+ * literal.  This means that we treat some literals as constants/uniforms
+ * (which is why PROGRAM_CONSTANT is checked in fetch_constants()).
+ */
+static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, 
+                                      const struct prog_instruction *inst,
+                                      GLuint srcRegIndex, GLuint channel)
+{
+    const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
+    const GLuint component = GET_SWZ(src->Swizzle, channel);
+
+    /* a parameter holds four floats, so only X..W may be used as an index */
+    if (src->File == PROGRAM_CONSTANT && component < 4) {
+       /* a literal */
+       const GLfloat *param =
+          c->fp->program.Base.Parameters->ParameterValues[src->Index];
+       GLfloat value = param[component];
+
+       if (src->Negate & (1 << channel))
+          value = -value;
+       if (src->Abs)
+          value = FABSF(value);
+       return brw_imm_f(value);
+    }
+
+    return get_src_reg(c, inst, srcRegIndex, channel);
+}
+
+
+/**
+ * Subroutines are minimal support for reusable instruction sequences.
+ * They are implemented as simply as possible to minimise overhead: there
+ * is no explicit support for communication between the caller and callee
+ * other than saving the return address in a temporary register, nor is
+ * there any automatic local storage.  This implies that great care is
+ * required before attempting reentrancy or any kind of nested
+ * subroutine invocations.
+ */
+static void invoke_subroutine( struct brw_wm_compile *c,
+			       enum _subroutine subroutine,
+			       void (*emit)( struct brw_wm_compile * ) )
+{
+    struct brw_compile *p = &c->func;
+
+    assert( subroutine < BRW_WM_MAX_SUBROUTINE );
+    
+    if( c->subroutines[ subroutine ] ) {
+	/* subroutine previously emitted: reuse existing instructions */
+
+	int mark = mark_tmps( c );
+	struct brw_reg return_address = retype( alloc_tmp( c ),
+						BRW_REGISTER_TYPE_UD );
+	int here = p->nr_insn; /* instruction count before the two ADDs below */
+	
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE);
+	brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 2 << 4 ) ); /* return address = IP + (2 << 4) */
+
+	brw_ADD( p, brw_ip_reg(), brw_ip_reg(), /* jump: add the offset to the recorded start to IP */
+		 brw_imm_d( ( c->subroutines[ subroutine ] -
+			      here - 1 ) << 4 ) );
+	brw_pop_insn_state(p);
+
+	release_tmps( c, mark );
+    } else {
+	/* previously unused subroutine: emit, and mark for later reuse */
+	
+	int mark = mark_tmps( c );
+	struct brw_reg return_address = retype( alloc_tmp( c ),
+						BRW_REGISTER_TYPE_UD );
+	struct brw_instruction *calc;
+	int base = p->nr_insn;
+	
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE);
+	calc = brw_ADD( p, return_address, brw_ip_reg(), brw_imm_ud( 0 ) ); /* src1 is a placeholder, patched below */
+	brw_pop_insn_state(p);
+	
+	c->subroutines[ subroutine ] = p->nr_insn; /* record where the body starts */
+
+	emit( c );
+	
+	brw_push_insn_state(p);
+	brw_set_mask_control(p, BRW_MASK_DISABLE);
+	brw_MOV( p, brw_ip_reg(), return_address ); /* return: IP = saved return address */
+	brw_pop_insn_state(p);
+
+	brw_set_src1( calc, brw_imm_ud( ( p->nr_insn - base ) << 4 ) ); /* now that the emitted length is known */
+	
+	release_tmps( c, mark );
+    }
+}
+
+/**
+ * Emit a brw_RNDZ of source operand 0 for every destination channel that
+ * is enabled in the write mask, applying the instruction's saturate mode.
+ */
+static void emit_trunc( struct brw_wm_compile *c,
+                        const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, src;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        src = get_src_reg(c, inst, 0, chan);
+        brw_RNDZ(p, dst, src);
+    }
+
+    /* Leave saturation switched off for whatever is emitted next. */
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a MOV of source operand 0 into every destination channel that is
+ * enabled in the write mask, applying the instruction's saturate mode.
+ */
+static void emit_mov( struct brw_wm_compile *c,
+                      const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, src;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        /* XXX some moves from immediate value don't work reliably!!!
+         * That is why the source is fetched with get_src_reg() here
+         * rather than with get_src_reg_imm().
+         */
+        src = get_src_reg(c, inst, 0, chan);
+        brw_MOV(p, dst, src);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Compute pixel X (destination channel 0) and pixel Y (destination
+ * channel 1) as unsigned words from the coordinates passed in r1.
+ * Only the channels enabled in the write mask are emitted.
+ */
+static void emit_pixel_xy(struct brw_wm_compile *c,
+                          const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0),
+                                        BRW_REGISTER_TYPE_UW);
+    const struct brw_reg dst_x = get_dst_reg(c, inst, 0);
+    const struct brw_reg dst_y = get_dst_reg(c, inst, 1);
+
+    /* Calculate pixel centers by adding 1 or 0 to each of the
+     * micro-tile coordinates passed in r1.
+     */
+    if (writemask & WRITEMASK_X) {
+        const struct brw_reg x_out = vec8(retype(dst_x, BRW_REGISTER_TYPE_UW));
+        const struct brw_reg x_in = stride(suboffset(r1_uw, 4), 2, 4, 0);
+
+        brw_ADD(p, x_out, x_in, brw_imm_v(0x10101010));
+    }
+
+    if (writemask & WRITEMASK_Y) {
+        const struct brw_reg y_out = vec8(retype(dst_y, BRW_REGISTER_TYPE_UW));
+        const struct brw_reg y_in = stride(suboffset(r1_uw, 5), 2, 4, 0);
+
+        brw_ADD(p, y_out, y_in, brw_imm_v(0x11001100));
+    }
+}
+
+/**
+ * Compute delta X (destination channel 0) and delta Y (destination
+ * channel 1) by subtracting the values held in r1.0 and r1.1 from
+ * channels 0 and 1 of source operand 0 (read as unsigned words).
+ */
+static void emit_delta_xy(struct brw_wm_compile *c,
+                          const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const struct brw_reg origin = brw_vec1_grf(1, 0);
+    const struct brw_reg dst_x = get_dst_reg(c, inst, 0);
+    const struct brw_reg dst_y = get_dst_reg(c, inst, 1);
+    const struct brw_reg pixel_x = get_src_reg(c, inst, 0, 0);
+    const struct brw_reg pixel_y = get_src_reg(c, inst, 0, 1);
+
+    /* Calc delta X,Y by subtracting origin in r1 from the pixel
+     * centers.
+     */
+    if (writemask & WRITEMASK_X) {
+        brw_ADD(p, dst_x,
+                retype(pixel_x, BRW_REGISTER_TYPE_UW),
+                negate(origin));
+    }
+
+    if (writemask & WRITEMASK_Y) {
+        brw_ADD(p, dst_y,
+                retype(pixel_y, BRW_REGISTER_TYPE_UW),
+                negate(suboffset(origin, 1)));
+    }
+}
+
+/**
+ * Copy r1 into message register base_reg + 1 (with masking disabled) and
+ * then issue the framebuffer write.
+ *
+ * \param base_reg  message register number handed to brw_fb_WRITE()
+ * \param nr        forwarded to brw_fb_WRITE() (callers pass the number of
+ *                  message registers they filled in)
+ * \param target    forwarded to brw_fb_WRITE()
+ * \param eot       forwarded to brw_fb_WRITE()
+ */
+static void fire_fb_write( struct brw_wm_compile *c,
+                           GLuint base_reg,
+                           GLuint nr,
+                           GLuint target,
+                           GLuint eot)
+{
+    struct brw_compile *p = &c->func;
+    const struct brw_reg null_uw = retype(vec8(brw_null_reg()),
+                                          BRW_REGISTER_TYPE_UW);
+    const struct brw_reg r0_uw = retype(brw_vec8_grf(0, 0),
+                                        BRW_REGISTER_TYPE_UW);
+
+    /* Pass through control information:
+     *
+     *   mov (8) m1.0<1>:ud   r1.0<8;8,1>:ud   { Align1 NoMask }
+     */
+    brw_push_insn_state(p);
+    brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
+    brw_MOV(p, brw_message_reg(base_reg + 1), brw_vec8_grf(1, 0));
+    brw_pop_insn_state(p);
+
+    /* Send framebuffer write message: */
+    brw_fb_WRITE(p, null_uw, base_reg, r0_uw, target, nr, 0, eot);
+}
+
+/**
+ * Build the payload of a framebuffer write in the message registers and
+ * send it through fire_fb_write().
+ *
+ * The four channels of source operand 0 are always written.  Depending on
+ * the program key, a source depth value (from operand 2 or operand 1) and
+ * a destination depth value (from operand 1) are appended.  The render
+ * target index and the end-of-thread flag are unpacked from inst->Aux.
+ */
+static void emit_fb_write(struct brw_wm_compile *c,
+                          const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    /* Payload starts at message register 2; fire_fb_write() itself fills
+     * in message register base_reg + 1.
+     */
+    int nr = 2;
+    int channel;
+    GLuint target, eot;
+    struct brw_reg src0;
+
+    /* Reserve a space for AA - may not be needed:
+     */
+    if (c->key.aa_dest_stencil_reg)
+	nr += 1;
+
+    /* Copy the four channels of operand 0 into consecutive message regs. */
+    brw_push_insn_state(p);
+    for (channel = 0; channel < 4; channel++) {
+        src0 = get_src_reg(c,  inst, 0, channel);
+        /*  mov (8) m2.0<1>:ud   r28.0<8;8,1>:ud  { Align1 } */
+        /*  mov (8) m6.0<1>:ud   r29.0<8;8,1>:ud  { Align1 SecHalf } */
+        brw_MOV(p, brw_message_reg(nr + channel), src0);
+    }
+    /* skip over the regs populated above: */
+    nr += 8;
+    brw_pop_insn_state(p);
+
+    if (c->key.source_depth_to_render_target) {
+       /* Source depth: channel 2 of operand 2 when the program computes
+        * depth itself, otherwise channel 1 of operand 1.
+        */
+       if (c->key.computes_depth) {
+          src0 = get_src_reg(c, inst, 2, 2);
+          brw_MOV(p, brw_message_reg(nr), src0);
+       }
+       else {
+          src0 = get_src_reg(c, inst, 1, 1);
+          brw_MOV(p, brw_message_reg(nr), src0);
+       }
+
+       nr += 2;
+    }
+
+    if (c->key.dest_depth_reg) {
+        /* Split the key value into a channel index and a parity bit. */
+        const GLuint comp = c->key.dest_depth_reg / 2;
+        const GLuint off = c->key.dest_depth_reg % 2;
+
+        if (off != 0) {
+            /* XXX this code needs review/testing */
+            struct brw_reg arg1_0 = get_src_reg(c, inst, 1, comp);
+            struct brw_reg arg1_1 = get_src_reg(c, inst, 1, comp+1);
+
+            brw_push_insn_state(p);
+            brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+
+            brw_MOV(p, brw_message_reg(nr), offset(arg1_0, 1));
+            /* 2nd half? */
+            brw_MOV(p, brw_message_reg(nr+1), arg1_1);
+            brw_pop_insn_state(p);
+        }
+        else
+        {
+            /* NOTE(review): this path reads channel 1 and ignores `comp`
+             * -- confirm that is intended.
+             */
+            struct brw_reg src =  get_src_reg(c, inst, 1, 1);
+            brw_MOV(p, brw_message_reg(nr), src);
+        }
+        nr += 2;
+   }
+
+    /* inst->Aux packs the end-of-thread flag in bit 0 and the render
+     * target index in the remaining bits.
+     */
+    target = inst->Aux >> 1;
+    eot = inst->Aux & 1;
+    fire_fb_write(c, 0, nr, target, eot);
+}
+
+/**
+ * Compute W for destination channel 3: interpolate 1/w into message
+ * register 2, then invert it with the math unit.  Nothing is emitted
+ * unless the W channel is enabled in the write mask.
+ */
+static void emit_pixel_w( struct brw_wm_compile *c,
+                          const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst, setup, delta_x, delta_y, wpos_w;
+
+    if (!(inst->DstReg.WriteMask & WRITEMASK_W))
+        return;
+
+    dst = get_dst_reg(c, inst, 3);
+    setup = get_src_reg(c, inst, 0, 0);
+    delta_x = get_src_reg(c, inst, 1, 0);
+    delta_y = get_src_reg(c, inst, 1, 1);
+
+    wpos_w = brw_vec1_grf(setup.nr + 1, 4);
+
+    /* Calc 1/w - just linterp wpos[3] optimized by putting the
+     * result straight into a message reg.
+     */
+    brw_LINE(p, brw_null_reg(), wpos_w, delta_x);
+    brw_MAC(p, brw_message_reg(2), suboffset(wpos_w, 1), delta_y);
+
+    /* Calc w */
+    brw_math_16(p, dst,
+                BRW_MATH_FUNCTION_INV,
+                BRW_MATH_SATURATE_NONE,
+                2, brw_null_reg(),
+                BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Emit a LINE + MAC pair per enabled destination channel, using the
+ * per-channel coefficients that start at the register of source operand 0
+ * and the two deltas taken from channels 0 and 1 of source operand 1.
+ */
+static void emit_linterp(struct brw_wm_compile *c,
+                         const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const struct brw_reg setup = get_src_reg(c, inst, 0, 0);
+    const struct brw_reg delta_x = get_src_reg(c, inst, 1, 0);
+    const struct brw_reg delta_y = get_src_reg(c, inst, 1, 1);
+    const GLuint base = setup.nr;
+    struct brw_reg coef[4];
+    GLuint chan;
+
+    /* Two channels per register: sub-register 0 and sub-register 4. */
+    for (chan = 0; chan < 4; chan++)
+        coef[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        brw_LINE(p, brw_null_reg(), coef[chan], delta_x);
+        brw_MAC(p, dst, suboffset(coef[chan], 1), delta_y);
+    }
+}
+
+/**
+ * For every enabled destination channel, MOV element 3 of that channel's
+ * coefficient group (located from the register of source operand 0) into
+ * the destination.
+ */
+static void emit_cinterp(struct brw_wm_compile *c,
+                         const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const struct brw_reg setup = get_src_reg(c, inst, 0, 0);
+    const GLuint base = setup.nr;
+    struct brw_reg coef[4];
+    GLuint chan;
+
+    /* Two channels per register: sub-register 0 and sub-register 4. */
+    for (chan = 0; chan < 4; chan++)
+        coef[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        brw_MOV(p, dst, suboffset(coef[chan], 3));
+    }
+}
+
+/**
+ * Same LINE + MAC sequence as the linear interpolation, followed by a
+ * multiply of the result with channel 3 of source operand 2 (w).
+ */
+static void emit_pinterp(struct brw_wm_compile *c,
+                         const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const struct brw_reg setup = get_src_reg(c, inst, 0, 0);
+    const struct brw_reg delta_x = get_src_reg(c, inst, 1, 0);
+    const struct brw_reg delta_y = get_src_reg(c, inst, 1, 1);
+    const struct brw_reg w = get_src_reg(c, inst, 2, 3);
+    const GLuint base = setup.nr;
+    struct brw_reg coef[4];
+    GLuint chan;
+
+    /* Two channels per register: sub-register 0 and sub-register 4. */
+    for (chan = 0; chan < 4; chan++)
+        coef[chan] = brw_vec1_grf(base + chan / 2, (chan % 2) * 4);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        brw_LINE(p, brw_null_reg(), coef[chan], delta_x);
+        brw_MAC(p, dst, suboffset(coef[chan], 1), delta_y);
+        brw_MUL(p, dst, dst, w);
+    }
+}
+
+/**
+ * Sets the destination channels to 1.0 or 0.0 according to glFrontFacing.
+ *
+ * Every enabled channel is first set to 0.0; after comparing r1.6 against
+ * bit 31, a second round of MOVs writes 1.0.
+ * NOTE(review): predication is not enabled explicitly here; presumably
+ * brw_CMP() with a null destination turns it on for the instructions that
+ * follow -- confirm.
+ */
+static void emit_frontfacing(struct brw_wm_compile *c,
+			     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
+    struct brw_reg dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i);
+	    brw_MOV(p, dst, brw_imm_f(0.0));
+	}
+    }
+
+    /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
+     * us front face.  The constant is built from an unsigned 1: shifting a
+     * signed 1 into the sign bit is undefined behaviour in C.
+     */
+    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, r1_6ud, brw_imm_ud(1u << 31));
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i);
+	    brw_MOV(p, dst, brw_imm_f(1.0));
+	}
+    }
+    brw_set_predicate_control_flag_value(p, 0xff);
+}
+
+/**
+ * Emit a cross product: for each enabled destination channel i, a MUL of
+ * -src0[(i+2)%3] with src1[(i+1)%3] into the null register, followed by a
+ * MAC of src0[(i+1)%3] with src1[(i+2)%3] into the destination.  Only the
+ * MAC is subject to the instruction's saturate mode.
+ */
+static void emit_xpd(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        const GLuint next = (chan + 1) % 3;
+        const GLuint prev = (chan + 2) % 3;
+        struct brw_reg dst, a, b;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+
+        /* First product, negated. */
+        a = negate(get_src_reg(c, inst, 0, prev));
+        b = get_src_reg_imm(c, inst, 1, next);
+        brw_MUL(p, brw_null_reg(), a, b);
+
+        /* Second product, accumulated into the destination. */
+        a = get_src_reg(c, inst, 0, next);
+        b = get_src_reg_imm(c, inst, 1, prev);
+        brw_set_saturate(p, inst->SaturateMode != SATURATE_OFF);
+        brw_MAC(p, dst, a, b);
+        brw_set_saturate(p, 0);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a three-component dot product (MUL, MAC, MAC) of source operands 0
+ * and 1.  The result goes to the single channel selected by the write
+ * mask; nothing is emitted when no XYZW channel is enabled.
+ */
+static void emit_dp3(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+    struct brw_reg a[3], b[3], dst;
+    int dst_chan;
+    int k;
+
+    if (!enabled)
+        return;
+
+    /* Exactly one destination channel is expected. */
+    assert(is_power_of_two(enabled));
+    dst_chan = _mesa_ffs(enabled) - 1;
+
+    for (k = 0; k < 3; k++) {
+        a[k] = get_src_reg(c, inst, 0, k);
+        b[k] = get_src_reg_imm(c, inst, 1, k);
+    }
+
+    dst = get_dst_reg(c, inst, dst_chan);
+
+    brw_MUL(p, brw_null_reg(), a[0], b[0]);
+    brw_MAC(p, brw_null_reg(), a[1], b[1]);
+    /* Only the final accumulate writes the (possibly saturated) result. */
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_MAC(p, dst, a[2], b[2]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a four-component dot product (MUL, MAC, MAC, MAC) of source
+ * operands 0 and 1.  The result goes to the single channel selected by
+ * the write mask; nothing is emitted when no XYZW channel is enabled.
+ */
+static void emit_dp4(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+    struct brw_reg a[4], b[4], dst;
+    int dst_chan;
+    int k;
+
+    if (!enabled)
+        return;
+
+    /* Exactly one destination channel is expected. */
+    assert(is_power_of_two(enabled));
+    dst_chan = _mesa_ffs(enabled) - 1;
+
+    for (k = 0; k < 4; k++) {
+        a[k] = get_src_reg(c, inst, 0, k);
+        b[k] = get_src_reg_imm(c, inst, 1, k);
+    }
+
+    dst = get_dst_reg(c, inst, dst_chan);
+
+    brw_MUL(p, brw_null_reg(), a[0], b[0]);
+    brw_MAC(p, brw_null_reg(), a[1], b[1]);
+    brw_MAC(p, brw_null_reg(), a[2], b[2]);
+    /* Only the final accumulate writes the (possibly saturated) result. */
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_MAC(p, dst, a[3], b[3]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a homogeneous dot product: the three-component dot product of
+ * source operands 0 and 1 (MUL, MAC, MAC) plus channel 3 of operand 1.
+ * The result goes to the single channel selected by the write mask;
+ * nothing is emitted when no XYZW channel is enabled.
+ */
+static void emit_dph(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+    struct brw_reg a[4], b[4], dst;
+    int dst_chan;
+    int k;
+
+    if (!enabled)
+        return;
+
+    /* Exactly one destination channel is expected. */
+    assert(is_power_of_two(enabled));
+    dst_chan = _mesa_ffs(enabled) - 1;
+
+    /* All four channels of both operands are fetched, although a[3] is
+     * not used by the arithmetic below.
+     */
+    for (k = 0; k < 4; k++) {
+        a[k] = get_src_reg(c, inst, 0, k);
+        b[k] = get_src_reg_imm(c, inst, 1, k);
+    }
+
+    dst = get_dst_reg(c, inst, dst_chan);
+
+    brw_MUL(p, brw_null_reg(), a[0], b[0]);
+    brw_MAC(p, brw_null_reg(), a[1], b[1]);
+    brw_MAC(p, dst, a[2], b[2]);
+    /* Saturation applies to the final add only. */
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+    brw_ADD(p, dst, dst, b[3]);
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
+ * Channel 0 of source operand 0 is copied into message register 2 and the
+ * math function \p func is applied to it; the result is written to the
+ * single destination channel selected by the write mask.  Nothing is
+ * emitted when no XYZW channel is enabled.
+ */
+static void emit_math1(struct brw_wm_compile *c,
+                       const struct prog_instruction *inst, GLuint func)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+    struct brw_reg dst, arg;
+    GLuint saturate;
+    int dst_chan;
+
+    if (!enabled)
+        return;
+
+    /* Exactly one destination channel is expected. */
+    assert(is_power_of_two(enabled));
+    dst_chan = _mesa_ffs(enabled) - 1;
+
+    /* Get first component of source register */
+    dst = get_dst_reg(c, inst, dst_chan);
+    arg = get_src_reg(c, inst, 0, 0);
+
+    if (inst->SaturateMode != SATURATE_OFF)
+        saturate = BRW_MATH_SATURATE_SATURATE;
+    else
+        saturate = BRW_MATH_SATURATE_NONE;
+
+    brw_MOV(p, brw_message_reg(2), arg);
+    brw_math(p, dst, func, saturate,
+             2, brw_null_reg(),
+             BRW_MATH_DATA_VECTOR,
+             BRW_MATH_PRECISION_FULL);
+}
+
+/** Emit RCP: emit_math1() with BRW_MATH_FUNCTION_INV. */
+static void emit_rcp(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
+}
+
+/** Emit RSQ: emit_math1() with BRW_MATH_FUNCTION_RSQ. */
+static void emit_rsq(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
+}
+
+/** Emit SIN: emit_math1() with BRW_MATH_FUNCTION_SIN. */
+static void emit_sin(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
+}
+
+/** Emit COS: emit_math1() with BRW_MATH_FUNCTION_COS. */
+static void emit_cos(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
+}
+
+/** Emit EX2: emit_math1() with BRW_MATH_FUNCTION_EXP. */
+static void emit_ex2(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
+}
+
+/** Emit LG2: emit_math1() with BRW_MATH_FUNCTION_LOG. */
+static void emit_lg2(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
+}
+
+/**
+ * Emit an ADD of source operands 0 and 1 for every destination channel
+ * enabled in the write mask.  Operand 1 may be emitted as an immediate.
+ */
+static void emit_add(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        lhs = get_src_reg(c, inst, 0, chan);
+        rhs = get_src_reg_imm(c, inst, 1, chan);
+        brw_ADD(p, dst, lhs, rhs);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit ARL: MOV channel 0 of source operand 0 into the address register
+ * (ARF address register 0, accessed as eight unsigned words).
+ */
+static void emit_arl(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg address, value;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    address = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+                          BRW_ARF_ADDRESS, 0);
+    value = get_src_reg(c, inst, 0, 0); /* channel 0 */
+    brw_MOV(p, address, value);
+
+    brw_set_saturate(p, 0);
+}
+
+
+/**
+ * Emit a MUL of source operands 0 and 1 for every destination channel
+ * enabled in the write mask.  Operand 1 may be emitted as an immediate.
+ */
+static void emit_mul(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        lhs = get_src_reg(c, inst, 0, chan);
+        rhs = get_src_reg_imm(c, inst, 1, chan);
+        brw_MUL(p, dst, lhs, rhs);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit a brw_FRC of source operand 0 for every destination channel
+ * enabled in the write mask.  The operand may be emitted as an immediate.
+ */
+static void emit_frc(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, arg;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        arg = get_src_reg_imm(c, inst, 0, chan);
+        brw_FRC(p, dst, arg);
+    }
+
+    /* Saturation is only reset when it was actually switched on above. */
+    if (inst->SaturateMode != SATURATE_OFF)
+        brw_set_saturate(p, 0);
+}
+
+/**
+ * Emit FLR as a brw_RNDD of source operand 0 for every destination
+ * channel enabled in the write mask.  The operand may be emitted as an
+ * immediate.
+ */
+static void emit_flr(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, arg;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        arg = get_src_reg_imm(c, inst, 0, chan);
+        brw_RNDD(p, dst, arg);
+    }
+
+    brw_set_saturate(p, 0);
+}
+
+
+/**
+ * Emit MIN or MAX (selected by inst->Opcode) for every destination channel
+ * enabled in the write mask.
+ *
+ * Per channel the sequence is: MOV dst, src0; CMP src1 against src0; a
+ * predicated MOV dst, src1.  When the destination register is also one of
+ * the sources, the result is built in a temporary and copied to the real
+ * destination at the end.
+ */
+static void emit_min_max(struct brw_wm_compile *c,
+                         const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint mask = inst->DstReg.WriteMask;
+    const int mark = mark_tmps(c);
+    int i;
+    brw_push_insn_state(p);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+            struct brw_reg real_dst = get_dst_reg(c, inst, i);
+	    struct brw_reg src0 = get_src_reg(c, inst, 0, i);
+	    struct brw_reg src1 = get_src_reg(c, inst, 1, i);
+            struct brw_reg dst;
+            /* if dst==src0 or dst==src1 we need to use a temp reg.
+             * The test must look at the real destination: `dst` has not
+             * been assigned yet at this point.
+             */
+            GLboolean use_temp = brw_same_reg(real_dst, src0) ||
+                                 brw_same_reg(real_dst, src1);
+            if (use_temp)
+               dst = alloc_tmp(c);
+            else
+               dst = real_dst;
+
+	    /* Start from src0 ... */
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_MOV(p, dst, src0);
+	    brw_set_saturate(p, 0);
+
+	    /* ... and find out where src1 should win. */
+            if (inst->Opcode == OPCODE_MIN)
+               brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0);
+            else
+               brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, src1, src0);
+
+	    /* Predicated overwrite with src1. */
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	    brw_MOV(p, dst, src1);
+	    brw_set_saturate(p, 0);
+	    brw_set_predicate_control_flag_value(p, 0xff);
+            if (use_temp)
+               brw_MOV(p, real_dst, dst);
+	}
+    }
+    brw_pop_insn_state(p);
+    release_tmps(c, mark);
+}
+
+/**
+ * Emit POW: channel 0 of source operands 0 and 1 are copied into message
+ * registers 2 and 3 and BRW_MATH_FUNCTION_POW is applied.  The result is
+ * written to the single destination channel selected by the write mask;
+ * nothing is emitted when no XYZW channel is enabled.
+ */
+static void emit_pow(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint enabled = inst->DstReg.WriteMask & WRITEMASK_XYZW;
+    struct brw_reg dst, base, exponent;
+    GLuint saturate;
+    int dst_chan;
+
+    if (!enabled)
+        return;
+
+    /* Exactly one destination channel is expected. */
+    assert(is_power_of_two(enabled));
+    dst_chan = _mesa_ffs(enabled) - 1;
+
+    dst = get_dst_reg(c, inst, dst_chan);
+    base = get_src_reg_imm(c, inst, 0, 0);
+    exponent = get_src_reg_imm(c, inst, 1, 0);
+
+    brw_MOV(p, brw_message_reg(2), base);
+    brw_MOV(p, brw_message_reg(3), exponent);
+
+    if (inst->SaturateMode != SATURATE_OFF)
+        saturate = BRW_MATH_SATURATE_SATURATE;
+    else
+        saturate = BRW_MATH_SATURATE_NONE;
+
+    brw_math(p, dst,
+             BRW_MATH_FUNCTION_POW,
+             saturate,
+             2, brw_null_reg(),
+             BRW_MATH_DATA_VECTOR,
+             BRW_MATH_PRECISION_FULL);
+}
+
+/**
+ * Emit LRP per enabled destination channel as
+ *    dst = (1 - src0) * src2 + src0 * src1
+ * using an ADD into dst, a MUL into the null register and a MAC into dst.
+ * Only the final MAC is subject to the instruction's saturate mode.
+ *
+ * Because dst is written by the first ADD, src1 and src2 are copied into
+ * temporaries first when their register number matches the destination's.
+ * NOTE(review): src0 is read again by the MAC after dst has been written
+ * and gets no such protection -- confirm src0 can never alias dst.
+ */
+static void emit_lrp(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    GLuint mask = inst->DstReg.WriteMask;
+    struct brw_reg dst, tmp1, tmp2, src0, src1, src2;
+    int i;
+    int mark = mark_tmps(c);
+    for (i = 0; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i);
+	    src0 = get_src_reg(c, inst, 0, i);
+
+	    src1 = get_src_reg_imm(c, inst, 1, i);
+
+	    /* Preserve src1 if the ADD below would overwrite it. */
+	    if (src1.nr == dst.nr) {
+		tmp1 = alloc_tmp(c);
+		brw_MOV(p, tmp1, src1);
+	    } else
+		tmp1 = src1;
+
+	    src2 = get_src_reg(c, inst, 2, i);
+	    /* Likewise for src2. */
+	    if (src2.nr == dst.nr) {
+		tmp2 = alloc_tmp(c);
+		brw_MOV(p, tmp2, src2);
+	    } else
+		tmp2 = src2;
+
+	    /* dst = 1 - src0 */
+	    brw_ADD(p, dst, negate(src0), brw_imm_f(1.0));
+	    /* (1 - src0) * src2, result discarded to the null register */
+	    brw_MUL(p, brw_null_reg(), dst, tmp2);
+	    brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+	    /* accumulate src0 * src1 and write the result to dst */
+	    brw_MAC(p, dst, src0, tmp1);
+	    brw_set_saturate(p, 0);
+	}
+	/* Temporaries are only needed within one channel. */
+	release_tmps(c, mark);
+    }
+}
+
+/**
+ * For GLSL shaders, this KIL will be unconditional.
+ * It may be contained inside an IF/ENDIF structure of course.
+ *
+ * With masking disabled, the complement of brw_mask_reg(1) is stored in
+ * c->emit_mask_reg and then ANDed into r0.0 (accessed as an unsigned word).
+ */
+static void emit_kil(struct brw_wm_compile *c)
+{
+    struct brw_compile *p = &c->func;
+    /* r0.0 viewed as UW; NOTE(review): named "depth" here, but what this
+     * word holds is not visible from this function -- confirm.
+     */
+    struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+    brw_push_insn_state(p);
+    brw_set_mask_control(p, BRW_MASK_DISABLE);
+    brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+    /* Clear the bits in r0.0 that are not set in the inverted mask. */
+    brw_AND(p, depth, c->emit_mask_reg, depth);
+    brw_pop_insn_state(p);
+}
+
+/**
+ * Emit MAD per enabled destination channel as a MUL of source operands 0
+ * and 1 into dst, followed by an ADD of source operand 2.  Only the ADD
+ * is subject to the instruction's saturate mode.
+ */
+static void emit_mad(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, factor0, factor1, addend;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        factor0 = get_src_reg(c, inst, 0, chan);
+        factor1 = get_src_reg_imm(c, inst, 1, chan);
+        addend = get_src_reg_imm(c, inst, 2, chan);
+
+        brw_MUL(p, dst, factor0, factor1);
+
+        brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0);
+        brw_ADD(p, dst, dst, addend);
+        brw_set_saturate(p, 0);
+    }
+}
+
+/**
+ * Emit a "set on condition" operation: per enabled destination channel,
+ * compare source operands 0 and 1 with \p cond, write 0.0 unpredicated
+ * and then write 1.0 under normal predication.
+ *
+ * \param cond  one of the BRW_CONDITIONAL_* comparison codes
+ */
+static void emit_sop(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst, GLuint cond)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst, lhs, rhs;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg(c, inst, chan);
+        lhs = get_src_reg(c, inst, 0, chan);
+        rhs = get_src_reg_imm(c, inst, 1, chan);
+
+        brw_push_insn_state(p);
+        brw_CMP(p, brw_null_reg(), cond, lhs, rhs);
+
+        /* Default result ... */
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+        brw_MOV(p, dst, brw_imm_f(0.0));
+
+        /* ... replaced under the predicate set up by the compare. */
+        brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+        brw_MOV(p, dst, brw_imm_f(1.0));
+        brw_pop_insn_state(p);
+    }
+}
+
+/** Emit SLT: emit_sop() with BRW_CONDITIONAL_L. */
+static void emit_slt(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_L);
+}
+
+/** Emit SLE: emit_sop() with BRW_CONDITIONAL_LE. */
+static void emit_sle(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_LE);
+}
+
+/** Emit SGT: emit_sop() with BRW_CONDITIONAL_G. */
+static void emit_sgt(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_G);
+}
+
+/** Emit SGE: emit_sop() with BRW_CONDITIONAL_GE. */
+static void emit_sge(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_GE);
+}
+
+/** Emit SEQ: emit_sop() with BRW_CONDITIONAL_EQ. */
+static void emit_seq(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_EQ);
+}
+
+/** Emit SNE: emit_sop() with BRW_CONDITIONAL_NEQ. */
+static void emit_sne(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
+}
+
+/**
+ * View \p reg as signed words, starting at word 1, with the region
+ * parameters (0, 8, 2) passed to stride().
+ */
+static INLINE struct brw_reg high_words( struct brw_reg reg )
+{
+    const struct brw_reg as_words = retype( reg, BRW_REGISTER_TYPE_W );
+    const struct brw_reg from_word1 = suboffset( as_words, 1 );
+
+    return stride( from_word1, 0, 8, 2 );
+}
+
+/**
+ * View \p reg as signed words, starting at word 0, with the region
+ * parameters (0, 8, 2) passed to stride().
+ */
+static INLINE struct brw_reg low_words( struct brw_reg reg )
+{
+    const struct brw_reg as_words = retype( reg, BRW_REGISTER_TYPE_W );
+
+    return stride( as_words, 0, 8, 2 );
+}
+
+/**
+ * View \p reg as signed bytes, starting at byte 0, with the region
+ * parameters (0, 16, 2) passed to stride().
+ */
+static INLINE struct brw_reg even_bytes( struct brw_reg reg )
+{
+    const struct brw_reg as_bytes = retype( reg, BRW_REGISTER_TYPE_B );
+
+    return stride( as_bytes, 0, 16, 2 );
+}
+
+/**
+ * View \p reg as signed bytes, starting at byte 1, with the region
+ * parameters (0, 16, 2) passed to stride().
+ */
+static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
+{
+    const struct brw_reg as_bytes = retype( reg, BRW_REGISTER_TYPE_B );
+    const struct brw_reg from_byte1 = suboffset( as_bytes, 1 );
+
+    return stride( from_byte1, 0, 16, 2 );
+}
+
+/* One-, two- and three-dimensional Perlin noise, similar to the description
+   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
+
+/* Body of the one-dimensional noise subroutine.  The argument is read
+   from, and the result written back to, the temporary returned by
+   lookup_tmp( c, mark - 2 ).  NOTE(review): presumably that is the
+   temporary the caller allocated just before invoke_subroutine() --
+   confirm against lookup_tmp()/mark_tmps().  All other temporaries
+   allocated here are released again before returning. */
+static void noise1_sub( struct brw_wm_compile *c ) {
+
+    struct brw_compile *p = &c->func;
+    struct brw_reg param,
+	x0, x1, /* gradients at each end */       
+	t, tmp[ 2 ], /* float temporaries */
+	itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0 = alloc_tmp( c );
+    x1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    tmp[ 0 ] = alloc_tmp( c );
+    tmp[ 1 ] = alloc_tmp( c );
+    /* The integer views share storage with the float temporaries:
+       itmp0/1 alias tmp0/1, itmp2/3 alias x0/x1 and itmp4 aliases t. */
+    itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
+    itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
+    itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
+    itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
+    itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
+    
+    param = lookup_tmp( c, mark - 2 );
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+
+    brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+
+    /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
+       be hashed.  Also compute the remainder (offset within the unit
+       length), interleaved to reduce register dependency penalties. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
+    brw_FRC( p, param, param );
+    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
+    brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+
+    /* We're now ready to perform the hashing.  The two hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 32x16
+       bit multiplication, and 16-bit swizzles (which we get for
+       free).  We can't use immediate operands in the multiplies,
+       because immediates are permitted only in src1 and the 16-bit
+       factor is permitted only in src0. */
+    /* Three rounds of "multiply by a constant, then fold the high words
+       into the low words", one round per constant loaded above. */
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+    for( i = 0; i < 2; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+    for( i = 0; i < 2; i++ )
+       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		high_words( itmp[ i ] ) );
+
+    /* Now we want to initialise the two gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 31 ), but
+       we correct for that right at the end. */
+    brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
+    brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
+
+    /* Each gradient is weighted by the offset from its own end point:
+       param for the lower end, param - 1 for the upper end. */
+    brw_MUL( p, x0, x0, param );
+    brw_MUL( p, x1, x1, t );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin). */
+    brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
+					   pipeline */
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
+    /* param now holds the interpolation weight */
+    brw_MUL( p, param, tmp[ 0 ], param );
+    /* x0 + weight * (x1 - x0) */
+    brw_MUL( p, x1, x1, param );
+    brw_ADD( p, x0, x0, x1 );    
+    /* scale by pow( 2, -30 ), to compensate for the format conversion
+       above and an extra factor of 2 so that a single gradient covers
+       the [-1,1] range */
+    brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
+
+    release_tmps( c, mark );
+}
+
+/**
+ * Emit NOISE1: copy channel 0 of source operand 0 into a fresh temporary,
+ * run the SUB_NOISE1 subroutine (noise1_sub) and copy that temporary into
+ * every destination channel enabled in the write mask.
+ *
+ * No temporaries may be live on entry (asserted).
+ */
+static void emit_noise1( struct brw_wm_compile *c,
+                         const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int mark = mark_tmps( c );
+    struct brw_reg arg, value;
+    int chan;
+
+    assert( mark == 0 );
+
+    arg = get_src_reg( c, inst, 0, 0 );
+    value = alloc_tmp( c );
+    brw_MOV( p, value, arg );
+
+    invoke_subroutine( c, SUB_NOISE1, noise1_sub );
+
+    /* Fill in the result: */
+    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+    for (chan = 0; chan < 4; chan++) {
+        struct brw_reg dst;
+
+        if (!(writemask & (1 << chan)))
+            continue;
+
+        dst = get_dst_reg( c, inst, chan );
+        brw_MOV( p, dst, value );
+    }
+    /* Saturation is only reset when it was actually switched on above. */
+    if( inst->SaturateMode == SATURATE_ZERO_ONE )
+        brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+    
+static void noise2_sub( struct brw_wm_compile *c ) {
+    /* Code generator for the SUB_NOISE2 subroutine used by emit_noise2. */
+    struct brw_compile *p = &c->func;
+    struct brw_reg param0, param1,
+	x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */       
+	t, tmp[ 4 ], /* float temporaries */
+	itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    for( i = 0; i < 4; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+    }
+    itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
+    itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
+    itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
+    /* Parameters sit just below our mark (presumably emit_noise2's tmps). */
+    param0 = lookup_tmp( c, mark - 3 ); /* also receives the final result */
+    param1 = lookup_tmp( c, mark - 2 );
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+    
+    /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Also compute the remainders (offsets within the unit
+       square), interleaved to reduce register dependency penalties. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+    brw_FRC( p, param0, param0 );
+    brw_FRC( p, param1, param1 );
+    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
+    brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
+	     low_words( itmp[ 1 ] ) ); /* fold floor(y) into the high word */
+    brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
+    brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
+    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) ); /* corner x0y1 */
+    brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) ); /* corner x1y0 */
+    brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) ); /* corner x1y1 */
+
+    /* We're now ready to perform the hashing.  The four hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 32x16
+       bit multiplication, and 16-bit swizzles (which we get for
+       free).  We can't use immediate operands in the multiplies,
+       because immediates are permitted only in src1 and the 16-bit
+       factor is permitted only in src0. */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
+		 high_words( itmp[ i ] ) );
+
+    /* Now we want to initialise the four gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end. */
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* x offset from the x1 corners */
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) ); /* low words: x components */
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
+    
+    brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) ); /* high words: y components */
+    brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* y offset from the y1 corners */
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
+    brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
+    brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
+
+    brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin). */
+    brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
+    brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
+						 pipeline */
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
+						 pipeline */
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
+    brw_MUL( p, param0, tmp[ 0 ], param0 ); /* param0 becomes the x weight */
+    brw_MUL( p, param1, tmp[ 1 ], param1 ); /* param1 becomes the y weight */
+    
+    /* Here we interpolate in the y dimension... */
+    brw_MUL( p, x0y1, x0y1, param1 );
+    brw_MUL( p, x1y1, x1y1, param1 );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  There are horrible register dependencies here,
+       but we have nothing else to do. */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, param0 );
+    brw_ADD( p, x0y0, x0y0, x1y0 );
+    
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+static void emit_noise2( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, src1, param0, param1, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    int mark = mark_tmps( c );
+    /* Emit a 2D noise instruction; expects mark_tmps() to report 0 here. */
+    assert( mark == 0 );
+    
+    src0 = get_src_reg( c, inst, 0, 0 );
+    src1 = get_src_reg( c, inst, 0, 1 );
+
+    param0 = alloc_tmp( c ); /* first argument in; the result is read back from it */
+    param1 = alloc_tmp( c ); /* second argument */
+
+    brw_MOV( p, param0, src0 );
+    brw_MOV( p, param1, src1 );
+
+    invoke_subroutine( c, SUB_NOISE2, noise2_sub );
+    
+    /* Fill in the result: copy it to every dst channel in the write mask */
+    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i);
+	    brw_MOV( p, dst, param0 );
+	}
+    }
+    if( inst->SaturateMode == SATURATE_ZERO_ONE )
+	brw_set_saturate( p, 0 ); /* switch saturation back off afterwards */
+    
+    release_tmps( c, mark );
+}
+
+/**
+ * The three-dimensional case is much like the one- and two- versions above,
+ * but since the number of corners is rapidly growing we now pack 16 16-bit
+ * hashes into each register to extract more parallelism from the EUs.
+ */
+static void noise3_sub( struct brw_wm_compile *c ) {
+    /* Code generator for the SUB_NOISE3 subroutine used by emit_noise3. */
+    struct brw_compile *p = &c->func;
+    struct brw_reg param0, param1, param2,
+	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+	xi, yi, zi, /* interpolation coefficients */
+	t, tmp[ 8 ], /* float temporaries */
+	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+    int i;
+    int mark = mark_tmps( c );
+
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    xi = alloc_tmp( c );
+    yi = alloc_tmp( c );
+    zi = alloc_tmp( c );
+    t = alloc_tmp( c );
+    for( i = 0; i < 8; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+    }
+    /* Parameters sit just below our mark (presumably emit_noise3's tmps). */
+    param0 = lookup_tmp( c, mark - 4 ); /* also receives the final result */
+    param1 = lookup_tmp( c, mark - 3 );
+    param2 = lookup_tmp( c, mark - 2 );
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+    
+    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Also compute the remainders (offsets within the unit
+       cube), interleaved to reduce register dependency penalties. */
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
+    brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
+    brw_FRC( p, param0, param0 );
+    brw_FRC( p, param1, param1 );
+    brw_FRC( p, param2, param2 );
+    /* Since we now have only 16 bits of precision in the hash, we must
+       be more careful about thorough mixing to maintain entropy as we
+       squash the input vector into a small scalar. */
+    brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
+    brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
+    brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
+	     brw_imm_uw( 0x9B93 ) );
+    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) ); /* high word: the x + 1 neighbour */
+
+    /* Temporarily disable the execution mask while we work with ExecSize=16
+       channels (the mask is set for ExecSize=8 and is probably incorrect).
+       Although this might cause execution of unwanted channels, the code
+       writes only to temporary registers and has no side effects, so
+       disabling the mask is harmless. */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) ); /* y + 1 */
+    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) ); /* z + 1 */
+    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) ); /* y + 1, z + 1 */
+
+    /* We're now ready to perform the hashing.  The eight hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 16x16
+       bit multiplication, and 8-bit swizzles (which we get for
+       free). */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    brw_pop_insn_state( p );
+
+    /* Now we want to initialise the four rear gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end. */
+    /* x component */
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* x offset from the x1 corners */
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); /* shift the hashes for y */
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* y offset from the y1 corners */
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) ); /* shift the hashes for z */
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) ); /* x - 1 again, for the front face */
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    
+    /* We interpolate between the gradients using the polynomial
+       6t^5 - 15t^4 + 10t^3 (Perlin). */
+    brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
+    brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
+    brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
+    brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
+    brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
+    brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    brw_MUL( p, xi, xi, param0 );
+    brw_MUL( p, yi, yi, param1 );
+    brw_MUL( p, zi, zi, param2 );
+    
+    /* Here we interpolate in the y dimension... */
+    brw_MUL( p, x0y1, x0y1, yi );
+    brw_MUL( p, x1y1, x1y1, yi );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, xi );
+    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+    /* Now do the same thing for the front four gradients... */
+    /* x component */
+    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); /* shift the hashes for y */
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) ); /* y offset from the y1 corners */
+    brw_MUL( p, x0y0, x0y0, param0 );
+    brw_MUL( p, x0y1, x0y1, param0 );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) ); /* shift the hashes for z */
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param2, brw_imm_f( -1.0 ) ); /* z offset from the front face */
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    
+    /* The interpolation coefficients are still around from last time, so
+       again interpolate in the y dimension... */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, yi );
+    brw_MUL( p, x1y1, x1y1, yi );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
+       time put the front face in tmp[ 1 ] and we're nearly there... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, xi );
+    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+    /* The final interpolation, in the z dimension: */
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+    
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+static void emit_noise3( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg src0, src1, src2, param0, param1, param2, dst;
+    GLuint mask = inst->DstReg.WriteMask;
+    int i;
+    int mark = mark_tmps( c );
+    /* Emit a 3D noise instruction; expects mark_tmps() to report 0 here. */
+    assert( mark == 0 );
+    
+    src0 = get_src_reg( c, inst, 0, 0 );
+    src1 = get_src_reg( c, inst, 0, 1 );
+    src2 = get_src_reg( c, inst, 0, 2 );
+
+    param0 = alloc_tmp( c ); /* first argument in; the result is read back from it */
+    param1 = alloc_tmp( c ); /* second argument */
+    param2 = alloc_tmp( c ); /* third argument */
+
+    brw_MOV( p, param0, src0 );
+    brw_MOV( p, param1, src1 );
+    brw_MOV( p, param2, src2 );
+
+    invoke_subroutine( c, SUB_NOISE3, noise3_sub );
+    
+    /* Fill in the result: copy it to every dst channel in the write mask */
+    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
+    for (i = 0 ; i < 4; i++) {
+	if (mask & (1<<i)) {
+	    dst = get_dst_reg(c, inst, i);
+	    brw_MOV( p, dst, param0 );
+	}
+    }
+    if( inst->SaturateMode == SATURATE_ZERO_ONE )
+	brw_set_saturate( p, 0 ); /* switch saturation back off afterwards */
+    
+    release_tmps( c, mark );
+}
+    
+/**
+ * For the four-dimensional case, the little micro-optimisation benefits
+ * we obtain by unrolling all the loops aren't worth the massive bloat it
+ * now causes.  Instead, we loop twice around performing a similar operation
+ * to noise3, once for the w=0 cube and once for the w=1, with a bit more
+ * code to glue it all together.
+ */
+static void noise4_sub( struct brw_wm_compile *c )
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg param[ 4 ],
+	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
+	w0, /* noise for the w=0 cube */
+	floors[ 2 ], /* integer coordinates of base corner of hypercube */
+	interp[ 4 ], /* interpolation coefficients */
+	t, tmp[ 8 ], /* float temporaries */
+	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
+	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
+    int i, j;
+    int mark = mark_tmps( c );
+    GLuint loop, origin;
+    
+    x0y0 = alloc_tmp( c );
+    x0y1 = alloc_tmp( c );
+    x1y0 = alloc_tmp( c );
+    x1y1 = alloc_tmp( c );
+    t = alloc_tmp( c );
+    w0 = alloc_tmp( c );    
+    floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+    floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
+
+    for( i = 0; i < 4; i++ ) {
+	param[ i ] = lookup_tmp( c, mark - 5 + i );
+	interp[ i ] = alloc_tmp( c );
+    }
+    
+    for( i = 0; i < 8; i++ ) {
+	tmp[ i ] = alloc_tmp( c );
+	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
+	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
+    }
+
+    brw_set_access_mode( p, BRW_ALIGN_1 );
+
+    /* We only want 16 bits of precision from the integral part of each
+       co-ordinate, but unfortunately the RNDD semantics would saturate
+       at 16 bits if we performed the operation directly to a 16-bit
+       destination.  Therefore, we round to 32-bit temporaries where
+       appropriate, and then store only the lower 16 bits. */
+    brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
+    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
+    brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
+    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
+    brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
+    brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
+
+    /* Modify the flag register here, because the side effect is useful
+       later (see below).  We know for certain that all flags will be
+       cleared, since the FRC instruction cannot possibly generate
+       negative results.  Even for exceptional inputs (infinities, denormals,
+       NaNs), the architecture guarantees that the L conditional is false. */
+    brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
+    brw_FRC( p, param[ 0 ], param[ 0 ] );
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    for( i = 1; i < 4; i++ )	
+	brw_FRC( p, param[ i ], param[ i ] );
+    
+    /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
+       of all. */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
+    for( i = 0; i < 4; i++ )
+	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+    for( i = 0; i < 4; i++ )
+	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
+    for( j = 0; j < 3; j++ )
+	for( i = 0; i < 4; i++ )
+	    brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
+
+    /* Mark the current address, as it will be a jump destination.  The
+       following code will be executed twice: first, with the flag
+       register clear indicating the w=0 case, and second with flags
+       set for w=1. */
+    loop = p->nr_insn;
+    
+    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
+       be hashed.  Since we have only 16 bits of precision in the hash, we
+       must be careful about thorough mixing to maintain entropy as we
+       squash the input vector into a small scalar. */
+    brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) );
+    brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
+	     brw_imm_uw( 0xD0BD ) );
+    brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
+	     brw_imm_uw( 0x9B93 ) );
+    brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
+	     brw_imm_uw( 0xA359 ) );
+    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
+	     brw_imm_uw( 0xBC8F ) );
+
+    /* Temporarily disable the execution mask while we work with ExecSize=16
+       channels (the mask is set for ExecSize=8 and is probably incorrect).
+       Although this might cause execution of unwanted channels, the code
+       writes only to temporary registers and has no side effects, so
+       disabling the mask is harmless. */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
+    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
+    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
+
+    /* We're now ready to perform the hashing.  The eight hashes are
+       interleaved for performance.  The hash function used is
+       designed to rapidly achieve avalanche and require only 16x16
+       bit multiplication, and 8-bit swizzles (which we get for
+       free). */
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    for( i = 0; i < 4; i++ )
+	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
+    for( i = 0; i < 4; i++ )
+	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
+		 odd_bytes( wtmp[ i ] ) );
+    brw_pop_insn_state( p );
+
+    /* Now we want to initialise the four rear gradients based on the
+       hashes.  Format conversion from signed integer to float leaves
+       everything scaled too high by a factor of pow( 2, 15 ), but
+       we correct for that right at the end. */
+    /* x component */
+    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+    
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );    
+    /* prepare t for the w component (used below): w the first time through
+       the loop; w - 1 the second time */
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+    p->current->header.predicate_inverse = 1;
+    brw_MOV( p, t, param[ 3 ] );
+    p->current->header.predicate_inverse = 0;
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* w component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* Here we interpolate in the y dimension... */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
+
+    /* Now do the same thing for the front four gradients... */
+    /* x component */
+    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
+    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
+    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
+    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, x1y0, x1y0, t );
+    brw_MUL( p, x1y1, x1y1, t );
+    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
+    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
+
+    /* y component */
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
+    
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    
+    /* z component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
+    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
+    brw_pop_insn_state( p );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    /* prepare t for the w component (used below): w the first time through
+       the loop; w - 1 the second time */
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
+    p->current->header.predicate_inverse = 1;
+    brw_MOV( p, t, param[ 3 ] );
+    p->current->header.predicate_inverse = 0;
+    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* w component */
+    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
+    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
+    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
+
+    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
+    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
+    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
+    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
+    
+    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
+    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
+    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
+    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
+
+    /* Interpolate in the y dimension: */
+    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
+    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
+    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
+    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
+    brw_ADD( p, x0y0, x0y0, x0y1 );
+    brw_ADD( p, x1y0, x1y0, x1y1 );
+
+    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
+       time put the front face in tmp[ 1 ] and we're nearly there... */
+    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
+    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
+    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
+
+    /* Another interpolation, in the z dimension: */
+    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
+    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
+
+    /* Exit the loop if we've computed both cubes... */
+    origin = p->nr_insn;
+    brw_push_insn_state( p );
+    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
+    brw_pop_insn_state( p );
+
+    /* Save the result for the w=0 case, and increment the w coordinate: */
+    brw_MOV( p, w0, tmp[ 0 ] );
+    brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
+	     brw_imm_uw( 1 ) );
+
+    /* Loop around for the other cube.  Explicitly set the flag register
+       (unfortunately we must spend an extra instruction to do this: we
+       can't rely on a side effect of the previous MOV or ADD because
+       conditional modifiers which are normally true might be false in
+       exceptional circumstances, e.g. given a NaN input; the add to
+       brw_ip_reg() is not suitable because the IP is not an 8-vector). */
+    brw_push_insn_state( p );
+    brw_set_mask_control( p, BRW_MASK_DISABLE );
+    brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
+    brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
+	     brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
+    brw_pop_insn_state( p );
+
+    /* Patch the previous conditional branch now that we know the
+       destination address. */
+    brw_set_src1( p->store + origin,
+		  brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
+
+    /* The very last interpolation. */
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );    
+    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
+    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
+
+    /* scale by pow( 2, -15 ), as described above */
+    brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
+
+    release_tmps( c, mark );
+}
+
+/* Emit NOISE4: copy the four source channels into freshly allocated
+   temporaries, call the shared noise4 subroutine, and replicate the
+   value it leaves in the first temporary to every enabled channel. */
+static void emit_noise4( struct brw_wm_compile *c,
+			 const struct prog_instruction *inst )
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    const int saturate = ( inst->SaturateMode == SATURATE_ZERO_ONE );
+    struct brw_reg src[ 4 ], param[ 4 ];
+    int mark = mark_tmps( c );
+    int chan;
+
+    assert( mark == 0 );
+
+    /* Fetch every source first, then allocate every temporary, then
+       emit the copies. */
+    for( chan = 0; chan < 4; chan++ )
+	src[ chan ] = get_src_reg( c, inst, 0, chan );
+
+    for( chan = 0; chan < 4; chan++ )
+	param[ chan ] = alloc_tmp( c );
+
+    for( chan = 0; chan < 4; chan++ )
+	brw_MOV( p, param[ chan ], src[ chan ] );
+
+    invoke_subroutine( c, SUB_NOISE4, noise4_sub );
+
+    /* Copy the result (param[ 0 ]) to each channel in the write mask,
+       saturating if the instruction asks for it. */
+    brw_set_saturate( p, saturate );
+    for( chan = 0; chan < 4; chan++ ) {
+	if( !( writemask & ( 1 << chan ) ) )
+	    continue;
+	brw_MOV( p, get_dst_reg( c, inst, chan ), param[ 0 ] );
+    }
+    if( saturate )
+	brw_set_saturate( p, 0 );
+
+    release_tmps( c, mark );
+}
+    
+/* Emit WPOS.xy: compute the pixel offset from the window's bottom left
+ * into the destination X and Y channels.  The pixel X/Y come from
+ * source 0 (read as words); origin_x, origin_y and drawable_height
+ * come from the program key.
+ */
+static void emit_wpos_xy(struct brw_wm_compile *c,
+                         const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    const GLuint writemask = inst->DstReg.WriteMask;
+    struct brw_reg out_x, out_y, pix_x, pix_y;
+
+    /* All four registers are looked up whatever the write mask says. */
+    out_x = get_dst_reg(c, inst, 0);
+    out_y = get_dst_reg(c, inst, 1);
+    pix_x = get_src_reg(c, inst, 0, 0);
+    pix_y = get_src_reg(c, inst, 0, 1);
+
+    if (writemask & WRITEMASK_X) {
+	/* X' = X - origin_x */
+	brw_ADD(p, out_x,
+		retype(pix_x, BRW_REGISTER_TYPE_W),
+		brw_imm_d(0 - c->key.origin_x));
+    }
+
+    if (writemask & WRITEMASK_Y) {
+	/* Y' = (origin_y + drawable_height - 1) - Y */
+	brw_ADD(p, out_y,
+		negate(retype(pix_y, BRW_REGISTER_TYPE_W)),
+		brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+    }
+}
+
+/* Emit TXB: sample a texture with a bias taken from source component 3.
+   TODO: BIAS on SIMD8 not working yet...
+ */	
+static void emit_txb(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst[4], src[4], payload_reg;
+    /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+    const GLuint unit = inst->TexSrcUnit;
+    GLuint i;
+    GLuint msg_type;
+
+    assert(unit < BRW_MAX_TEX_UNIT);
+
+    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+    for (i = 0; i < 4; i++) 
+	dst[i] = get_dst_reg(c, inst, i);
+    for (i = 0; i < 4; i++)
+	src[i] = get_src_reg(c, inst, 0, i);
+    /* Load the coordinates into m2..m4; the ones a target lacks are zeroed. */
+    switch (inst->TexSrcTarget) {
+	case TEXTURE_1D_INDEX:
+	    brw_MOV(p, brw_message_reg(2), src[0]);         /* s coord */
+	    brw_MOV(p, brw_message_reg(3), brw_imm_f(0));   /* t coord */
+	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));   /* r coord */
+	    break;
+	case TEXTURE_2D_INDEX:
+	case TEXTURE_RECT_INDEX:
+	    brw_MOV(p, brw_message_reg(2), src[0]);         /* s coord */
+	    brw_MOV(p, brw_message_reg(3), src[1]);         /* t coord */
+	    brw_MOV(p, brw_message_reg(4), brw_imm_f(0));   /* r coord */
+	    break;
+	case TEXTURE_3D_INDEX:
+	case TEXTURE_CUBE_INDEX:
+	    brw_MOV(p, brw_message_reg(2), src[0]);         /* s coord */
+	    brw_MOV(p, brw_message_reg(3), src[1]);         /* t coord */
+	    brw_MOV(p, brw_message_reg(4), src[2]);         /* r coord */
+	    break;
+	default:
+            /* invalid target */
+            abort();
+    }
+    brw_MOV(p, brw_message_reg(5), src[3]);          /* bias */
+    brw_MOV(p, brw_message_reg(6), brw_imm_f(0));    /* ref (unused?) */
+
+    if (BRW_IS_IGDNG(p->brw)) {
+        msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG;
+    } else {
+        /* Does it work well on SIMD8? */
+        msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+    }
+
+    brw_SAMPLE(p,
+               retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW),  /* dest */
+               1,                                           /* msg_reg_nr */
+               retype(payload_reg, BRW_REGISTER_TYPE_UW),   /* src0 */
+               SURF_INDEX_TEXTURE(unit),                    /* surface for this unit */
+               unit,                                        /* sampler */
+               inst->DstReg.WriteMask,                      /* writemask */
+               msg_type,                                    /* msg_type */
+               4,                                           /* response_length */
+               4,                                           /* msg_length; NOTE(review): m2..m6 are written above - confirm 4 is intended */
+               0,                                           /* eot */
+               1,                                           /* (unlabelled argument - see brw_SAMPLE) */
+               BRW_SAMPLER_SIMD_MODE_SIMD8);	
+}
+
+/* Emit TEX: SIMD8 texture sample; a shadow lookup also sends a reference value. */
+static void emit_tex(struct brw_wm_compile *c,
+                     const struct prog_instruction *inst)
+{
+    struct brw_compile *p = &c->func;
+    struct brw_reg dst[4], src[4], payload_reg;
+    /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
+    const GLuint unit = inst->TexSrcUnit;
+    GLuint msg_len;	/* only numbers the coordinate registers; brw_SAMPLE gets a literal length */
+    GLuint i, nr;	/* nr: how many coordinates the target takes */
+    GLuint emit;	/* mask of coordinate channels copied from src[] */
+    GLboolean shadow = (c->key.shadowtex_mask & (1<<unit)) ? 1 : 0;
+    GLuint msg_type;
+
+    assert(unit < BRW_MAX_TEX_UNIT);
+
+    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+
+    for (i = 0; i < 4; i++) 
+	dst[i] = get_dst_reg(c, inst, i);
+    for (i = 0; i < 4; i++)
+	src[i] = get_src_reg(c, inst, 0, i);
+    /* Pick the coordinate count and channel mask for the texture target. */
+    switch (inst->TexSrcTarget) {
+	case TEXTURE_1D_INDEX:
+	    emit = WRITEMASK_X;
+	    nr = 1;
+	    break;
+	case TEXTURE_2D_INDEX:
+	case TEXTURE_RECT_INDEX:
+	    emit = WRITEMASK_XY;
+	    nr = 2;
+	    break;
+	case TEXTURE_3D_INDEX:
+	case TEXTURE_CUBE_INDEX:
+	    emit = WRITEMASK_XYZ;
+	    nr = 3;
+	    break;
+	default:
+           /* invalid target */
+           abort();
+    }
+    msg_len = 1;
+
+    /* move/load S, T, R coords into consecutive message registers, starting at m2 */
+    for (i = 0; i < nr; i++) {
+	static const GLuint swz[4] = {0,1,2,2};
+	if (emit & (1<<i))
+	    brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]);
+	else	/* NOTE(review): looks unreachable if emit always covers bits 0..nr-1 - confirm */
+	    brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0));
+	msg_len += 1;
+    }
+
+    if (shadow) {
+       brw_MOV(p, brw_message_reg(5), brw_imm_f(0));  /* lod / bias */
+       brw_MOV(p, brw_message_reg(6), src[2]);        /* ref value / R coord */
+    }
+
+    if (BRW_IS_IGDNG(p->brw)) {
+        if (shadow)
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG;
+        else
+            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG;
+    } else {
+        /* Does it work for shadow on SIMD8 ? */
+        msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
+    }
+    
+    brw_SAMPLE(p,
+               retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), /* dest */
+               1,                                          /* msg_reg_nr */
+               retype(payload_reg, BRW_REGISTER_TYPE_UW),  /* src0 */
+               SURF_INDEX_TEXTURE(unit),                   /* surface for this unit */
+               unit,                                       /* sampler */
+               inst->DstReg.WriteMask,                     /* writemask */
+               msg_type,                                   /* msg_type */
+               4,                                          /* response_length */
+               shadow ? 6 : 4,                             /* msg_length; NOTE(review): 1D/2D targets write fewer than m2..m4 above - confirm */
+               0,                                          /* eot */
+               1,                                          /* (unlabelled argument - see brw_SAMPLE) */
+               BRW_SAMPLER_SIMD_MODE_SIMD8);	
+    /* Shadow lookups force the fourth result channel to 1.0. */
+    if (shadow)
+	brw_MOV(p, dst[3], brw_imm_f(1.0));
+}
+
+
+/** Resolve subroutine calls now that all code has been emitted. */
+static void post_wm_emit( struct brw_wm_compile *c )
+{
+    struct brw_compile *p = &c->func;
+
+    brw_resolve_cals(p);
+}
+
+/** Fetch source registers of argument `index` into regs[], but only for the
+ *  channels set in mask; other entries of regs[] are left untouched. */
+static void get_argument_regs(struct brw_wm_compile *c,
+			      const struct prog_instruction *inst,
+			      int index, struct brw_reg *regs, int mask)
+{
+    int chan;
+
+    for (chan = 0; chan < 4; chan++) {
+	if (!(mask & (1 << chan)))
+	    continue;
+	regs[chan] = get_src_reg(c, inst, index, chan);
+    }
+}
+
+/** Emit native code for every fragment-program instruction, keeping stacks of
+ *  open IF and DO instructions for the matching ELSE/ENDIF/ENDLOOP. */
+static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+{
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+    struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+    GLuint i, if_depth = 0, loop_depth = 0;
+    struct brw_compile *p = &c->func;
+    struct brw_indirect stack_index = brw_indirect(0, 0);
+
+    c->out_of_regs = GL_FALSE;
+    /* stack_index's address register starts at c->stack (CAL/RET return stack). */
+    prealloc_reg(c);
+    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+    brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+
+    for (i = 0; i < c->nr_fp_insns; i++) {
+        const struct prog_instruction *inst = &c->prog_instructions[i];
+	int dst_flags;
+	struct brw_reg args[3][4], dst[4];
+	int j;
+
+        c->cur_inst = i;
+#if 0
+        _mesa_printf("Inst %d: ", i);
+        _mesa_print_instruction(inst);
+#endif
+        /* fetch any constants that this instruction needs */
+        if (c->fp->use_const_buffer)
+           fetch_constants(c, inst);
+
+	if (inst->CondUpdate)
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+	else
+	    brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE);
+
+	dst_flags = inst->DstReg.WriteMask;
+	if (inst->SaturateMode == SATURATE_ZERO_ONE)
+	    dst_flags |= SATURATE;
+
+	switch (inst->Opcode) {
+	    case WM_PIXELXY:
+		emit_pixel_xy(c, inst);
+		break;
+	    case WM_DELTAXY: 
+		emit_delta_xy(c, inst);
+		break;
+	    case WM_PIXELW:
+		emit_pixel_w(c, inst);
+		break;	
+	    case WM_LINTERP:
+		emit_linterp(c, inst);
+		break;
+	    case WM_PINTERP:
+		emit_pinterp(c, inst);
+		break;
+	    case WM_CINTERP:
+		emit_cinterp(c, inst);
+		break;
+	    case WM_WPOSXY:
+		emit_wpos_xy(c, inst);
+		break;
+	    case WM_FB_WRITE:
+		emit_fb_write(c, inst);
+		break;
+	    case WM_FRONTFACING:
+		emit_frontfacing(c, inst);
+		break;
+	    case OPCODE_ADD:
+		emit_add(c, inst);
+		break;
+	    case OPCODE_ARL:
+		emit_arl(c, inst);
+		break;
+	    case OPCODE_FRC:
+		emit_frc(c, inst);
+		break;
+	    case OPCODE_FLR:
+		emit_flr(c, inst);
+		break;
+	    case OPCODE_LRP:
+		emit_lrp(c, inst);
+		break;
+	    case OPCODE_TRUNC:
+		emit_trunc(c, inst);
+		break;
+	    case OPCODE_MOV:
+	    case OPCODE_SWZ:
+		emit_mov(c, inst);
+		break;
+	    case OPCODE_DP3:
+		emit_dp3(c, inst);
+		break;
+	    case OPCODE_DP4:
+		emit_dp4(c, inst);
+		break;
+	    case OPCODE_XPD:
+		emit_xpd(c, inst);
+		break;
+	    case OPCODE_DPH:
+		emit_dph(c, inst);
+		break;
+	    case OPCODE_RCP:
+		emit_rcp(c, inst);
+		break;
+	    case OPCODE_RSQ:
+		emit_rsq(c, inst);
+		break;
+	    case OPCODE_SIN:
+		emit_sin(c, inst);
+		break;
+	    case OPCODE_COS:
+		emit_cos(c, inst);
+		break;
+	    case OPCODE_EX2:
+		emit_ex2(c, inst);
+		break;
+	    case OPCODE_LG2:
+		emit_lg2(c, inst);
+		break;
+	    case OPCODE_MIN:	
+	    case OPCODE_MAX:	
+		emit_min_max(c, inst);
+		break;
+	    case OPCODE_DDX:
+	    case OPCODE_DDY:
+		/* channels outside the write mask go to the null register */
+		for (j = 0; j < 4; j++) {
+		    if (inst->DstReg.WriteMask & (1 << j))
+			dst[j] = get_dst_reg(c, inst, j);
+		    else
+			dst[j] = brw_null_reg();
+		}
+		get_argument_regs(c, inst, 0, args[0], WRITEMASK_XYZW);
+		emit_ddxy(p, dst, dst_flags, (inst->Opcode == OPCODE_DDX),
+			  args[0]);
+                break;
+	    case OPCODE_SLT:
+		emit_slt(c, inst);
+		break;
+	    case OPCODE_SLE:
+		emit_sle(c, inst);
+		break;
+	    case OPCODE_SGT:
+		emit_sgt(c, inst);
+		break;
+	    case OPCODE_SGE:
+		emit_sge(c, inst);
+		break;
+	    case OPCODE_SEQ:
+		emit_seq(c, inst);
+		break;
+	    case OPCODE_SNE:
+		emit_sne(c, inst);
+		break;
+	    case OPCODE_MUL:
+		emit_mul(c, inst);
+		break;
+	    case OPCODE_POW:
+		emit_pow(c, inst);
+		break;
+	    case OPCODE_MAD:
+		emit_mad(c, inst);
+		break;
+	    case OPCODE_NOISE1:
+		emit_noise1(c, inst);
+		break;
+	    case OPCODE_NOISE2:
+		emit_noise2(c, inst);
+		break;
+	    case OPCODE_NOISE3:
+		emit_noise3(c, inst);
+		break;
+	    case OPCODE_NOISE4:
+		emit_noise4(c, inst);
+		break;
+	    case OPCODE_TEX:
+		emit_tex(c, inst);
+		break;
+	    case OPCODE_TXB:
+		emit_txb(c, inst);
+		break;
+	    case OPCODE_KIL_NV:
+		emit_kil(c);
+		break;
+	    case OPCODE_IF:
+		assert(if_depth < MAX_IF_DEPTH);
+		if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_ELSE:
+		assert(if_depth > 0);	/* if_inst[if_depth-1] must exist */
+		if_inst[if_depth-1]  = brw_ELSE(p, if_inst[if_depth-1]);
+		break;
+	    case OPCODE_ENDIF:
+		assert(if_depth > 0);
+		brw_ENDIF(p, if_inst[--if_depth]);
+		break;
+	    case OPCODE_BGNSUB:
+		brw_save_label(p, inst->Comment, p->nr_insn);
+		break;
+	    case OPCODE_ENDSUB:
+		/* no-op */
+		break;
+	    case OPCODE_CAL: 
+		/* store ip + 3*16 at the stack top, advance the stack by 4, then jump */
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_set_access_mode(p, BRW_ALIGN_1);
+                brw_ADD(p, deref_1ud(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+                brw_set_access_mode(p, BRW_ALIGN_16);
+                brw_ADD(p, get_addr_reg(stack_index),
+                         get_addr_reg(stack_index), brw_imm_d(4));
+		brw_save_call(&c->func, inst->Comment, p->nr_insn);
+                brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+                brw_pop_insn_state(p);
+		break;
+	    case OPCODE_RET:
+		/* step the stack back by 4 and load ip from the saved entry */
+		brw_push_insn_state(p);
+		brw_set_mask_control(p, BRW_MASK_DISABLE);
+                brw_ADD(p, get_addr_reg(stack_index),
+                        get_addr_reg(stack_index), brw_imm_d(-4));
+                brw_set_access_mode(p, BRW_ALIGN_1);
+                brw_MOV(p, brw_ip_reg(), deref_1ud(stack_index, 0));
+                brw_set_access_mode(p, BRW_ALIGN_16);
+		brw_pop_insn_state(p);
+		break;
+	    case OPCODE_BGNLOOP:
+                /* XXX may need to invalidate the current_constant regs */
+		assert(loop_depth < MAX_LOOP_DEPTH);	/* loop_inst[] has MAX_LOOP_DEPTH slots */
+		loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+		break;
+	    case OPCODE_BRK:
+		brw_BREAK(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_CONT:
+		brw_CONT(p);
+		brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+		break;
+	    case OPCODE_ENDLOOP: 
+               {
+                  struct brw_instruction *inst0, *inst1;
+                  GLuint br = 1;	/* jump-count scale; 2 on IGDNG */
+
+                  if (BRW_IS_IGDNG(brw))
+                     br = 2;
+                  assert(loop_depth > 0);	/* needs a matching BGNLOOP */
+                  loop_depth--;
+                  inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+                  /* patch all the BREAK/CONT instructions from last BGNLOOP */
+                  while (inst0 > loop_inst[loop_depth]) {
+                     inst0--;
+                     if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+			inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+			inst0->bits3.if_else.pop_count = 0;
+                     }
+                     else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+                        inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+                        inst0->bits3.if_else.pop_count = 0;
+                     }
+                  }
+               }
+               break;
+	    default:
+		_mesa_printf("unsupported IR in fragment shader %d\n",
+			inst->Opcode);
+	}
+	/* After a CondUpdate instruction, predication is switched on for what follows. */
+	if (inst->CondUpdate)
+	    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+	else
+	    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+    }
+    post_wm_emit(c);
+
+    if (INTEL_DEBUG & DEBUG_WM) {
+      _mesa_printf("wm-native:\n");
+      for (i = 0; i < p->nr_insn; i++)
+	 brw_disasm(stderr, &p->store[i]);
+      _mesa_printf("\n");
+    }
+}
+
+/**
+ * Do GPU code generation for shaders that use GLSL features such as flow
+ * control.  Other shaders are presumably compiled by the non-GLSL WM path.
+ */
+void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+    if (INTEL_DEBUG & DEBUG_WM) {
+        _mesa_printf("brw_wm_glsl_emit:\n");
+    }
+
+    /* initial instruction translation/simplification */
+    brw_wm_pass_fp(c);
+
+    /* actual code generation */
+    brw_wm_emit_glsl(brw, c);
+
+    if (INTEL_DEBUG & DEBUG_WM) {
+        brw_wm_print_program(c, "brw_wm_glsl_emit done");
+    }
+    /* Record the GRF count reported by num_grf_used(); scratch size is set to 0. */
+    c->prog_data.total_grf = num_grf_used(c);
+    c->prog_data.total_scratch = 0;
+}
diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c
new file mode 100644
index 0000000000..5e399ac62a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_iz.c
@@ -0,0 +1,157 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                
+
+#include "main/mtypes.h"
+#include "brw_wm.h"
+
+
+#undef P			/* promoted depth */
+#undef C			/* computed */
+#undef N			/* non-promoted? */
+
+#define P 0
+#define C 1
+#define N 2
+/* One entry per combination of IZ_* lookup bits; read by brw_wm_lookup_iz(). */
+const struct {
+   GLuint mode:2;		/* P, C or N */
+   GLuint sd_present:1;		/* brw_wm_lookup_iz() allocates key->source_depth_reg */
+   GLuint sd_to_rt:1;		/* sets key->source_depth_to_render_target */
+   GLuint dd_present:1;		/* allocates key->dest_depth_reg */
+   GLuint ds_present:1;		/* allocates key->aa_dest_stencil_reg */
+} wm_iz_table[IZ_BIT_MAX] =
+{
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { N, 0, 1, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { C, 0, 1, 1, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { N, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { N, 1, 1, 0, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 0, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { C, 0, 1, 0, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { P, 0, 0, 0, 0 }, 
+ { C, 1, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 }, 
+ { C, 0, 1, 1, 1 } 
+};
+
+/**
+ * Fill in the depth/stencil register fields of \p key.
+ * \param line_aa  AA_NEVER, AA_ALWAYS or AA_SOMETIMES
+ * \param lookup  bitmask of IZ_* flags (index into wm_iz_table)
+ * \param ps_uses_depth  if set, a source depth register is always allocated
+ */
+void brw_wm_lookup_iz( GLuint line_aa, GLuint lookup,
+		       GLboolean ps_uses_depth,
+		       struct brw_wm_prog_key *key )
+{
+   GLuint next_reg = 2;	/* first register number handed out */
+
+   assert (lookup < IZ_BIT_MAX);
+
+   if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
+      key->computes_depth = 1;
+
+   if (ps_uses_depth || wm_iz_table[lookup].sd_present) {
+      key->source_depth_reg = next_reg;
+      next_reg += 2;
+   }
+
+   if (wm_iz_table[lookup].sd_to_rt)
+      key->source_depth_to_render_target = 1;
+
+   if (line_aa != AA_NEVER || wm_iz_table[lookup].ds_present) {
+      key->aa_dest_stencil_reg = next_reg++;
+      key->runtime_check_aads_emit = (line_aa == AA_SOMETIMES &&
+				      !wm_iz_table[lookup].ds_present);
+   }
+
+   if (wm_iz_table[lookup].dd_present) {
+      key->dest_depth_reg = next_reg;
+      next_reg += 2;
+   }
+
+   key->nr_depth_regs = (next_reg + 1) / 2;	/* half of next_reg, rounded up */
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
new file mode 100644
index 0000000000..6279258339
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -0,0 +1,442 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                 
+
+#include "brw_context.h"
+#include "brw_wm.h"
+#include "shader/prog_parameter.h"
+
+
+
+/***********************************************************************
+ */
+
+static struct brw_wm_ref *get_ref( struct brw_wm_compile *c )
+{  /* Returns the next unused entry of c->refs[] and bumps nr_refs. */
+   assert( c->nr_refs < BRW_WM_MAX_REF );
+   return c->refs + c->nr_refs++;
+}
+
+static struct brw_wm_value *get_value( struct brw_wm_compile *c)
+{  /* Returns the next unused entry of c->vreg[] and bumps nr_vreg. */
+   assert(c->nr_vreg < BRW_WM_MAX_VREG);   /* check the counter that indexes vreg[] */
+   return &c->vreg[c->nr_vreg++];
+}
+
+/** Return a pointer to the next unused entry of c->instruction[]. */
+static struct brw_wm_instruction *get_instruction( struct brw_wm_compile *c )
+{
+   assert( c->nr_insns < BRW_WM_MAX_INSN );
+   return c->instruction + c->nr_insns++;
+}
+
+/***********************************************************************
+ */
+
+/** Reset the shared "undef" ref so that it refers to the undef value. */
+static void pass0_init_undef( struct brw_wm_compile *c)
+{
+   /* hw_reg gets the same brw_vec8_grf(0, 0) placeholder as any fresh ref. */
+   c->undef_ref.value = &c->undef_value;
+   c->undef_ref.hw_reg = brw_vec8_grf(0, 0);
+   c->undef_ref.insn = 0;
+   c->undef_ref.prevuse = NULL;
+}
+
+/** Bind FP register component [file][idx][component] to a new ref of value. */
+static void pass0_set_fpreg_value( struct brw_wm_compile *c,
+				   GLuint file,
+				   GLuint idx,
+				   GLuint component,
+				   struct brw_wm_value *value )
+{
+   struct brw_wm_ref *fresh = get_ref(c);
+   fresh->prevuse = NULL;
+   fresh->insn = 0;
+   fresh->hw_reg = brw_vec8_grf(0, 0);
+   fresh->value = value;
+   c->pass0_fp_reg[file][idx][component] = fresh;
+}
+
+/** Bind FP register component [file][idx][component] to an existing ref (none is allocated). */
+static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
+				 GLuint file,
+				 GLuint idx,
+				 GLuint component,
+				 const struct brw_wm_ref *src_ref )
+{
+   c->pass0_fp_reg[file][idx][component] = src_ref;
+}
+
+/** Claim the next program parameter slot for param_ptr and return a ref to
+ *  it.  Once BRW_WM_MAX_PARAM is reached this prints a message, sets
+ *  prog_data.error and returns NULL (nr_params is still incremented). */
+static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, 
+					       const GLfloat *param_ptr )
+{
+   const GLuint slot = c->prog_data.nr_params++;
+   struct brw_wm_ref *ref;
+
+   if (slot >= BRW_WM_MAX_PARAM) {
+      _mesa_printf("%s: out of params\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+
+   ref = get_ref(c);
+   ref->insn = 0;
+   ref->prevuse = NULL;
+   c->prog_data.param[slot] = param_ptr;
+   c->nr_creg = (slot+16)/16;
+
+   /* Push the offsets into hw_reg.  These will be added to the
+    * real register numbers once one is allocated in pass2. */
+   ref->hw_reg = brw_vec1_grf((slot&8)?1:0, slot%8);
+   ref->value = &c->creg[slot/16];
+   return ref;
+}
+
+
+/** Return a ref to a constant/literal value, reusing the existing entry when
+ *  an equal value has been requested before.  When BRW_WM_MAX_CONST entries
+ *  are in use this prints a message, sets prog_data.error and returns NULL.
+ */
+static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
+					       const GLfloat *constval )
+{
+   GLuint slot;
+
+   /* Reuse an existing entry whose value compares equal: */
+   for (slot = 0; slot < c->nr_constrefs; slot++) {
+      if (c->constref[slot].constval == *constval) 
+	 return c->constref[slot].ref;
+   }
+
+   if (c->nr_constrefs >= BRW_WM_MAX_CONST) {
+      _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+      c->prog_data.error = 1;
+      return NULL;
+   }
+
+   /* Otherwise append a new entry.  A constant is a special type of
+    * parameter, so its ref comes from get_param_ref(); the entry is
+    * recorded even if that call returns NULL.
+    */
+   slot = c->nr_constrefs++;
+   c->constref[slot].constval = *constval;
+   c->constref[slot].ref = get_param_ref(c, constval);
+
+   return c->constref[slot].ref;
+}
+
+
+/* Look up the ref currently bound to [file][idx][component], creating a
+ * parameter or constant ref the first time such a register is read. */
+static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
+					       GLuint file,
+					       GLuint idx,
+					       GLuint component )
+{
+   const struct brw_wm_ref *ref = c->pass0_fp_reg[file][idx][component];
+
+   if (!ref) {
+      switch (file) {
+      case PROGRAM_INPUT:
+      case PROGRAM_PAYLOAD:
+      case PROGRAM_TEMPORARY:
+      case PROGRAM_OUTPUT:
+      case PROGRAM_VARYING:
+	 break;			/* nothing is created for these files */
+
+      case PROGRAM_LOCAL_PARAM:
+	 ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
+	 break;
+
+      case PROGRAM_ENV_PARAM:
+	 ref = get_param_ref(c, &c->env_param[idx][component]);
+	 break;
+
+      case PROGRAM_STATE_VAR:
+      case PROGRAM_UNIFORM:
+      case PROGRAM_CONSTANT:
+      case PROGRAM_NAMED_PARAM: {
+	 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
+	 
+	 /* There's something really hokey about parameters parsed in
+	  * arb programs - they all end up in here, whether they be
+	  * state values, parameters or constants.  This duplicates the
+	  * structure above & also seems to subvert the limits set for
+	  * each type of constant/param.
+	  */ 
+	 switch (plist->Parameters[idx].Type) {
+	 case PROGRAM_NAMED_PARAM:
+	 case PROGRAM_CONSTANT:
+	    /* These are invariant, so equal values can share one ref:
+	     */
+	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    break;
+
+	 case PROGRAM_STATE_VAR:
+	 case PROGRAM_UNIFORM:
+	    /* These may change from run to run:
+	     */
+	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
+	    break;
+
+	 default:
+	    assert(0);
+	    break;
+	 }
+	 break;
+      }
+
+      default:
+	 assert(0);
+	 break;
+      }
+      /* Remember the result so that later lookups reuse the same ref. */
+      c->pass0_fp_reg[file][idx][component] = ref;
+   }
+   /* Still nothing (no binding, or the allocation failed): use the undef ref. */
+   if (!ref)
+      ref = &c->undef_ref;
+
+   return ref;
+}
+
+
+
+/***********************************************************************
+ * Straight translation to internal instruction format
+ */
+
+/** Allocate a fresh value for each channel in writemask, store it in
+ *  out->dst[] and bind the instruction's destination component to it. */
+static void pass0_set_dst( struct brw_wm_compile *c,
+			   struct brw_wm_instruction *out,
+			   const struct prog_instruction *inst, GLuint writemask )
+{
+   GLuint chan;
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(writemask & (1<<chan)))
+	 continue;
+      out->dst[chan] = get_value(c);
+      pass0_set_fpreg_value(c, inst->DstReg.File, inst->DstReg.Index, chan,
+			    out->dst[chan]);
+   }
+   out->writemask = writemask;
+}
+
+
+/** Ref for channel i of src after swizzling; ZERO/ONE use shared constants.
+ *  Never NULL: if get_const_ref() fails, the undef ref is returned instead. */
+static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
+						    struct prog_src_register src, GLuint i )
+{
+   GLuint component = GET_SWZ(src.Swizzle,i);
+   const struct brw_wm_ref *src_ref;
+   static const GLfloat const_zero = 0.0;
+   static const GLfloat const_one = 1.0;
+
+   if (component == SWIZZLE_ZERO) 
+      src_ref = get_const_ref(c, &const_zero);
+   else if (component == SWIZZLE_ONE) 
+      src_ref = get_const_ref(c, &const_one);
+   else 
+      src_ref = pass0_get_reg(c, src.File, src.Index, component);
+   return src_ref ? src_ref : &c->undef_ref;
+}
+
+
+/** Create a new ref to the value feeding channel i of src, with the
+ *  source's negate/abs modifiers folded into hw_reg.  When insn is
+ *  non-NULL the ref is also pushed onto the value's use chain. */
+static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
+				       struct prog_src_register src,
+				       GLuint i, struct brw_wm_instruction *insn)
+{
+   const struct brw_wm_ref *src_ref = get_fp_src_reg_ref(c, src, i);
+   struct brw_wm_ref *use = get_ref(c);
+
+   use->hw_reg = src_ref->hw_reg;
+   use->value = src_ref->value;
+
+   if (insn) {
+      use->insn = insn - c->instruction;
+      use->prevuse = use->value->lastuse;
+      use->value->lastuse = use;
+   }
+
+   if (src.Abs) {		/* abs clears any negation */
+      use->hw_reg.negate = 0;
+      use->hw_reg.abs = 1;
+   }
+   else if (src.Negate & (1 << i))
+      use->hw_reg.negate ^= 1;
+   return use;
+}
+
+
+/** Append the internal-format translation of inst to c->instruction[]:
+ *  copy its per-instruction fields, create a ref for every source
+ *  channel, then allocate values for the written destination channels.
+ */
+static void
+translate_insn(struct brw_wm_compile *c,
+               const struct prog_instruction *inst)
+{
+   struct brw_wm_instruction *out = get_instruction(c);
+   const GLuint nr_args = brw_wm_nr_args(inst->Opcode);
+   GLuint arg, chan;
+
+   /* Per-instruction fields.  Aux carries eot in bit 0 and the target
+    * in the bits above it.
+    */
+   out->opcode = inst->Opcode;
+   out->saturate = (inst->SaturateMode != SATURATE_OFF);
+   out->tex_unit = inst->TexSrcUnit;
+   out->tex_idx = inst->TexSrcTarget;
+   out->tex_shadow = inst->TexShadow;
+   out->eot = inst->Aux & 1;
+   out->target = inst->Aux >> 1;
+
+   /* One ref per channel of each source argument. */
+   for (arg = 0; arg < nr_args; arg++)
+      for (chan = 0; chan < 4; chan++)
+	 out->src[arg][chan] = get_new_ref(c, inst->SrcReg[arg], chan, out);
+
+   /* Destination values for the channels in the write mask. */
+   pass0_set_dst(c, out, inst, inst->DstReg.WriteMask);
+}
+
+
+
+/***********************************************************************
+ * Optimize moves and swizzles away:
+ */ 
+/** Get the effect of a MOV/SWZ without translating an instruction: the
+ *  destination's register-table entries are rebound to new refs of the
+ *  (swizzled) source channels. */
+static void pass0_precalc_mov( struct brw_wm_compile *c,
+			       const struct prog_instruction *inst )
+{
+   const struct prog_dst_register *dst = &inst->DstReg;
+   struct brw_wm_ref *refs[4];
+   GLuint chan;
+
+   /* Read every source ref before rebinding any destination component,
+    * so that an "in-place" swizzle such as
+    *   MOV t, t.xxyx
+    * sees the old binding of t for all four channels.
+    */
+   for (chan = 0; chan < 4; chan++)
+      refs[chan] = get_new_ref(c, inst->SrcReg[0], chan, NULL);
+
+   for (chan = 0; chan < 4; chan++) {
+      if (!(dst->WriteMask & (1 << chan)))
+	 continue;
+      pass0_set_fpreg_ref( c, dst->File, dst->Index, chan, refs[chan]);
+   }
+}
+
+
+/* Initialize payload "registers": bind the PROGRAM_PAYLOAD file to the
+ * depth and input-interpolation payload values. */
+static void pass0_init_payload( struct brw_wm_compile *c )
+{
+   GLuint n;
+   /* Depth components at or beyond nr_depth_regs fall back to depth[0]. */
+   for (n = 0; n < 4; n++) {
+      const GLuint slot = (n < c->key.nr_depth_regs) ? n : 0;
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, n, 
+			     &c->payload.depth[slot] );
+   }
+
+#if 0
+   /* This seems to be an alternative to the INTERP_WPOS stuff I do
+    * elsewhere:
+    */
+   if (c->key.source_depth_reg)
+      pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
+			    &c->payload.depth[c->key.source_depth_reg/2]);
+#endif
+
+   for (n = 0; n < FRAG_ATTRIB_MAX; n++)
+      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, n, 0, 
+			     &c->payload.input_interp[n] );
+}
+
+
+/***********************************************************************
+ * PASS 0
+ *
+ * Work forwards to give each calculated value a unique number.  Where
+ * an instruction produces duplicate values (eg DP3), all are given
+ * the same number.
+ *
+ * Translate away swizzling and eliminate non-saturating moves.
+ *
+ * Resets nr_vreg and nr_insns, initialises the undef ref and the payload
+ * registers, then walks the FP instructions in order.
+ */
+void brw_wm_pass0( struct brw_wm_compile *c )
+{
+   GLuint n;
+
+   /* Start from empty value and instruction tables. */
+   c->nr_vreg = 0;
+   c->nr_insns = 0;
+
+   /* The undef ref and payload bindings must exist before any lookup. */
+   pass0_init_undef(c);
+   pass0_init_payload(c);
+
+   for (n = 0; n < c->nr_fp_insns; n++) {
+      const struct prog_instruction *inst = &c->prog_instructions[n];
+      const GLboolean is_move = (inst->Opcode == OPCODE_MOV ||
+				 inst->Opcode == OPCODE_SWZ);
+
+      /* A MOV/SWZ that does not saturate is folded into the register
+       * table; every other instruction (saturating moves included) is
+       * translated into the internal format.
+       */
+      if (is_move && !inst->SaturateMode)
+	 pass0_precalc_mov(c, inst);
+      else
+	 translate_insn(c, inst);
+   }
+
+   /* Dump the translated program when WM debugging is enabled. */
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass0");
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
new file mode 100644
index 0000000000..b449394029
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -0,0 +1,291 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                  
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+/* Strip writemask bits whose dst value does not contribute to output. */
+static GLuint get_tracked_mask(struct brw_wm_compile *c,
+			       struct brw_wm_instruction *inst)
+{
+   GLuint i;
+   for (i = 0; i < 4; i++) {
+      if (inst->writemask & (1<<i)) {
+	 if (!inst->dst[i]->contributes_to_output) {
+	    inst->writemask &= ~(1<<i);
+	    inst->dst[i] = 0; /* dead channel: drop the destination too */
+	 }
+      }
+   }
+
+   return inst->writemask; /* channels still written after trimming */
+}
+
+/* Remove a reference from its value's usage chain: the singly linked
+ * list that starts at value->lastuse and runs back through ->prevuse. */
+static void unlink_ref(struct brw_wm_ref *ref)
+{
+   struct brw_wm_value *value = ref->value;
+
+   if (ref == value->lastuse) {
+      value->lastuse = ref->prevuse;
+   }
+   else {
+      struct brw_wm_ref *i = value->lastuse;
+      while (i->prevuse != ref) i = i->prevuse; /* assumes ref is on the chain */
+      i->prevuse = ref->prevuse;
+   }
+}
+
+static void track_arg(struct brw_wm_compile *c,
+		      struct brw_wm_instruction *inst,
+		      GLuint arg,
+		      GLuint readmask)
+{
+   GLuint i;
+   /* Apply readmask to source operand arg of inst, one channel at a time. */
+   for (i = 0; i < 4; i++) {
+      struct brw_wm_ref *ref = inst->src[arg][i];
+      if (ref) {
+	 if (readmask & (1<<i)) {
+	    ref->value->contributes_to_output = 1; /* channel is read */
+         }
+	 else {
+	    unlink_ref(ref); /* channel unread: take it off the value's chain */
+	    inst->src[arg][i] = NULL;
+	 }
+      }
+   }
+}
+
+/* Texture-coordinate channels that a lookup on the given target reads. */
+static GLuint get_texcoord_mask( GLuint tex_idx )
+{
+   switch (tex_idx) {
+   case TEXTURE_1D_INDEX:
+      return WRITEMASK_X;
+   case TEXTURE_2D_INDEX:
+   case TEXTURE_RECT_INDEX:
+      return WRITEMASK_XY;
+   case TEXTURE_3D_INDEX:
+   case TEXTURE_CUBE_INDEX:
+      return WRITEMASK_XYZ;
+   default:
+      return 0;
+   }
+}
+
+
+/* Pass 1: essentially dead code elimination.
+ *
+ * Iterate backwards over instructions, noting which values
+ * contribute to the final result.  Writemasks are trimmed to those
+ * values and source refs for channels that are not read are unlinked.
+ */
+void brw_wm_pass1( struct brw_wm_compile *c )
+{
+   GLint insn;
+
+   for (insn = c->nr_insns-1; insn >= 0; insn--) {
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+      GLuint writemask;
+      GLuint read0, read1, read2;
+
+      if (inst->opcode == OPCODE_KIL) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+	 continue;
+      }
+
+      if (inst->opcode == WM_FB_WRITE) {
+	 track_arg(c, inst, 0, WRITEMASK_XYZW); 
+	 track_arg(c, inst, 1, WRITEMASK_XYZW); 
+	 if (c->key.source_depth_to_render_target &&
+	     c->key.computes_depth)
+	    track_arg(c, inst, 2, WRITEMASK_Z); 
+	 else
+	    track_arg(c, inst, 2, 0); 
+	 continue;
+      }
+
+      /* Lookup all the registers which were written by this
+       * instruction and get a mask of those that contribute to the output:
+       */
+      writemask = get_tracked_mask(c, inst);
+      if (!writemask) {
+	 GLuint arg;
+	 for (arg = 0; arg < 3; arg++)
+	    track_arg(c, inst, arg, 0); /* nothing live is written: drop all source refs */
+	 continue;
+      }
+
+      read0 = 0;
+      read1 = 0;
+      read2 = 0;
+
+      /* Mark all inputs which contribute to the marked outputs:
+       */
+      switch (inst->opcode) {
+      case OPCODE_ABS:
+      case OPCODE_FLR:
+      case OPCODE_FRC:
+      case OPCODE_MOV:
+      case OPCODE_SWZ:
+      case OPCODE_TRUNC:
+	 read0 = writemask;
+	 break;
+
+      case OPCODE_SUB:
+      case OPCODE_SLT:
+      case OPCODE_SLE:
+      case OPCODE_SGE:
+      case OPCODE_SGT:
+      case OPCODE_SEQ:
+      case OPCODE_SNE:
+      case OPCODE_ADD:
+      case OPCODE_MAX:
+      case OPCODE_MIN:
+      case OPCODE_MUL:
+	 read0 = writemask;
+	 read1 = writemask;
+	 break;
+
+      case OPCODE_DDX:
+      case OPCODE_DDY:
+	 read0 = writemask;
+	 break;
+
+      case OPCODE_MAD:	
+      case OPCODE_CMP:
+      case OPCODE_LRP:
+	 read0 = writemask;
+	 read1 = writemask;	
+	 read2 = writemask;	
+	 break;
+
+      case OPCODE_XPD: /* each written channel reads the other two of both operands */
+	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	 
+	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	 
+	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+	 read1 = read0;
+	 break;
+
+      case OPCODE_COS:
+      case OPCODE_EX2:
+      case OPCODE_LG2:
+      case OPCODE_RCP:
+      case OPCODE_RSQ:
+      case OPCODE_SIN:
+      case OPCODE_SCS:
+      case WM_CINTERP:
+      case WM_PIXELXY:
+	 read0 = WRITEMASK_X;
+	 break;
+
+      case OPCODE_POW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case OPCODE_TEX:
+      case OPCODE_TXP:
+	 read0 = get_texcoord_mask(inst->tex_idx);
+
+         if (inst->tex_shadow)
+	    read0 |= WRITEMASK_Z;
+	 break;
+
+      case OPCODE_TXB:
+	 /* Shadow ignored for txb.
+	  */
+	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
+	 break;
+
+      case WM_WPOSXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 break;
+
+      case WM_DELTAXY:
+	 read0 = writemask & WRITEMASK_XY;
+	 read1 = WRITEMASK_X;
+	 break;
+
+      case WM_PIXELW:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_LINTERP:
+	 read0 = WRITEMASK_X;
+	 read1 = WRITEMASK_XY;
+	 break;
+
+      case WM_PINTERP:
+	 read0 = WRITEMASK_X; /* interpolant */
+	 read1 = WRITEMASK_XY; /* deltas */
+	 read2 = WRITEMASK_W; /* pixel w */
+	 break;
+
+      case OPCODE_DP3:	
+	 read0 = WRITEMASK_XYZ;
+	 read1 = WRITEMASK_XYZ;
+	 break;
+
+      case OPCODE_DPH:
+	 read0 = WRITEMASK_XYZ;
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_DP4:
+	 read0 = WRITEMASK_XYZW;
+	 read1 = WRITEMASK_XYZW;
+	 break;
+
+      case OPCODE_LIT: 
+	 read0 = WRITEMASK_XYW;
+	 break;
+
+      case OPCODE_DST:
+      case WM_FRONTFACING:
+      case OPCODE_KIL_NV:
+      default:
+	 break; /* read masks stay 0, so every source ref is unlinked below */
+      }
+
+      track_arg(c, inst, 0, read0);
+      track_arg(c, inst, 1, read1);
+      track_arg(c, inst, 2, read2);
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass1");
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
new file mode 100644
index 0000000000..6faea018fb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -0,0 +1,343 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_wm.h"
+
+
+/* Use these to force spilling so that that functionality can be
+ * tested with known-good examples rather than having to construct new
+ * tests.
+ */
+#define TEST_PAYLOAD_SPILLS 0
+#define TEST_DST_SPILLS 0
+
+static void spill_value(struct brw_wm_compile *c,
+			struct brw_wm_value *value);
+
+static void prealloc_reg(struct brw_wm_compile *c,
+			 struct brw_wm_value *value,
+			 GLuint reg)
+{
+   if (value->lastuse) {
+      /* Only values with a recorded use get a slot.  nextuse starts
+       * at zero and is corrected later by update_register_usage().
+       */
+      c->pass2_grf[reg].value = value;
+      c->pass2_grf[reg].nextuse = 0;
+
+      value->resident = &c->pass2_grf[reg];
+      value->hw_reg = brw_vec8_grf(reg*2, 0); /* slot reg maps to GRF number reg*2 */
+
+      if (TEST_PAYLOAD_SPILLS)
+	 spill_value(c, value);
+   }
+}
+
+
+/* Set every slot's nextuse to BRW_WM_MAX_INSN, then preallocate
+ * consecutive slots (from 0) to the depth payload, the constant regs
+ * and the written interpolants, and record the resulting read lengths. */
+static void init_registers( struct brw_wm_compile *c )
+{
+   GLuint nr_interp_regs = 0;
+   GLuint i = 0;
+   GLuint j;
+
+   for (j = 0; j < c->grf_limit; j++) 
+      c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
+
+   for (j = 0; j < c->key.nr_depth_regs; j++) 
+      prealloc_reg(c, &c->payload.depth[j], i++);
+
+   for (j = 0; j < c->nr_creg; j++) 
+      prealloc_reg(c, &c->creg[j], i++);
+
+   for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
+      if (c->key.vp_outputs_written & (1<<j)) {
+	 int fp_index;
+
+	 if (j >= VERT_RESULT_VAR0)
+	    fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
+	 else if (j <= VERT_RESULT_TEX7)
+	    fp_index = j;
+	 else
+	    fp_index = -1; /* counted in nr_interp_regs but given no slot */
+
+	 nr_interp_regs++;
+	 if (fp_index >= 0)
+	    prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
+      }
+   }
+
+   assert(nr_interp_regs >= 1);
+
+   c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+   c->prog_data.urb_read_length = nr_interp_regs * 2;
+   c->prog_data.curb_read_length = c->nr_creg * 2;
+
+   c->max_wm_grf = i * 2; /* i slots were handed out; each counts as 2 here */
+}
+
+
+/* Refresh nextuse for every slot except slot 0, as of instruction
+ * thisinsn; slots whose value has no use left are released. */
+static void update_register_usage(struct brw_wm_compile *c,
+				  GLuint thisinsn)
+{
+   GLuint i;
+
+   for (i = 1; i < c->grf_limit; i++) {
+      struct brw_wm_grf *grf = &c->pass2_grf[i];
+
+      /* Only search those which can change:
+       */
+      if (grf->nextuse < thisinsn) {
+	 const struct brw_wm_ref *ref = grf->value->lastuse;
+
+	 /* Has last use of value been passed?
+	  */
+	 if (ref->insn < thisinsn) {
+	    grf->value->resident = 0;
+	    grf->value = 0;
+	    grf->nextuse = BRW_WM_MAX_INSN;
+	 }
+	 else {
+	    /* Else walk back from lastuse to the earliest use that is
+	     * not before thisinsn: */
+	    while (ref->prevuse && ref->prevuse->insn >= thisinsn)
+	       ref = ref->prevuse;
+
+	    grf->nextuse = ref->insn;
+	 }
+      }
+   }
+}
+
+/* Evict value from its slot, giving it a scratch slot if it has none. */
+static void spill_value(struct brw_wm_compile *c,
+			struct brw_wm_value *value)
+{	
+   /* Allocate a spill slot.  Note that allocations start from 0x40 -
+    * the first slot is reserved to mean "undef" in brw_wm_emit.c
+    */
+   if (!value->spill_slot) {
+      c->last_scratch += 0x40;	
+      value->spill_slot = c->last_scratch;
+   }
+
+   /* The spill will be done in brw_wm_emit.c immediately after the
+    * value is calculated, so we can just take this reg without any
+    * further work.
+    */
+   value->resident->value = NULL; /* value->resident must be non-NULL here */
+   value->resident->nextuse = BRW_WM_MAX_INSN;
+   value->resident = NULL;
+}
+
+
+
+/* Search for the run of nr contiguous regs whose nearest next use is
+ * most distant (free regs count as very distant), spill any occupants
+ * and return the index of the first reg of the run.
+ * TODO: implement spill-to-reg so that we can rearrange discontiguous
+ * free regs and then spill the oldest non-free regs in sequence.
+ * This would mean inserting instructions in this pass.
+ */
+static GLuint search_contiguous_regs(struct brw_wm_compile *c,
+				     GLuint nr,
+				     GLuint thisinsn)
+{
+   struct brw_wm_grf *grf = c->pass2_grf;
+   GLuint furthest = 0;
+   GLuint reg = 0;
+   GLuint i, j;
+
+   /* Start search at 1: r0 is special and can't be used or spilled.
+    * Only runs lying wholly below grf_limit are scanned or chosen. */
+   for (i = 1; i + nr <= c->grf_limit && furthest < BRW_WM_MAX_INSN; i++) {
+      GLuint group_nextuse = BRW_WM_MAX_INSN;
+
+      for (j = 0; j < nr; j++) {
+	 if (grf[i+j].nextuse < group_nextuse)
+	    group_nextuse = grf[i+j].nextuse;
+      }
+
+      if (group_nextuse > furthest) {
+	 furthest = group_nextuse;
+	 reg = i;
+      }
+   }
+
+   assert(furthest != thisinsn);
+
+   /* Any non-empty regs will need to be spilled:
+    */
+   for (j = 0; j < nr; j++) 
+      if (grf[reg+j].value)
+	 spill_value(c, grf[reg+j].value);
+
+   return reg;
+}
+
+/* Place dst[0..nr-1] in nr contiguous regs and set each one's hw_reg. */
+static void alloc_contiguous_dest(struct brw_wm_compile *c, 
+				  struct brw_wm_value *dst[],
+				  GLuint nr,
+				  GLuint thisinsn)
+{
+   GLuint reg = search_contiguous_regs(c, nr, thisinsn);
+   GLuint i;
+
+   for (i = 0; i < nr; i++) {
+      if (!dst[i]) {
+	 /* Need to grab a dummy value in TEX case.  Don't introduce
+	  * it into the tracking scheme.
+	  */
+	 dst[i] = &c->vreg[c->nr_vreg++];
+      }
+      else {
+	 assert(!dst[i]->resident);
+	 assert(c->pass2_grf[reg+i].nextuse != thisinsn);
+
+	 c->pass2_grf[reg+i].value = dst[i];
+	 c->pass2_grf[reg+i].nextuse = thisinsn;
+
+	 dst[i]->resident = &c->pass2_grf[reg+i];
+      }
+
+      dst[i]->hw_reg = brw_vec8_grf((reg+i)*2, 0);
+   }
+
+   if ((reg+nr)*2 > c->max_wm_grf) /* keep max_wm_grf as the high-water mark */
+      c->max_wm_grf = (reg+nr) * 2;
+}
+
+/* Make every source value of inst resident, noting any fills needed. */
+static void load_args(struct brw_wm_compile *c, 
+		      struct brw_wm_instruction *inst)
+{
+   GLuint thisinsn = inst - c->instruction; /* index of inst in c->instruction[] */
+   GLuint i,j;
+
+   for (i = 0; i < 3; i++) {
+      for (j = 0; j < 4; j++) {
+	 struct brw_wm_ref *ref = inst->src[i][j];
+
+	 if (ref) {
+	    if (!ref->value->resident) {
+	       /* Need to bring the value in from scratch space.  The code for
+		* this will be done in brw_wm_emit.c, here we just do the
+		* register allocation and mark the ref as requiring a fill.
+		*/
+	       GLuint reg = search_contiguous_regs(c, 1, thisinsn);
+
+	       c->pass2_grf[reg].value = ref->value;
+	       c->pass2_grf[reg].nextuse = thisinsn;
+
+	       ref->value->resident = &c->pass2_grf[reg];
+
+	       /* Note that a fill is required:
+		*/
+	       ref->unspill_reg = reg*2;
+	    }
+
+	    /* Adjust the hw_reg to point at the value's current location:
+	     */
+	    assert(ref->value == ref->value->resident->value);
+	    ref->hw_reg.nr += (ref->value->resident - c->pass2_grf) * 2;
+	 }
+      }
+   }
+}
+
+
+
+/* Step 3: Work forwards once again.  Perform register allocations,
+ * taking into account instructions like TEX which require contiguous
+ * result registers.  Where necessary spill registers to scratch space
+ * and reload later.
+ */
+void brw_wm_pass2( struct brw_wm_compile *c )
+{
+   GLuint insn;
+   GLuint i;
+
+   init_registers(c);
+
+   for (insn = 0; insn < c->nr_insns; insn++) {
+      struct brw_wm_instruction *inst = &c->instruction[insn];
+
+      /* Update registers' nextuse values:
+       */
+      update_register_usage(c, insn);
+
+      /* May need to unspill some args.
+       */
+      load_args(c, inst);
+
+      /* Allocate registers to hold results: texture ops take a run of
+       * four, everything else one register per written channel. */
+      switch (inst->opcode) {
+      case OPCODE_TEX:
+      case OPCODE_TXB:
+      case OPCODE_TXP:
+	 alloc_contiguous_dest(c, inst->dst, 4, insn);
+	 break;
+
+      default:
+	 for (i = 0; i < 4; i++) {
+	    if (inst->writemask & (1<<i)) {
+	       assert(inst->dst[i]);
+	       alloc_contiguous_dest(c, &inst->dst[i], 1, insn);
+	    }
+	 }
+	 break;
+      }
+
+      if (TEST_DST_SPILLS && inst->opcode != WM_PIXELXY) {
+	 for (i = 0; i < 4; i++)	
+	    if (inst->dst[i])
+	       spill_value(c, inst->dst[i]);
+      }
+   }
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      brw_wm_print_program(c, "pass2");
+   }
+
+   c->state = PASS2_DONE; /* the program is printed once more below with this state set */
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+       brw_wm_print_program(c, "pass2/done");
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..dff466587a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -0,0 +1,369 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "main/macros.h"
+
+
+
+/* Samplers aren't strictly wm state from the hardware's perspective,
+ * but that is the only situation in which we use them in this driver.
+ */
+
+
+
+/* Translate a GL wrap enum to a BRW_TEXCOORDMODE_* value.  Unknown
+ * enums fall back to BRW_TEXCOORDMODE_WRAP.
+ *
+ * The brw (and related graphics cores) do not support GL_CLAMP, so as
+ * in the Intel drivers for "other operating systems" it is implemented
+ * as GL_CLAMP_TO_EDGE. */
+static GLuint translate_wrap_mode( GLenum wrap )
+{
+   switch( wrap ) {
+   case GL_CLAMP:
+   case GL_CLAMP_TO_EDGE:
+      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
+   case GL_CLAMP_TO_BORDER:
+      return BRW_TEXCOORDMODE_CLAMP_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return BRW_TEXCOORDMODE_MIRROR;
+   case GL_REPEAT:
+   default:
+      return BRW_TEXCOORDMODE_WRAP;
+   }
+}
+
+/* Scale value by 2^frac_bits for unsigned fixed point; negatives give 0. */
+static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
+{
+   const GLfloat scaled = value * (1<<frac_bits);
+   return scaled < 0 ? 0 : scaled;
+}
+
+static GLint S_FIXED(GLfloat value, GLuint frac_bits)
+{
+   return value * (1<<frac_bits); /* scale by 2^frac_bits; the float result converts to GLint */
+}
+
+/* Pass a 4-component (border) color to brw_cache_data(); return its result. */
+static dri_bo *upload_default_color( struct brw_context *brw,
+				     const GLfloat *color )
+{
+   struct brw_sampler_default_color sdc;
+
+   COPY_4V(sdc.color, color); 
+   
+   return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+			  NULL, 0 );
+}
+
+/* Cache key describing the sampler state of all texture units. */
+struct wm_sampler_key {
+   int sampler_count; /* index of the highest enabled unit, plus one */
+
+   struct wm_sampler_entry {
+      GLenum tex_target;
+      GLenum wrap_r, wrap_s, wrap_t;
+      float maxlod, minlod;
+      float lod_bias;
+      float max_aniso;
+      GLenum minfilter, magfilter;
+      GLenum comparemode, comparefunc;
+      dri_bo *sdc_bo; /* NOTE(review): never assigned in the code visible here */
+
+      /** If target is cubemap, take context setting.
+       */
+      GLboolean seamless_cube_map;
+   } sampler[BRW_MAX_TEX_UNIT];
+};
+
+/**
+ * Fills *sampler from scratch (it is zeroed first) for a single unit,
+ * based on the sampler key entry and its default-color buffer sdc_bo.
+ */
+static void brw_update_sampler_state(struct wm_sampler_entry *key,
+				     dri_bo *sdc_bo,
+				     struct brw_sampler_state *sampler)
+{
+   _mesa_memset(sampler, 0, sizeof(*sampler));
+
+   switch (key->minfilter) {
+   case GL_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      break;
+   }
+
+   /* Set Anisotropy: max_aniso > 1.0 overrides the min filter chosen
+    * above and takes the place of the magfilter switch. */
+   if (key->max_aniso > 1.0) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+      if (key->max_aniso > 2.0) {
+	 sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+				       BRW_ANISORATIO_16);
+      }
+   }
+   else {
+      switch (key->magfilter) {
+      case GL_NEAREST:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }  
+   }
+
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+   /* Cube-maps on 965 and later must use the same wrap mode for all 3
+    * coordinate dimensions.  Further, only CUBE and CLAMP are valid.
+    */
+   if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
+      if (key->seamless_cube_map &&
+	  (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
+	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+      } else {
+	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+      }
+   } else if (key->tex_target == GL_TEXTURE_1D) {
+      /* There's a bug in 1D texture sampling - it actually pays
+       * attention to the wrap_t value, though it should not.
+       * Override the wrap_t value here to GL_REPEAT to keep
+       * any nonexistent border pixels from floating in.
+       */
+      sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+   }
+
+
+   /* Set shadow function: 
+    */
+   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c).  So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss0.shadow_function =
+	 intel_translate_shadow_compare_func(key->comparefunc);
+   }
+
+   /* Set LOD bias: clamped to [-16, 15], then scaled by 2^6.
+    */
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD: 
+    *
+    * XXX: I don't think that using firstLevel, lastLevel works,
+    * because we always setup the surface state as if firstLevel ==
+    * level zero.  Probably have to subtract firstLevel from each of
+    * these:
+    */
+   sampler->ss0.base_level = U_FIXED(0, 1);
+
+   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
+   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
+   
+   sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
+}
+
+/** Sets up the cache key for sampler state for all texture units.
+ * Also replaces brw->wm.sdc_bo[unit] for every enabled unit. */
+static void
+brw_wm_sampler_populate_key(struct brw_context *brw,
+			    struct wm_sampler_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   int unit;
+
+   memset(key, 0, sizeof(*key)); /* entries of disabled units stay all-zero */
+
+   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
+      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+	 struct wm_sampler_entry *entry = &key->sampler[unit];
+	 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
+	 struct gl_texture_object *texObj = texUnit->_Current;
+	 struct intel_texture_object *intelObj = intel_texture_object(texObj);
+	 struct gl_texture_image *firstImage =
+	    texObj->Image[0][intelObj->firstLevel];
+
+         entry->tex_target = texObj->Target;
+
+	 entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
+	    ? ctx->Texture.CubeMapSeamless : GL_FALSE;
+
+	 entry->wrap_r = texObj->WrapR;
+	 entry->wrap_s = texObj->WrapS;
+	 entry->wrap_t = texObj->WrapT;
+
+	 entry->maxlod = texObj->MaxLod;
+	 entry->minlod = texObj->MinLod;
+	 entry->lod_bias = texUnit->LodBias + texObj->LodBias;
+	 entry->max_aniso = texObj->MaxAnisotropy;
+	 entry->minfilter = texObj->MinFilter;
+	 entry->magfilter = texObj->MagFilter;
+	 entry->comparemode = texObj->CompareMode;
+         entry->comparefunc = texObj->CompareFunc;
+
+	 dri_bo_unreference(brw->wm.sdc_bo[unit]); /* drop the old bo before replacing it */
+	 if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+	    float bordercolor[4] = {
+	       texObj->BorderColor[0],
+	       texObj->BorderColor[0],
+	       texObj->BorderColor[0],
+	       texObj->BorderColor[0]
+	    };
+	    /* GL specs that border color for depth textures is taken from the
+	     * R channel, while the hardware uses A.  Spam R into all the
+	     * channels for safety.
+	     */
+	    brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
+	 } else {
+	    brw->wm.sdc_bo[unit] = upload_default_color(brw,
+							texObj->BorderColor);
+	 }
+	 key->sampler_count = unit + 1; /* ends as highest enabled unit + 1 */
+      }
+   }
+}
+
+/* All samplers must be uploaded in a single contiguous array, which
+ * complicates various things.  However, this is still too confusing -
+ * FIXME: simplify all the different new texture state flags.
+ */
+static void upload_wm_samplers( struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct wm_sampler_key key;
+   int i;
+
+   brw_wm_sampler_populate_key(brw, &key);
+
+   if (brw->wm.sampler_count != key.sampler_count) {
+      brw->wm.sampler_count = key.sampler_count;
+      brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+   }
+
+   dri_bo_unreference(brw->wm.sampler_bo);
+   brw->wm.sampler_bo = NULL;
+   if (brw->wm.sampler_count == 0)
+      return; /* no enabled units: leave sampler_bo NULL */
+
+   brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
+					 &key, sizeof(key),
+					 brw->wm.sdc_bo, key.sampler_count,
+					 NULL);
+
+   /* If we didn't find it in the cache, compute the state and put it in the
+    * cache.
+    */
+   if (brw->wm.sampler_bo == NULL) {
+      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
+
+      memset(sampler, 0, sizeof(sampler));
+      for (i = 0; i < key.sampler_count; i++) {
+	 if (brw->wm.sdc_bo[i] == NULL)
+	    continue; /* this unit's state stays zeroed */
+
+	 brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
+				  &sampler[i]);
+      }
+
+      brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+					    &key, sizeof(key),
+					    brw->wm.sdc_bo, key.sampler_count,
+					    &sampler, sizeof(sampler),
+					    NULL, NULL);
+
+      /* Emit SDC relocations */
+      for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+	 if (!ctx->Texture.Unit[i]._ReallyEnabled)
+	    continue;
+
+	 dri_bo_emit_reloc(brw->wm.sampler_bo,
+			   I915_GEM_DOMAIN_SAMPLER, 0,
+			   0,
+			   i * sizeof(struct brw_sampler_state) +
+			   offsetof(struct brw_sampler_state, ss2),
+			   brw->wm.sdc_bo[i]);
+      }
+   }
+}
+
+const struct brw_tracked_state brw_wm_samplers = {
+   .dirty = {
+      .mesa = _NEW_TEXTURE, /* the only dirty bit this state lists */
+      .brw = 0,
+      .cache = 0
+   },
+   .prepare = upload_wm_samplers, /* prepare hook defined above */
+};
+
+
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
new file mode 100644
index 0000000000..361f91292b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -0,0 +1,317 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/***********************************************************************
+ * WM unit - fragment programs and rasterization
+ */
+
+struct brw_wm_unit_key {
+   unsigned int total_grf, total_scratch;   /* from brw->wm.prog_data */
+   unsigned int urb_entry_read_length;
+   unsigned int curb_entry_read_length;
+   unsigned int dispatch_grf_start_reg;
+   /* filled from brw->curbe.wm_start and brw->urb.vsize respectively */
+   unsigned int curbe_offset;
+   unsigned int urb_size;
+   /* thread limit: 1 under DEBUG_SINGLE_THREAD, else chosen per chipset */
+   unsigned int max_threads;
+   /* counts and flags copied from brw->wm, the GL context and the program */
+   unsigned int nr_surfaces, sampler_count;
+   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+   GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+   GLfloat offset_units, offset_factor;
+};
+
+static void
+wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   const struct gl_fragment_program *fp = brw->fragment_program;
+   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
+   struct intel_context *intel = &brw->intel;
+   /* Start from an all-zero key, then fill every field from current state. */
+   memset(key, 0, sizeof(*key));
+
+   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+      key->max_threads = 1;
+   else {
+      /* WM maximum threads is number of EUs times number of threads per EU. */
+      if (BRW_IS_IGDNG(brw))
+         key->max_threads = 12 * 6;
+      else if (BRW_IS_G4X(brw))
+	 key->max_threads = 10 * 5;
+      else
+	 key->max_threads = 8 * 4;
+   }
+
+   /* CACHE_NEW_WM_PROG */
+   key->total_grf = brw->wm.prog_data->total_grf;
+   key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
+   key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
+   key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
+   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+
+   /* BRW_NEW_URB_FENCE -- NOTE(review): not in brw_wm_unit's dirty flags */
+   key->urb_size = brw->urb.vsize;
+
+   /* BRW_NEW_CURBE_OFFSETS */
+   key->curbe_offset = brw->curbe.wm_start;
+
+   /* BRW_NEW_NR_WM_SURFACES */
+   key->nr_surfaces = brw->wm.nr_surfaces;
+
+   /* CACHE_NEW_SAMPLER */
+   key->sampler_count = brw->wm.sampler_count;
+
+   /* _NEW_POLYGONSTIPPLE */
+   key->polygon_stipple = ctx->Polygon.StippleFlag;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+
+   /* as far as we can tell */
+   key->computes_depth =
+      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
+   /* BRW_NEW_DEPTH_BUFFER
+    * Override for NULL depthbuffer case, required by the Pixel Shader Computed
+    * Depth field.
+    */
+   if (brw->state.depth_region == NULL)
+      key->computes_depth = 0;
+
+   /* _NEW_COLOR */
+   key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
+   key->is_glsl = bfp->isGLSL;
+
+   /* temporary sanity check assertion */
+   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+
+   /* _NEW_DEPTH */
+   key->stats_wm = intel->stats_wm;
+
+   /* _NEW_LINE */
+   key->line_stipple = ctx->Line.StippleFlag;
+
+   /* _NEW_POLYGON */
+   key->offset_enable = ctx->Polygon.OffsetFill;
+   key->offset_units = ctx->Polygon.OffsetUnits;
+   key->offset_factor = ctx->Polygon.OffsetFactor;
+}
+
+/**
+ * Setup wm hardware state (see page 225 of Volume 2), upload it to the
+ * state cache and emit its relocations; reloc_bufs must hold 3 entries. */
+static dri_bo *
+wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
+			dri_bo **reloc_bufs)
+{
+   struct brw_wm_unit_state wm;
+   dri_bo *bo;
+
+   memset(&wm, 0, sizeof(wm));
+
+   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+   wm.thread1.depth_coef_urb_read_offset = 1;
+   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
+
+   if (BRW_IS_IGDNG(brw))
+      wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
+   else
+      wm.thread1.binding_table_entry_count = key->nr_surfaces;
+
+   if (key->total_scratch != 0) {
+      wm.thread2.scratch_space_base_pointer =
+	 brw->wm.scratch_bo->offset >> 10; /* reloc */
+      wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+   } else {
+      wm.thread2.scratch_space_base_pointer = 0;
+      wm.thread2.per_thread_scratch_space = 0;
+   }
+
+   wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg;
+   wm.thread3.urb_entry_read_length = key->urb_entry_read_length;
+   wm.thread3.urb_entry_read_offset = 0;
+   wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
+   wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
+
+   if (BRW_IS_IGDNG(brw)) 
+      wm.wm4.sampler_count = 0; /* hardware requirement */
+   else
+      wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
+
+   if (brw->wm.sampler_bo != NULL) {
+      /* reloc */
+      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+   } else {
+      wm.wm4.sampler_state_pointer = 0;
+   }
+
+   wm.wm5.program_uses_depth = key->uses_depth;
+   wm.wm5.program_computes_depth = key->computes_depth;
+   wm.wm5.program_uses_killpixel = key->uses_kill;
+
+   if (key->is_glsl)
+      wm.wm5.enable_8_pix = 1;
+   else
+      wm.wm5.enable_16_pix = 1;
+
+   wm.wm5.max_threads = key->max_threads - 1;
+   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
+   wm.wm5.legacy_line_rast = 0;
+   wm.wm5.legacy_global_depth_bias = 0;
+   wm.wm5.early_depth_test = 1;	        /* never need to disable */
+   wm.wm5.line_aa_region_width = 0;
+   wm.wm5.line_endcap_aa_region_width = 1;
+
+   wm.wm5.polygon_stipple = key->polygon_stipple;
+
+   if (key->offset_enable) {
+      wm.wm5.depth_offset = 1;
+      /* Something weird going on with legacy_global_depth_bias,
+       * offset_constant, scaling and MRD.  This value passes glean
+       * but gives some odd results elsewhere (eg. the
+       * quad-offset-units test).
+       */
+      wm.global_depth_offset_constant = key->offset_units * 2;
+
+      /* This is the only value that passes glean:
+       */
+      wm.global_depth_offset_scale = key->offset_factor;
+   }
+
+   wm.wm5.line_stipple = key->line_stipple;
+
+   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+      wm.wm4.stats_enable = 1;
+
+   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+			 key, sizeof(*key),
+			 reloc_bufs, 3,
+			 &wm, sizeof(wm),
+			 NULL, NULL);
+
+   /* Emit WM program relocation (delta: grf_reg_count shifted left by 1) */
+   dri_bo_emit_reloc(bo,
+		     I915_GEM_DOMAIN_INSTRUCTION, 0,
+		     wm.thread0.grf_reg_count << 1,
+		     offsetof(struct brw_wm_unit_state, thread0),
+		     brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (key->total_scratch != 0) {
+      dri_bo_emit_reloc(bo,
+			0, 0,
+			wm.thread2.per_thread_scratch_space,
+			offsetof(struct brw_wm_unit_state, thread2),
+			brw->wm.scratch_bo);
+   }
+
+   /* Emit sampler state relocation */
+   if (key->sampler_count != 0) {
+      dri_bo_emit_reloc(bo,
+			I915_GEM_DOMAIN_INSTRUCTION, 0,
+			wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+			offsetof(struct brw_wm_unit_state, wm4),
+			brw->wm.sampler_bo);
+   }
+
+   return bo;
+}
+
+/* Prepare hook: refresh brw->wm.state_bo from the cache, creating it on a miss. */
+static void upload_wm_unit( struct brw_context *brw )
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_wm_unit_key key;
+   dri_bo *reloc_bufs[3];
+   wm_unit_populate_key(brw, &key);
+
+   /* Allocate the necessary scratch space if we haven't already.  Don't
+    * bother reducing the allocation later, since we use scratch so
+    * rarely.
+    */
+   assert(key.total_scratch <= 12 * 1024);
+   if (key.total_scratch) {
+      GLuint total = key.total_scratch * key.max_threads;
+
+      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
+	 dri_bo_unreference(brw->wm.scratch_bo);
+	 brw->wm.scratch_bo = NULL;
+      }
+      if (brw->wm.scratch_bo == NULL) {
+	 brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
+                                           "wm scratch",
+                                           total,
+                                           4096);
+      }
+   }
+   /* The three BOs the unit state points at; they are part of the cache key. */
+   reloc_bufs[0] = brw->wm.prog_bo;
+   reloc_bufs[1] = brw->wm.scratch_bo;
+   reloc_bufs[2] = brw->wm.sampler_bo;
+
+   dri_bo_unreference(brw->wm.state_bo);
+   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
+				       &key, sizeof(key),
+				       reloc_bufs, 3,
+				       NULL);
+   if (brw->wm.state_bo == NULL) {
+      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+   }
+}
+
+const struct brw_tracked_state brw_wm_unit = {
+   .dirty = {
+      .mesa = (_NEW_POLYGON | 
+	       _NEW_POLYGONSTIPPLE | 
+	       _NEW_LINE | 
+	       _NEW_COLOR |
+	       _NEW_DEPTH),
+      /* driver-level state read by wm_unit_populate_key */
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
+	      BRW_NEW_CURBE_OFFSETS |
+	      BRW_NEW_DEPTH_BUFFER |
+	      BRW_NEW_NR_WM_SURFACES),
+      /* cached objects whose replacement invalidates the unit state */
+      .cache = (CACHE_NEW_WM_PROG |
+		CACHE_NEW_SAMPLER)
+   },
+   .prepare = upload_wm_unit,   /* rebuilds brw->wm.state_bo */
+};
+
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
new file mode 100644
index 0000000000..f7cc5153a8
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -0,0 +1,752 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+                   
+
+#include "main/mtypes.h"
+#include "main/texformat.h"
+#include "main/texstore.h"
+#include "shader/prog_parameter.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+/* Map a GL texture target enum onto the matching BRW_SURFACE_* type. */
+static GLuint translate_tex_target( GLenum target )
+{
+   GLuint surface_type = 0;
+   switch (target) {
+   case GL_TEXTURE_1D:
+      surface_type = BRW_SURFACE_1D;
+      break;
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_RECTANGLE_NV:   /* rectangles use the plain 2D type */
+      surface_type = BRW_SURFACE_2D;
+      break;
+   case GL_TEXTURE_3D:
+      surface_type = BRW_SURFACE_3D;
+      break;
+   case GL_TEXTURE_CUBE_MAP:
+      surface_type = BRW_SURFACE_CUBE;
+      break;
+   default:
+      assert(0);                   /* unknown target: result stays 0 */
+      break;
+   }
+   return surface_type;
+}
+
+/* Map a Mesa texture format (plus base format / depth mode) to a BRW_SURFACEFORMAT_* value. */
+static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+				    GLenum depth_mode )
+{
+   switch( mesa_format ) {
+   case MESA_FORMAT_L8:
+      return BRW_SURFACEFORMAT_L8_UNORM;
+
+   case MESA_FORMAT_I8:
+      return BRW_SURFACEFORMAT_I8_UNORM;
+
+   case MESA_FORMAT_A8:
+      return BRW_SURFACEFORMAT_A8_UNORM; 
+
+   case MESA_FORMAT_AL88:
+      return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+   case MESA_FORMAT_RGB888:
+      assert(0);		/* not supported for sampling */
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;      
+
+   case MESA_FORMAT_ARGB8888:   /* GL_RGB base format selects the X8 variant */
+      if (internal_format == GL_RGB)
+	 return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+      else
+	 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+   case MESA_FORMAT_RGBA8888_REV:
+      if (internal_format == GL_RGB)
+	 return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+      else
+	 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case MESA_FORMAT_RGB565:
+      return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+   case MESA_FORMAT_ARGB1555:
+      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+   case MESA_FORMAT_ARGB4444:
+      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+   case MESA_FORMAT_YCBCR_REV:
+      return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+   case MESA_FORMAT_YCBCR:
+      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+      return BRW_SURFACEFORMAT_FXT1;
+
+   case MESA_FORMAT_Z16:   /* depth_mode picks intensity, alpha or luminance */
+      if (depth_mode == GL_INTENSITY) 
+	  return BRW_SURFACEFORMAT_I16_UNORM;
+      else if (depth_mode == GL_ALPHA)
+	  return BRW_SURFACEFORMAT_A16_UNORM;
+      else
+	  return BRW_SURFACEFORMAT_L16_UNORM;
+
+   case MESA_FORMAT_RGB_DXT1:
+       return BRW_SURFACEFORMAT_DXT1_RGB;
+
+   case MESA_FORMAT_RGBA_DXT1:
+       return BRW_SURFACEFORMAT_BC1_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT3:
+       return BRW_SURFACEFORMAT_BC2_UNORM;
+       
+   case MESA_FORMAT_RGBA_DXT5:
+       return BRW_SURFACEFORMAT_BC3_UNORM;
+
+   case MESA_FORMAT_SARGB8:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+   case MESA_FORMAT_SLA8:
+      return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+   case MESA_FORMAT_SL8:
+      return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+   case MESA_FORMAT_SRGB_DXT1:
+      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+   case MESA_FORMAT_S8_Z24:
+      /* XXX: these different surface formats don't seem to
+       * make any difference for shadow sampler/compares.
+       */
+      if (depth_mode == GL_INTENSITY) 
+         return BRW_SURFACEFORMAT_I24X8_UNORM;
+      else if (depth_mode == GL_ALPHA)
+         return BRW_SURFACEFORMAT_A24X8_UNORM;
+      else
+         return BRW_SURFACEFORMAT_L24X8_UNORM;
+
+   case MESA_FORMAT_DUDV8:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+   case MESA_FORMAT_SIGNED_RGBA8888_REV:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   default:   /* no mapping for this format: assert, else return 0 */
+      assert(0);
+      return 0;
+   }
+}
+
+/* Program the SS3 tiling fields for the given I915_TILING_* mode.
+ * Any other tiling value leaves the surface state untouched. */
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+   if (tiling == I915_TILING_NONE) {
+      surf->ss3.tiled_surface = 0;
+      surf->ss3.tile_walk = 0;
+   }
+   else if (tiling == I915_TILING_X) {
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+   }
+   else if (tiling == I915_TILING_Y) {
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+   }
+}
+
+static dri_bo *
+brw_create_texture_surface( struct brw_context *brw,
+			    struct brw_surface_key *key )
+{
+   struct brw_surface_state surf;
+   dri_bo *bo;
+   /* Build the surface state from key, upload it to the surface cache. */
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = translate_tex_target(key->target);
+   if (key->bo) {
+      surf.ss0.surface_format = translate_tex_format(key->format,
+						     key->internal_format,
+						     key->depthmode);
+   }
+   else {   /* no BO in the key: pick the format from key->depth instead */
+      switch (key->depth) {
+      case 32:
+         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+         break;
+      default:
+      case 24:
+         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
+         break;
+      case 16:
+         surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+         break;
+      }
+   }
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+   if (key->bo)
+      surf.ss1.base_addr = key->bo->offset; /* reloc */
+   else
+      surf.ss1.base_addr = key->offset;
+
+   surf.ss2.mip_count = key->last_level - key->first_level;
+   surf.ss2.width = key->width - 1;
+   surf.ss2.height = key->height - 1;
+   brw_set_surface_tiling(&surf, key->tiling);
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
+   surf.ss3.depth = key->depth - 1;
+
+   surf.ss4.min_lod = 0;
+ 
+   if (key->target == GL_TEXTURE_CUBE_MAP) {   /* enable all six cube faces */
+      surf.ss0.cube_pos_x = 1;
+      surf.ss0.cube_pos_y = 1;
+      surf.ss0.cube_pos_z = 1;
+      surf.ss0.cube_neg_x = 1;
+      surf.ss0.cube_neg_y = 1;
+      surf.ss0.cube_neg_z = 1;
+   }
+
+   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+			 key, sizeof(*key),
+			 &key->bo, key->bo ? 1 : 0,
+			 &surf, sizeof(surf),
+			 NULL, NULL);
+
+   if (key->bo) {
+      /* Emit relocation to surface contents */
+      dri_bo_emit_reloc(bo,
+			I915_GEM_DOMAIN_SAMPLER, 0,
+			0,
+			offsetof(struct brw_surface_state, ss1),
+			key->bo);
+   }
+   return bo;
+}
+
+static void
+brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   struct brw_surface_key key;
+   const GLuint surf = SURF_INDEX_TEXTURE(unit);
+   /* Describe the unit's current texture in a zero-initialized surface key. */
+   memset(&key, 0, sizeof(key));
+
+   if (intelObj->imageOverride) {   /* override path: no BO, explicit offset */
+      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
+      key.depth = intelObj->depthOverride;
+      key.bo = NULL;
+      key.offset = intelObj->textureOffset;
+   } else {
+      key.format = firstImage->TexFormat->MesaFormat;
+      key.internal_format = firstImage->InternalFormat;
+      key.pitch = intelObj->mt->pitch;
+      key.depth = firstImage->Depth;
+      key.bo = intelObj->mt->region->buffer;
+      key.offset = 0;
+   }
+
+   key.target = tObj->Target;
+   key.depthmode = tObj->DepthMode;
+   key.first_level = intelObj->firstLevel;
+   key.last_level = intelObj->lastLevel;
+   key.width = firstImage->Width;
+   key.height = firstImage->Height;
+   key.cpp = intelObj->mt->cpp;
+   key.tiling = intelObj->mt->region->tiling;
+   /* Swap this unit's surface BO for the cached one, creating it on a miss. */
+   dri_bo_unreference(brw->wm.surf_bo[surf]);
+   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
+   if (brw->wm.surf_bo[surf] == NULL) {
+      brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
+   }
+}
+
+
+
+/**
+ * Create the constant buffer surface.  Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+dri_bo *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key )
+{
+   const GLint w = key->width - 1;   /* split across three fields below */
+   struct brw_surface_state surf;
+   dri_bo *bo;
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+   surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   assert(key->bo);
+   if (key->bo)
+      surf.ss1.base_addr = key->bo->offset; /* reloc */
+   else
+      surf.ss1.base_addr = key->offset;
+
+   surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
+   surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   surf.ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+ 
+   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+			 key, sizeof(*key),
+			 &key->bo, key->bo ? 1 : 0,
+			 &surf, sizeof(surf),
+			 NULL, NULL);
+
+   if (key->bo) {
+      /* Emit relocation to surface contents (the ss1 base address) */
+      dri_bo_emit_reloc(bo,
+			I915_GEM_DOMAIN_SAMPLER, 0,
+			0,
+			offsetof(struct brw_surface_state, ss1),
+			key->bo);
+   }
+
+   return bo;
+}
+
+/* Creates a new WM constant buffer reflecting the current fragment program's
+ * constants, if needed by the fragment program.
+ *
+ * Otherwise (NULL is returned), constants go through the CURBEs using the
+ * brw_constant_buffer state atom.  The caller owns the returned reference.
+ */
+static drm_intel_bo *
+brw_wm_update_constant_buffer(struct brw_context *brw)
+{
+   struct intel_context *intel = &brw->intel;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params;
+   drm_intel_bo *const_buffer;
+   int size;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM: bail before touching the parameter list. */
+   if (!fp->use_const_buffer)
+      return NULL;
+
+   params = fp->program.Base.Parameters;
+   size = params->NumParameters * 4 * sizeof(GLfloat);   /* vec4 per param */
+   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+				     size, 64);
+   /* _NEW_PROGRAM_CONSTANTS */
+   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+   return const_buffer;
+}
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static void
+brw_update_wm_constant_surface( GLcontext *ctx,
+                                GLuint surf)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct brw_surface_key key;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   const struct gl_program_parameter_list *params =
+      fp->program.Base.Parameters;
+
+   /* If we're in this state update atom, we need to update WM constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   dri_bo_unreference(fp->const_buffer);
+   fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (fp->const_buffer == 0) {
+      drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+      brw->wm.surf_bo[surf] = NULL;
+      return;
+   }
+
+   memset(&key, 0, sizeof(key));
+   /* One RGBA float32 element per parameter, laid out as a single row. */
+   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.internal_format = GL_RGBA;
+   key.bo = fp->const_buffer;
+   key.depthmode = GL_NONE;
+   key.pitch = params->NumParameters;
+   key.width = params->NumParameters;
+   key.height = 1;
+   key.depth = 1;
+   key.cpp = 16;
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   dri_bo_unreference(brw->wm.surf_bo[surf]);
+   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, key.bo ? 1 : 0,
+                                            NULL);
+   if (brw->wm.surf_bo[surf] == NULL) {
+      brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   }
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;   /* binding table must follow */
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static void prepare_wm_constant_surface(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+   /* brw_update_wm_constant_surface() releases the old constant buffer and
+    * uploads a fresh one itself, so only the no-buffer case is handled here;
+    * allocating up front would just create a BO that is dropped at once.
+    */
+   if (!fp->use_const_buffer) {
+      drm_intel_bo_unreference(fp->const_buffer);
+      fp->const_buffer = NULL;
+      if (brw->wm.surf_bo[surf] != NULL) {
+	 drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+	 brw->wm.surf_bo[surf] = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+      }
+      return;
+   }
+
+   brw_update_wm_constant_surface(ctx, surf);
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+   .dirty = {
+      .mesa = (_NEW_PROGRAM_CONSTANTS),   /* constant values */
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM),  /* which program, and use_const_buffer */
+      .cache = 0
+   },
+   .prepare = prepare_wm_constant_surface,
+};
+
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static void
+brw_update_renderbuffer_surface(struct brw_context *brw,
+				struct gl_renderbuffer *rb,
+				unsigned int unit)
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   dri_bo *region_bo = NULL;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_region *region = irb ? irb->region : NULL;
+   struct {   /* function-local cache key for renderbuffer surfaces */
+      unsigned int surface_type;
+      unsigned int surface_format;
+      unsigned int width, height, pitch, cpp;
+      GLubyte color_mask[4];
+      GLboolean color_blend;
+      uint32_t tiling;
+      uint32_t draw_offset;
+   } key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (region != NULL) {
+      region_bo = region->buffer;
+
+      key.surface_type = BRW_SURFACE_2D;
+      switch (irb->texformat->MesaFormat) {
+      case MESA_FORMAT_ARGB8888:
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 break;
+      case MESA_FORMAT_RGB565:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+	 break;
+      case MESA_FORMAT_ARGB1555:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+	 break;
+      case MESA_FORMAT_ARGB4444:
+	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+	 break;
+      default:   /* surface_format keeps the 0 left by the memset */
+	 _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
+		       irb->texformat->MesaFormat);
+      }
+      key.tiling = region->tiling;
+      if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+	 key.width = rb->Width;
+	 key.height = rb->Height;
+      } else {
+	 key.width = region->width;
+	 key.height = region->height;
+      }
+      key.pitch = region->pitch;
+      key.cpp = region->cpp;
+      key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
+   } else {   /* no region: describe a 1x1 BRW_SURFACE_NULL surface */
+      key.surface_type = BRW_SURFACE_NULL;
+      key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      key.tiling = I915_TILING_X;
+      key.width = 1;
+      key.height = 1;
+      key.cpp = 4;
+      key.draw_offset = 0;
+   }
+   memcpy(key.color_mask, ctx->Color.ColorMask,
+	  sizeof(key.color_mask));
+   key.color_blend = (!ctx->Color._LogicOpEnabled &&
+		      ctx->Color.BlendEnabled);
+
+   dri_bo_unreference(brw->wm.surf_bo[unit]);
+   brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+					    BRW_SS_SURFACE,
+					    &key, sizeof(key),
+					    &region_bo, 1,
+					    NULL);
+
+   if (brw->wm.surf_bo[unit] == NULL) {
+      struct brw_surface_state surf;
+
+      memset(&surf, 0, sizeof(surf));
+
+      surf.ss0.surface_format = key.surface_format;
+      surf.ss0.surface_type = key.surface_type;
+      if (key.tiling == I915_TILING_NONE) {
+	 surf.ss1.base_addr = key.draw_offset;
+      } else {
+	 uint32_t tile_offset = key.draw_offset % 4096;
+
+	 surf.ss1.base_addr = key.draw_offset - tile_offset;
+
+	 assert(BRW_IS_G4X(brw) || tile_offset == 0);
+	 if (BRW_IS_G4X(brw)) {
+	    if (key.tiling == I915_TILING_X) {
+	       /* Note that the low bits of these fields are missing, so
+		* there's the possibility of getting in trouble.
+		*/
+	       surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
+	       surf.ss5.y_offset = tile_offset / 512 / 2;
+	    } else {
+	       surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
+	       surf.ss5.y_offset = tile_offset / 128 / 2;
+	    }
+	 }
+      }
+      if (region_bo != NULL)
+	 surf.ss1.base_addr += region_bo->offset; /* reloc */
+
+      surf.ss2.width = key.width - 1;
+      surf.ss2.height = key.height - 1;
+      brw_set_surface_tiling(&surf, key.tiling);
+      surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+
+      /* _NEW_COLOR */
+      surf.ss0.color_blend = key.color_blend;
+      surf.ss0.writedisable_red =   !key.color_mask[0];
+      surf.ss0.writedisable_green = !key.color_mask[1];
+      surf.ss0.writedisable_blue =  !key.color_mask[2];
+      surf.ss0.writedisable_alpha = !key.color_mask[3];
+
+      /* Key size will never match key size for textures, so we're safe. */
+      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
+                                               BRW_SS_SURFACE,
+                                               &key, sizeof(key),
+					       &region_bo, 1,
+					       &surf, sizeof(surf),
+					       NULL, NULL);
+      if (region_bo != NULL) {
+	 /* We might sample from it, and we might render to it, so flag
+	  * them both.  We might be able to figure out from other state
+	  * a more restrictive relocation to emit.
+	  */
+	 drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
+				 offsetof(struct brw_surface_state, ss1),
+				 region_bo,
+				 surf.ss1.base_addr - region_bo->offset,
+				 I915_GEM_DOMAIN_RENDER,
+				 I915_GEM_DOMAIN_RENDER);
+      }
+   }
+}
+
+
+/**
+ * Constructs the binding table for the WM surface state, which maps unit
+ * numbers to surface state objects (cached, keyed on the surf_bo[] list).
+ */
+static dri_bo *
+brw_wm_get_binding_table(struct brw_context *brw)
+{
+   dri_bo *bind_bo;
+
+   assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+
+   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+			      NULL, 0,
+			      brw->wm.surf_bo, brw->wm.nr_surfaces,
+			      NULL);
+
+   if (bind_bo == NULL) {
+      GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
+      uint32_t data[BRW_WM_MAX_SURF];
+      int i;
+
+      for (i = 0; i < brw->wm.nr_surfaces; i++)
+         if (brw->wm.surf_bo[i])
+            data[i] = brw->wm.surf_bo[i]->offset;
+         else
+            data[i] = 0;
+
+      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+				  NULL, 0,
+				  brw->wm.surf_bo, brw->wm.nr_surfaces,
+				  data, data_size,
+				  NULL, NULL);
+      /* Emit binding table relocations to surface state.  Only the first
+       * nr_surfaces slots were uploaded, so never relocate past them. */
+      for (i = 0; i < brw->wm.nr_surfaces; i++) {
+	 if (brw->wm.surf_bo[i] != NULL) {
+	    dri_bo_emit_reloc(bind_bo,
+			      I915_GEM_DOMAIN_INSTRUCTION, 0,
+			      0,
+			      i * sizeof(GLuint),
+			      brw->wm.surf_bo[i]);
+	 }
+      }
+   }
+
+   return bind_bo;
+}
+
+static void prepare_wm_surfaces(struct brw_context *brw )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+   int old_nr_surfaces;
+
+   /* _NEW_BUFFERS */
+   /* Update surfaces for drawing buffers */
+   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
+      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+         brw_update_renderbuffer_surface(brw,
+					 ctx->DrawBuffer->_ColorDrawBuffers[i],
+					 i);
+      }
+   } else {   /* no color draw buffers: surface 0 is built from a NULL rb */
+      brw_update_renderbuffer_surface(brw, NULL, 0);
+   }
+
+   old_nr_surfaces = brw->wm.nr_surfaces;
+   brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+   /* nr_surfaces ends up one past the highest surface index in use. */
+   if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
+       brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+
+   /* Update surfaces for textures */
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
+      const GLuint surf = SURF_INDEX_TEXTURE(i);
+
+      /* _NEW_TEXTURE, BRW_NEW_TEXDATA */
+      if (texUnit->_ReallyEnabled) {
+	 brw_update_texture_surface(ctx, i);
+	 brw->wm.nr_surfaces = surf + 1;
+      } else {
+         dri_bo_unreference(brw->wm.surf_bo[surf]);
+         brw->wm.surf_bo[surf] = NULL;
+      }
+   }
+
+   dri_bo_unreference(brw->wm.bind_bo);
+   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
+   /* Flag BRW_NEW_NR_WM_SURFACES when the surface count changed. */
+   if (brw->wm.nr_surfaces != old_nr_surfaces)
+      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+}
+
+const struct brw_tracked_state brw_wm_surfaces = {
+   .dirty = {
+      .mesa = (_NEW_COLOR |
+               _NEW_TEXTURE |
+               _NEW_BUFFERS),
+      .brw = (BRW_NEW_CONTEXT |
+	      BRW_NEW_WM_SURFACES),   /* NOTE(review): BRW_NEW_TEXDATA is cited in prepare_wm_surfaces but not listed */
+      .cache = 0
+   },
+   .prepare = prepare_wm_surfaces,   /* rebuilds surf_bo[] and bind_bo */
+};
+
+
+
diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h
new file mode 100644
index 0000000000..d4899aab7f
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_batchbuffer.h
@@ -0,0 +1,184 @@
+#ifndef INTEL_BATCHBUFFER_H
+#define INTEL_BATCHBUFFER_H
+
+#include "main/mtypes.h"
+
+#include "intel_context.h"
+#include "intel_bufmgr.h"
+#include "intel_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+enum cliprect_mode {
+   /**
+    * Batchbuffer contents may be looped over per cliprect, but do not
+    * require it.
+    */
+   IGNORE_CLIPRECTS,
+   /**
+    * Batchbuffer contents require looping over per cliprect at batch submit
+    * time.
+    *
+    * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
+    * constant cliprect, as in DRI2 or FBO rendering.
+    */
+   LOOP_CLIPRECTS,
+   /**
+    * Batchbuffer contents contain drawing that should not be executed multiple
+    * times.
+    */
+   NO_LOOP_CLIPRECTS,
+   /**
+    * Batchbuffer contents contain drawing that already handles cliprects, such
+    * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
+    *
+    * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
+    * outside of LOCK/UNLOCK.  This is upgraded to just NO_LOOP_CLIPRECTS when
+    * there's a constant cliprect, as in DRI2 or FBO rendering.
+    */
+   REFERENCES_CLIPRECTS
+};
+
+struct intel_batchbuffer
+{
+   struct intel_context *intel;
+
+   dri_bo *buf;
+
+   GLubyte *buffer;
+
+   GLubyte *map;
+   GLubyte *ptr;
+
+   enum cliprect_mode cliprect_mode;
+
+   GLuint size;
+
+   /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
+   struct {
+      GLuint total;
+      GLubyte *start_ptr;
+   } emit;
+
+   GLuint dirty_state;
+};
+
+struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
+                                                  *intel);
+
+void intel_batchbuffer_free(struct intel_batchbuffer *batch);
+
+
+void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
+			      const char *file, int line);
+
+#define intel_batchbuffer_flush(batch) \
+	_intel_batchbuffer_flush(batch, __FILE__, __LINE__)
+
+void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_batchbuffer_emit_dword() calls.
+ */
+void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+                            const void *data, GLuint bytes,
+			    enum cliprect_mode cliprect_mode);
+
+void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+                                     GLuint bytes);
+
+GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+                                       dri_bo *buffer,
+				       uint32_t read_domains,
+				       uint32_t write_domain,
+				       uint32_t offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLint
+intel_batchbuffer_space(struct intel_batchbuffer *batch)
+{
+   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+{
+   assert(batch->map);
+   assert(intel_batchbuffer_space(batch) >= 4);
+   *(GLuint *) (batch->ptr) = dword;
+   batch->ptr += 4;
+}
+
+static INLINE void
+intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+                                GLuint sz,
+				enum cliprect_mode cliprect_mode)
+{
+   assert(sz < batch->size - 8);
+   if (intel_batchbuffer_space(batch) < sz)
+      intel_batchbuffer_flush(batch);
+
+   if ((cliprect_mode == LOOP_CLIPRECTS ||
+	cliprect_mode == REFERENCES_CLIPRECTS) &&
+       batch->intel->constant_cliprect)
+      cliprect_mode = NO_LOOP_CLIPRECTS;
+
+   if (cliprect_mode != IGNORE_CLIPRECTS) {
+      if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
+	 batch->cliprect_mode = cliprect_mode;
+      } else {
+	 if (batch->cliprect_mode != cliprect_mode) {
+	    intel_batchbuffer_flush(batch);
+	    batch->cliprect_mode = cliprect_mode;
+	 }
+      }
+   }
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n, cliprect_mode) do {				\
+   intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
+   assert(intel->batch->emit.start_ptr == NULL);			\
+   intel->batch->emit.total = (n) * 4;					\
+   intel->batch->emit.start_ptr = intel->batch->ptr;			\
+} while (0)
+
+#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
+
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
+   /* delta must fall inside the target buffer */			\
+   assert((unsigned) (delta) < (buf)->size);				\
+   intel_batchbuffer_emit_reloc(intel->batch, (buf),			\
+				read_domains, write_domain, delta);	\
+} while (0)
+#define ADVANCE_BATCH() do {						\
+   /* Both counts below are in bytes (emit.total is set to n * 4). */	\
+   unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
+   assert(intel->batch->emit.start_ptr != NULL);			\
+   if (_n != intel->batch->emit.total) {				\
+      fprintf(stderr, "ADVANCE_BATCH: %u of %u bytes emitted\n",	\
+	      _n, intel->batch->emit.total);				\
+      abort();								\
+   }									\
+   intel->batch->emit.start_ptr = NULL;					\
+} while(0)
+
+static INLINE void
+intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
+{
+   intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+   intel_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h
new file mode 100644
index 0000000000..3dc8653a73
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_chipset.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+#define PCI_CHIP_I810			0x7121
+#define PCI_CHIP_I810_DC100		0x7123
+#define PCI_CHIP_I810_E			0x7125
+#define PCI_CHIP_I815			0x1132
+
+#define PCI_CHIP_I830_M			0x3577
+#define PCI_CHIP_845_G			0x2562
+#define PCI_CHIP_I855_GM		0x3582
+#define PCI_CHIP_I865_G			0x2572
+
+#define PCI_CHIP_I915_G			0x2582
+#define PCI_CHIP_E7221_G		0x258A
+#define PCI_CHIP_I915_GM		0x2592
+#define PCI_CHIP_I945_G			0x2772
+#define PCI_CHIP_I945_GM		0x27A2
+#define PCI_CHIP_I945_GME		0x27AE
+
+#define PCI_CHIP_Q35_G			0x29B2
+#define PCI_CHIP_G33_G			0x29C2
+#define PCI_CHIP_Q33_G			0x29D2
+
+#define PCI_CHIP_IGD_GM			0xA011
+#define PCI_CHIP_IGD_G			0xA001
+
+/* Device-id family tests; devid is parenthesized so any expression works. */
+#define IS_IGDGM(devid)	((devid) == PCI_CHIP_IGD_GM)
+#define IS_IGDG(devid)	((devid) == PCI_CHIP_IGD_G)
+#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
+
+#define PCI_CHIP_I965_G			0x29A2
+#define PCI_CHIP_I965_Q			0x2992
+#define PCI_CHIP_I965_G_1		0x2982
+#define PCI_CHIP_I946_GZ		0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+#define PCI_CHIP_I965_GME               0x2A12
+
+#define PCI_CHIP_GM45_GM                0x2A42
+
+#define PCI_CHIP_IGD_E_G                0x2E02
+#define PCI_CHIP_Q45_G                  0x2E12
+#define PCI_CHIP_G45_G                  0x2E22
+#define PCI_CHIP_G41_G                  0x2E32
+#define PCI_CHIP_B43_G                  0x2E42
+
+#define PCI_CHIP_ILD_G                  0x0042
+#define PCI_CHIP_ILM_G                  0x0046
+
+#define IS_MOBILE(devid)	((devid) == PCI_CHIP_I855_GM || \
+				 (devid) == PCI_CHIP_I915_GM || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_GM45_GM || \
+				 IS_IGD(devid) || \
+				 (devid) == PCI_CHIP_ILM_G)
+
+#define IS_G45(devid)           ((devid) == PCI_CHIP_IGD_E_G || \
+                                 (devid) == PCI_CHIP_Q45_G || \
+                                 (devid) == PCI_CHIP_G45_G || \
+                                 (devid) == PCI_CHIP_G41_G || \
+                                 (devid) == PCI_CHIP_B43_G)
+#define IS_GM45(devid)          ((devid) == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
+
+#define IS_ILD(devid)           ((devid) == PCI_CHIP_ILD_G)
+#define IS_ILM(devid)           ((devid) == PCI_CHIP_ILM_G)
+#define IS_IGDNG(devid)           (IS_ILD(devid) || IS_ILM(devid))
+
+#define IS_915(devid)		((devid) == PCI_CHIP_I915_G || \
+				 (devid) == PCI_CHIP_E7221_G || \
+				 (devid) == PCI_CHIP_I915_GM)
+
+#define IS_945(devid)		((devid) == PCI_CHIP_I945_G || \
+				 (devid) == PCI_CHIP_I945_GM || \
+				 (devid) == PCI_CHIP_I945_GME || \
+				 (devid) == PCI_CHIP_G33_G || \
+				 (devid) == PCI_CHIP_Q33_G || \
+				 (devid) == PCI_CHIP_Q35_G || IS_IGD(devid))
+
+#define IS_965(devid)		((devid) == PCI_CHIP_I965_G || \
+				 (devid) == PCI_CHIP_I965_Q || \
+				 (devid) == PCI_CHIP_I965_G_1 || \
+				 (devid) == PCI_CHIP_I965_GM || \
+				 (devid) == PCI_CHIP_I965_GME || \
+				 (devid) == PCI_CHIP_I946_GZ || \
+				 IS_G4X(devid) || \
+				 IS_IGDNG(devid))
+
+#define IS_9XX(devid)		(IS_915(devid) || IS_945(devid) || \
+				 IS_965(devid))
diff --git a/src/gallium/drivers/i965/intel_structs.h b/src/gallium/drivers/i965/intel_structs.h
new file mode 100644
index 0000000000..522e3bd92c
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_structs.h
@@ -0,0 +1,132 @@
+#ifndef INTEL_STRUCTS_H
+#define INTEL_STRUCTS_H
+
+struct br0 {
+   GLuint length:8;
+   GLuint pad0:3;
+   GLuint dst_tiled:1;
+   GLuint pad1:8;
+   GLuint write_rgb:1;
+   GLuint write_alpha:1;
+   GLuint opcode:7;
+   GLuint client:3;
+};
+
+   
+struct br13 {
+   GLint dest_pitch:16;
+   GLuint rop:8;
+   GLuint color_depth:2;
+   GLuint pad1:3;
+   GLuint mono_source_transparency:1;
+   GLuint clipping_enable:1;
+   GLuint pad0:1;
+};
+
+
+
+/* This is an attempt to move some of the 2D interaction in this
+ * driver to using structs for packets rather than a bunch of #defines
+ * and dwords.
+ */
+struct xy_color_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+   GLuint color;
+};
+
+struct xy_src_copy_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw2;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw3;
+   
+   GLuint dest_base_addr;
+
+   struct {
+      GLuint src_x1:16;
+      GLuint src_y1:16;
+   } dw5;
+
+   struct {
+      GLint src_pitch:16;
+      GLuint pad:16;
+   } dw6;
+   
+   GLuint src_base_addr;
+};
+
+struct xy_setup_blit {
+   struct br0 br0;
+   struct br13 br13;
+
+   struct {
+      GLuint clip_x1:16;
+      GLuint clip_y1:16;
+   } dw2;
+
+   struct {
+      GLuint clip_x2:16;
+      GLuint clip_y2:16;
+   } dw3;
+      
+   GLuint dest_base_addr;
+   GLuint background_color;
+   GLuint foreground_color;
+   GLuint pattern_base_addr;
+};
+
+
+struct xy_text_immediate_blit {
+   struct {
+      GLuint length:8;
+      GLuint pad2:3;
+      GLuint dst_tiled:1;
+      GLuint pad1:4;
+      GLuint byte_packed:1;
+      GLuint pad0:5;
+      GLuint opcode:7;
+      GLuint client:3;
+   } dw0;
+
+   struct {
+      GLuint dest_x1:16;
+      GLuint dest_y1:16;
+   } dw1;
+
+   struct {
+      GLuint dest_x2:16;
+      GLuint dest_y2:16;
+   } dw2;   
+
+   /* Src bitmap data follows as inline dwords.
+    */
+};
+
+
+#define CLIENT_2D 0x2
+#define OPCODE_XY_SETUP_BLT 0x1
+#define OPCODE_XY_COLOR_BLT 0x50
+#define OPCODE_XY_TEXT_IMMEDIATE_BLT 0x31
+
+#endif
diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c
new file mode 100644
index 0000000000..3322a71130
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_tex_format.c
@@ -0,0 +1,225 @@
+#include "intel_context.h"
+#include "intel_tex.h"
+#include "intel_chipset.h"
+#include "main/texformat.h"
+#include "main/enums.h"
+
+
+/**
+ * Choose hardware texture format given the user's glTexImage parameters.
+ *
+ * It works out that this function is fine for all the supported
+ * hardware.  However, there is still a need to map the formats onto
+ * hardware descriptors.
+ *
+ * Note that the i915 can actually support many more formats than
+ * these if we take the step of simply swizzling the colors
+ * immediately after sampling...
+ */
+const struct gl_texture_format *
+intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
+                         GLenum format, GLenum type)
+{
+   struct intel_context *intel = intel_context(ctx);
+   const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
+
+#if 0
+   printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
+          __FUNCTION__, internalFormat, format, type);
+#endif
+
+   switch (internalFormat) {
+   case 4:
+   case GL_RGBA:
+   case GL_COMPRESSED_RGBA:
+      if (format == GL_BGRA) {
+         if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
+            return &_mesa_texformat_argb8888;
+         }
+         else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
+            return &_mesa_texformat_argb4444;
+         }
+         else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
+            return &_mesa_texformat_argb1555;
+         }
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case 3:
+   case GL_RGB:
+   case GL_COMPRESSED_RGB:
+      if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
+         return &_mesa_texformat_rgb565;
+      }
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
+
+   case GL_RGBA8:
+   case GL_RGB10_A2:
+   case GL_RGBA12:
+   case GL_RGBA16:
+      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
+
+   case GL_RGBA4:
+   case GL_RGBA2:
+      return &_mesa_texformat_argb4444;
+
+   case GL_RGB5_A1:
+      return &_mesa_texformat_argb1555;
+
+   case GL_RGB8:
+   case GL_RGB10:
+   case GL_RGB12:
+   case GL_RGB16:
+      return &_mesa_texformat_argb8888;
+
+   case GL_RGB5:
+   case GL_RGB4:
+   case GL_R3_G3_B2:
+      return &_mesa_texformat_rgb565;
+
+   case GL_ALPHA:
+   case GL_ALPHA4:
+   case GL_ALPHA8:
+   case GL_ALPHA12:
+   case GL_ALPHA16:
+   case GL_COMPRESSED_ALPHA:
+      return &_mesa_texformat_a8;
+
+   case 1:
+   case GL_LUMINANCE:
+   case GL_LUMINANCE4:
+   case GL_LUMINANCE8:
+   case GL_LUMINANCE12:
+   case GL_LUMINANCE16:
+   case GL_COMPRESSED_LUMINANCE:
+      return &_mesa_texformat_l8;
+
+   case 2:
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE4_ALPHA4:
+   case GL_LUMINANCE6_ALPHA2:
+   case GL_LUMINANCE8_ALPHA8:
+   case GL_LUMINANCE12_ALPHA4:
+   case GL_LUMINANCE12_ALPHA12:
+   case GL_LUMINANCE16_ALPHA16:
+   case GL_COMPRESSED_LUMINANCE_ALPHA:
+      return &_mesa_texformat_al88;
+
+   case GL_INTENSITY:
+   case GL_INTENSITY4:
+   case GL_INTENSITY8:
+   case GL_INTENSITY12:
+   case GL_INTENSITY16:
+   case GL_COMPRESSED_INTENSITY:
+      return &_mesa_texformat_i8;
+
+   case GL_YCBCR_MESA:
+      if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
+         return &_mesa_texformat_ycbcr;
+      else
+         return &_mesa_texformat_ycbcr_rev;
+
+   case GL_COMPRESSED_RGB_FXT1_3DFX:
+      return &_mesa_texformat_rgb_fxt1;
+   case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      return &_mesa_texformat_rgba_fxt1;
+
+   case GL_RGB_S3TC:
+   case GL_RGB4_S3TC:
+   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgb_dxt1;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+      return &_mesa_texformat_rgba_dxt1;
+
+   case GL_RGBA_S3TC:
+   case GL_RGBA4_S3TC:
+   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+      return &_mesa_texformat_rgba_dxt3;
+
+   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+      return &_mesa_texformat_rgba_dxt5;
+
+   case GL_DEPTH_COMPONENT:
+   case GL_DEPTH_COMPONENT16:
+   case GL_DEPTH_COMPONENT24:
+   case GL_DEPTH_COMPONENT32:
+#if 0
+      return &_mesa_texformat_z16;
+#else
+      /* fall-through.
+       * 16bpp depth texture can't be paired with a stencil buffer so
+       * always use the combined depth/stencil format.
+       */
+#endif
+   case GL_DEPTH_STENCIL_EXT:
+   case GL_DEPTH24_STENCIL8_EXT:
+      return &_mesa_texformat_s8_z24;
+
+#ifndef I915
+   case GL_SRGB_EXT:
+   case GL_SRGB8_EXT:
+   case GL_SRGB_ALPHA_EXT:
+   case GL_SRGB8_ALPHA8_EXT:
+   case GL_COMPRESSED_SRGB_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_EXT:
+   case GL_COMPRESSED_SLUMINANCE_EXT:
+   case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
+      return &_mesa_texformat_sargb8;
+   case GL_SLUMINANCE_EXT:
+   case GL_SLUMINANCE8_EXT:
+      if (IS_G4X(intel->intelScreen->deviceID))
+         return &_mesa_texformat_sl8;
+      else
+         return &_mesa_texformat_sargb8;
+   case GL_SLUMINANCE_ALPHA_EXT:
+   case GL_SLUMINANCE8_ALPHA8_EXT:
+      if (IS_G4X(intel->intelScreen->deviceID))
+         return &_mesa_texformat_sla8;
+      else
+         return &_mesa_texformat_sargb8;
+   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
+   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
+      return &_mesa_texformat_srgb_dxt1;
+
+   /* i915 could also do this */
+   case GL_DUDV_ATI:
+   case GL_DU8DV8_ATI:
+      return &_mesa_texformat_dudv8;
+   case GL_RGBA_SNORM:
+   case GL_RGBA8_SNORM:
+      return &_mesa_texformat_signed_rgba8888_rev;
+#endif
+
+   default:
+      fprintf(stderr, "unexpected texture format %s in %s\n",
+              _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
+      return NULL;
+   }
+
+   return NULL;                 /* never get here */
+}
+
+/* Returns 2 for the FXT1/DXT1 formats, 4 for DXT3/DXT5, 0 for anything else.
+ * NOTE(review): what the byte count is measured per is not visible here.
+ */
+int intel_compressed_num_bytes(GLuint mesaFormat)
+{
+   switch (mesaFormat) {
+   case MESA_FORMAT_RGB_FXT1:
+   case MESA_FORMAT_RGBA_FXT1:
+   case MESA_FORMAT_RGB_DXT1:
+   case MESA_FORMAT_RGBA_DXT1:
+      return 2;
+
+   case MESA_FORMAT_RGBA_DXT3:
+   case MESA_FORMAT_RGBA_DXT5:
+      return 4;
+
+   default:
+      /* Any format not listed above yields zero. */
+      return 0;
+   }
+}
diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c
new file mode 100644
index 0000000000..7d69ea4484
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_tex_layout.c
@@ -0,0 +1,140 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  *   Michel Dänzer <michel@tungstengraphics.com>
+  */
+
+#include "intel_mipmap_tree.h"
+#include "intel_tex_layout.h"
+#include "intel_context.h"
+#include "main/macros.h"
+
+void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
+{
+    switch (internalFormat) {
+    case GL_COMPRESSED_RGB_FXT1_3DFX:
+    case GL_COMPRESSED_RGBA_FXT1_3DFX:
+        *w = 8;
+        *h = 4;
+        break;
+
+    case GL_RGB_S3TC:
+    case GL_RGB4_S3TC:
+    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+    case GL_RGBA_S3TC:
+    case GL_RGBA4_S3TC:
+    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+        *w = 4;
+        *h = 4;
+        break;
+
+    default:
+        *w = 4;
+        *h = 2;
+        break;
+    }
+}
+
+void i945_miptree_layout_2d( struct intel_context *intel,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling )
+{
+   GLuint align_h = 2, align_w = 4;
+   GLuint level;
+   GLuint x = 0;
+   GLuint y = 0;
+   GLuint width = mt->width0;
+   GLuint height = mt->height0;
+
+   mt->pitch = mt->width0;
+   intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+   if (mt->compressed) {
+       mt->pitch = ALIGN(mt->width0, align_w);
+   }
+
+   /* May need to adjust pitch to accommodate the placement of
+    * the 2nd mipmap.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap out past the width of its parent.
+    */
+   if (mt->first_level != mt->last_level) {
+       GLuint mip1_width;
+
+       if (mt->compressed) {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + ALIGN(minify(minify(mt->width0)), align_w);
+       } else {
+           mip1_width = ALIGN(minify(mt->width0), align_w)
+               + minify(minify(mt->width0));
+       }
+
+       if (mip1_width > mt->pitch) {
+           mt->pitch = mip1_width;
+       }
+   }
+
+   /* Pitch must be a whole number of dwords, even though we
+    * express it in texels.
+    */
+   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
+   mt->total_height = 0;
+
+   for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+      GLuint img_height;
+
+      intel_miptree_set_level_info(mt, level, 1, x, y, width, 
+				   height, 1);
+
+      if (mt->compressed)
+	 img_height = MAX2(1, height/4);
+      else
+	 img_height = ALIGN(height, align_h);
+
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one:
+       */
+      mt->total_height = MAX2(mt->total_height, y + img_height);
+
+      /* Layout_below: step right after second mipmap.
+       */
+      if (level == mt->first_level + 1) {
+	 x += ALIGN(width, align_w);
+      }
+      else {
+	 y += img_height;
+      }
+
+      width  = minify(width);
+      height = minify(height);
+   }
+}
-- 
cgit v1.2.3


From 57a920cb1a0b6051068e730747b3fb475de88aca Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 17:01:32 +0100
Subject: i965g: wip

---
 src/gallium/drivers/i965/brw_bo.c             |   12 +
 src/gallium/drivers/i965/brw_cc.c             |  180 +----
 src/gallium/drivers/i965/brw_clip.c           |  127 +--
 src/gallium/drivers/i965/brw_clip.h           |    5 +-
 src/gallium/drivers/i965/brw_clip_line.c      |    7 -
 src/gallium/drivers/i965/brw_clip_point.c     |    7 -
 src/gallium/drivers/i965/brw_clip_state.c     |    7 +-
 src/gallium/drivers/i965/brw_clip_tri.c       |    7 -
 src/gallium/drivers/i965/brw_clip_unfilled.c  |    5 -
 src/gallium/drivers/i965/brw_clip_util.c      |    7 -
 src/gallium/drivers/i965/brw_context.c        |  135 ++--
 src/gallium/drivers/i965/brw_context.h        |    7 +-
 src/gallium/drivers/i965/brw_curbe.c          |   89 +--
 src/gallium/drivers/i965/brw_defines.h        |    4 +-
 src/gallium/drivers/i965/brw_disasm.c         |    2 -
 src/gallium/drivers/i965/brw_draw.c           |  244 +-----
 src/gallium/drivers/i965/brw_draw_upload.c    |  566 ++++---------
 src/gallium/drivers/i965/brw_gs.c             |    2 +-
 src/gallium/drivers/i965/brw_pipe_blend.c     |   41 +
 src/gallium/drivers/i965/brw_pipe_debug.c     |    2 +
 src/gallium/drivers/i965/brw_pipe_depth.c     |   52 ++
 src/gallium/drivers/i965/brw_pipe_fb.c        |   25 +
 src/gallium/drivers/i965/brw_pipe_flush.c     |   64 ++
 src/gallium/drivers/i965/brw_screen_surface.c |   27 +
 src/gallium/drivers/i965/brw_sf.c             |    4 +-
 src/gallium/drivers/i965/brw_sf_emit.c        |    4 +-
 src/gallium/drivers/i965/brw_state_upload.c   |   63 +-
 src/gallium/drivers/i965/brw_swtnl.c          |  114 +++
 src/gallium/drivers/i965/brw_types.h          |   11 +
 src/gallium/drivers/i965/brw_util.c           |    8 -
 src/gallium/drivers/i965/brw_vs.c             |   12 +-
 src/gallium/drivers/i965/brw_vs_emit.c        |  250 ++----
 src/gallium/drivers/i965/brw_wm.c             |   59 +-
 src/gallium/drivers/i965/brw_wm.h             |    1 -
 src/gallium/drivers/i965/brw_wm_emit.c        |   17 +-
 src/gallium/drivers/i965/brw_wm_fp.c          |  193 ++---
 src/gallium/drivers/i965/brw_wm_glsl.c        | 1060 +------------------------
 src/gallium/drivers/i965/brw_wm_pass0.c       |    1 -
 src/gallium/drivers/i965/brw_wm_pass1.c       |   81 +-
 src/gallium/drivers/i965/intel_chipset.h      |    4 +-
 40 files changed, 907 insertions(+), 2599 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_bo.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_blend.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_debug.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_depth.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_fb.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_flush.c
 create mode 100644 src/gallium/drivers/i965/brw_screen_surface.c
 create mode 100644 src/gallium/drivers/i965/brw_swtnl.c
 create mode 100644 src/gallium/drivers/i965/brw_types.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c
new file mode 100644
index 0000000000..e7a4dac666
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_bo.c
@@ -0,0 +1,12 @@
+
+
+void brw_buffer_subdata()
+{
+      if (intel->intelScreen->kernel_exec_fencing) {
+	 drm_intel_gem_bo_map_gtt(bo);
+	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
+	 drm_intel_gem_bo_unmap_gtt(bo);
+      } else {
+	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+      }
+}
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 1088a7a607..9ab5638137 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -62,84 +62,21 @@ const struct brw_tracked_state brw_cc_vp = {
 };
 
 struct brw_cc_unit_key {
-   GLboolean stencil, stencil_two_side, color_blend, alpha_enabled;
-
-   GLenum stencil_func[2], stencil_fail_op[2];
-   GLenum stencil_pass_depth_fail_op[2], stencil_pass_depth_pass_op[2];
-   GLubyte stencil_ref[2], stencil_write_mask[2], stencil_test_mask[2];
-   GLenum logic_op;
-
-   GLenum blend_eq_rgb, blend_eq_a;
-   GLenum blend_src_rgb, blend_src_a;
-   GLenum blend_dst_rgb, blend_dst_a;
-
-   GLenum alpha_func;
-   GLclampf alpha_ref;
-
-   GLboolean dither;
-
-   GLboolean depth_test, depth_write;
-   GLenum depth_func;
+   struct pipe_depth_stencil_alpha_state dsa;
+   struct pipe_blend_state blend; /* no color mask */
 };
 
 static void
 cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   const unsigned back = ctx->Stencil._BackFace;
-
    memset(key, 0, sizeof(*key));
+   
+   key->dsa = brw->curr.dsa.base;
+   key->blend = brw->curr.blend.base;
 
-   key->stencil = ctx->Stencil._Enabled;
-   key->stencil_two_side = ctx->Stencil._TestTwoSide;
-
-   if (key->stencil) {
-      key->stencil_func[0] = ctx->Stencil.Function[0];
-      key->stencil_fail_op[0] = ctx->Stencil.FailFunc[0];
-      key->stencil_pass_depth_fail_op[0] = ctx->Stencil.ZFailFunc[0];
-      key->stencil_pass_depth_pass_op[0] = ctx->Stencil.ZPassFunc[0];
-      key->stencil_ref[0] = ctx->Stencil.Ref[0];
-      key->stencil_write_mask[0] = ctx->Stencil.WriteMask[0];
-      key->stencil_test_mask[0] = ctx->Stencil.ValueMask[0];
-   }
-   if (key->stencil_two_side) {
-      key->stencil_func[1] = ctx->Stencil.Function[back];
-      key->stencil_fail_op[1] = ctx->Stencil.FailFunc[back];
-      key->stencil_pass_depth_fail_op[1] = ctx->Stencil.ZFailFunc[back];
-      key->stencil_pass_depth_pass_op[1] = ctx->Stencil.ZPassFunc[back];
-      key->stencil_ref[1] = ctx->Stencil.Ref[back];
-      key->stencil_write_mask[1] = ctx->Stencil.WriteMask[back];
-      key->stencil_test_mask[1] = ctx->Stencil.ValueMask[back];
-   }
-
-   if (ctx->Color._LogicOpEnabled)
-      key->logic_op = ctx->Color.LogicOp;
-   else
-      key->logic_op = GL_COPY;
-
-   key->color_blend = ctx->Color.BlendEnabled;
-   if (key->color_blend) {
-      key->blend_eq_rgb = ctx->Color.BlendEquationRGB;
-      key->blend_eq_a = ctx->Color.BlendEquationA;
-      key->blend_src_rgb = ctx->Color.BlendSrcRGB;
-      key->blend_dst_rgb = ctx->Color.BlendDstRGB;
-      key->blend_src_a = ctx->Color.BlendSrcA;
-      key->blend_dst_a = ctx->Color.BlendDstA;
-   }
-
-   key->alpha_enabled = ctx->Color.AlphaEnabled;
-   if (key->alpha_enabled) {
-      key->alpha_func = ctx->Color.AlphaFunc;
-      key->alpha_ref = ctx->Color.AlphaRef;
-   }
-
-   key->dither = ctx->Color.DitherFlag;
-
-   key->depth_test = ctx->Depth.Test;
-   if (key->depth_test) {
-      key->depth_func = ctx->Depth.Func;
-      key->depth_write = ctx->Depth.Mask;
-   }
+   /* Clear non-respected values:
+    */
+   key->blend.colormask = 0xf;
 }
 
 /**
@@ -153,103 +90,16 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 
    memset(&cc, 0, sizeof(cc));
 
-   /* _NEW_STENCIL */
-   if (key->stencil) {
-      cc.cc0.stencil_enable = 1;
-      cc.cc0.stencil_func =
-	 intel_translate_compare_func(key->stencil_func[0]);
-      cc.cc0.stencil_fail_op =
-	 intel_translate_stencil_op(key->stencil_fail_op[0]);
-      cc.cc0.stencil_pass_depth_fail_op =
-	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
-      cc.cc0.stencil_pass_depth_pass_op =
-	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
-      cc.cc1.stencil_ref = key->stencil_ref[0];
-      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
-      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
-
-      if (key->stencil_two_side) {
-	 cc.cc0.bf_stencil_enable = 1;
-	 cc.cc0.bf_stencil_func =
-	    intel_translate_compare_func(key->stencil_func[1]);
-	 cc.cc0.bf_stencil_fail_op =
-	    intel_translate_stencil_op(key->stencil_fail_op[1]);
-	 cc.cc0.bf_stencil_pass_depth_fail_op =
-	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
-	 cc.cc0.bf_stencil_pass_depth_pass_op =
-	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
-	 cc.cc1.bf_stencil_ref = key->stencil_ref[1];
-	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
-	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
-      }
-
-      /* Not really sure about this:
-       */
-      if (key->stencil_write_mask[0] ||
-	  (key->stencil_two_side && key->stencil_write_mask[1]))
-	 cc.cc0.stencil_write_enable = 1;
-   }
-
-   /* _NEW_COLOR */
-   if (key->logic_op != GL_COPY) {
-      cc.cc2.logicop_enable = 1;
-      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
-   } else if (key->color_blend) {
-      GLenum eqRGB = key->blend_eq_rgb;
-      GLenum eqA = key->blend_eq_a;
-      GLenum srcRGB = key->blend_src_rgb;
-      GLenum dstRGB = key->blend_dst_rgb;
-      GLenum srcA = key->blend_src_a;
-      GLenum dstA = key->blend_dst_a;
-
-      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
-	 srcRGB = dstRGB = GL_ONE;
-      }
-
-      if (eqA == GL_MIN || eqA == GL_MAX) {
-	 srcA = dstA = GL_ONE;
-      }
-
-      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
-      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
-      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
-
-      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
-      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
-      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
-
-      cc.cc3.blend_enable = 1;
-      cc.cc3.ia_blend_enable = (srcA != srcRGB ||
-				dstA != dstRGB ||
-				eqA != eqRGB);
-   }
-
-   if (key->alpha_enabled) {
-      cc.cc3.alpha_test = 1;
-      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
-      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
-
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
-   }
-
-   if (key->dither) {
-      cc.cc5.dither_enable = 1;
-      cc.cc6.y_dither_offset = 0;
-      cc.cc6.x_dither_offset = 0;
-   }
-
-   /* _NEW_DEPTH */
-   if (key->depth_test) {
-      cc.cc2.depth_test = 1;
-      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
-      cc.cc2.depth_write_enable = key->depth_write;
-   }
+   cc.cc0 = brw->dsa.cc0;
+   cc.cc1 = brw->dsa.cc1;
+   cc.cc2 = brw->dsa.cc2;
+   cc.cc3 = brw->dsa.cc3 | brw->blend.cc3;
 
    /* CACHE_NEW_CC_VP */
    cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      cc.cc5.statistics_enable = 1;
+   cc.cc5 = brw->blend.cc5 | brw->debug.cc5;
+
 
    bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
 			 key, sizeof(*key),
@@ -286,7 +136,7 @@ static void prepare_cc_unit( struct brw_context *brw )
 
 const struct brw_tracked_state brw_cc_unit = {
    .dirty = {
-      .mesa = _NEW_STENCIL | _NEW_COLOR | _NEW_DEPTH,
+      .mesa = PIPE_NEW_DEPTH_STENCIL_ALPHA | PIPE_NEW_BLEND,
       .brw = 0,
       .cache = CACHE_NEW_CC_VP
    },
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 20a927cf38..df1b3718d0 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -29,9 +29,9 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
+#include "pipe/p_state.h"
+
+#include "util/u_math.h"
 
 #include "intel_batchbuffer.h"
 
@@ -83,7 +83,7 @@ static void compile_clip_prog( struct brw_context *brw,
 	 delta += ATTR_SIZE;
       }
 
-   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attrs = util_count_bits(c.key.attrs);
    
    if (BRW_IS_IGDNG(brw))
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
@@ -104,16 +104,16 @@ static void compile_clip_prog( struct brw_context *brw,
     * do all three:
     */
    switch (key->primitive) {
-   case GL_TRIANGLES: 
+   case PIPE_PRIM_TRIANGLES: 
       if (key->do_unfilled)
 	 brw_emit_unfilled_clip( &c );
       else
 	 brw_emit_tri_clip( &c );
       break;
-   case GL_LINES:
+   case PIPE_PRIM_LINES:
       brw_emit_line_clip( &c );
       break;
-   case GL_POINTS:
+   case PIPE_PRIM_POINTS:
       brw_emit_point_clip( &c );
       break;
    default:
@@ -143,7 +143,6 @@ static void compile_clip_prog( struct brw_context *brw,
  */
 static void upload_clip_prog(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_clip_prog_key key;
 
    memset(&key, 0, sizeof(key));
@@ -151,101 +150,51 @@ static void upload_clip_prog(struct brw_context *brw)
    /* Populate the key:
     */
    /* BRW_NEW_REDUCED_PRIMITIVE */
-   key.primitive = brw->intel.reduced_primitive;
+   key.primitive = brw->reduced_primitive;
    /* CACHE_NEW_VS_PROG */
    key.attrs = brw->vs.prog_data->outputs_written;
-   /* _NEW_LIGHT */
-   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
-   /* _NEW_TRANSFORM */
-   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+   /* PIPE_NEW_RAST */
+   key.do_flat_shading = brw->rast.base.flatshade;
+   /* PIPE_NEW_UCP */
+   key.nr_userclip = brw->nr_ucp;
 
    if (BRW_IS_IGDNG(brw))
        key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
    else
        key.clip_mode = BRW_CLIPMODE_NORMAL;
 
-   /* _NEW_POLYGON */
-   if (key.primitive == GL_TRIANGLES) {
-      if (ctx->Polygon.CullFlag &&
-	  ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK)
+   /* PIPE_NEW_RAST -- culling both windings rejects every triangle */
+   if (key.primitive == PIPE_PRIM_TRIANGLES) {
+      if (brw->rast->cull_mode == PIPE_WINDING_BOTH)
 	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
       else {
-	 GLuint fill_front = CLIP_CULL;
-	 GLuint fill_back = CLIP_CULL;
-	 GLuint offset_front = 0;
-	 GLuint offset_back = 0;
-
-	 if (!ctx->Polygon.CullFlag ||
-	     ctx->Polygon.CullFaceMode != GL_FRONT) {
-	    switch (ctx->Polygon.FrontMode) {
-	    case GL_FILL: 
-	       fill_front = CLIP_FILL; 
-	       offset_front = 0;
-	       break;
-	    case GL_LINE:
-	       fill_front = CLIP_LINE;
-	       offset_front = ctx->Polygon.OffsetLine;
-	       break;
-	    case GL_POINT:
-	       fill_front = CLIP_POINT;
-	       offset_front = ctx->Polygon.OffsetPoint;
-	       break;
-	    }
+	 key.fill_ccw = CLIP_CULL;
+	 key.fill_cw = CLIP_CULL;
+
+	 if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) {
+	    key.fill_ccw = translate_fill(brw->rast.fill_ccw);
 	 }
 
-	 if (!ctx->Polygon.CullFlag ||
-	     ctx->Polygon.CullFaceMode != GL_BACK) {
-	    switch (ctx->Polygon.BackMode) {
-	    case GL_FILL: 
-	       fill_back = CLIP_FILL; 
-	       offset_back = 0;
-	       break;
-	    case GL_LINE:
-	       fill_back = CLIP_LINE;
-	       offset_back = ctx->Polygon.OffsetLine;
-	       break;
-	    case GL_POINT:
-	       fill_back = CLIP_POINT;
-	       offset_back = ctx->Polygon.OffsetPoint;
-	       break;
-	    }
+	 if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) {
+	    key.fill_cw = translate_fill(brw->rast.fill_cw);
 	 }
 
-	 if (ctx->Polygon.BackMode != GL_FILL ||
-	     ctx->Polygon.FrontMode != GL_FILL) {
+	 if (key.fill_cw != CLIP_FILL ||
+	     key.fill_ccw != CLIP_FILL) {
 	    key.do_unfilled = 1;
-
-	    /* Most cases the fixed function units will handle.  Cases where
-	     * one or more polygon faces are unfilled will require help:
-	     */
 	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+	 }
+
+	 key.offset_ccw = brw->rast.offset_ccw;
+	 key.offset_cw = brw->rast.offset_cw;
+
+	 if (brw->rast.light_twoside &&
+	     key.fill_cw != CLIP_CULL) 
+	    key.copy_bfc_cw = 1;
 
-	    if (offset_back || offset_front) {
-	       /* _NEW_POLYGON, _NEW_BUFFERS */
-	       key.offset_units = ctx->Polygon.OffsetUnits * brw->intel.polygon_offset_scale;
-	       key.offset_factor = ctx->Polygon.OffsetFactor * ctx->DrawBuffer->_MRD;
-	    }
-
-	    switch (ctx->Polygon.FrontFace) {
-	    case GL_CCW:
-	       key.fill_ccw = fill_front;
-	       key.fill_cw = fill_back;
-	       key.offset_ccw = offset_front;
-	       key.offset_cw = offset_back;
-	       if (ctx->Light.Model.TwoSide &&
-		   key.fill_cw != CLIP_CULL) 
-		  key.copy_bfc_cw = 1;
-	       break;
-	    case GL_CW:
-	       key.fill_cw = fill_front;
-	       key.fill_ccw = fill_back;
-	       key.offset_cw = offset_front;
-	       key.offset_ccw = offset_back;
-	       if (ctx->Light.Model.TwoSide &&
-		   key.fill_ccw != CLIP_CULL) 
-		  key.copy_bfc_ccw = 1;
-	       break;
-	    }
+	 if (brw->rast.light_twoside &&
+	     key.fill_ccw != CLIP_CULL) 
+	    key.copy_bfc_ccw = 1;
 	 }
       }
    }
@@ -262,10 +211,8 @@ static void upload_clip_prog(struct brw_context *brw)
 
 const struct brw_tracked_state brw_clip_prog = {
    .dirty = {
-      .mesa  = (_NEW_LIGHT | 
-		_NEW_TRANSFORM |
-		_NEW_POLYGON | 
-		_NEW_BUFFERS),
+      .mesa  = (PIPE_NEW_RAST | 
+		PIPE_NEW_UCP),
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG
    },
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 957df441ab..d80ec819b9 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -43,6 +43,7 @@
  */
 struct brw_clip_prog_key {
    GLuint attrs:32;		
+
    GLuint primitive:4;
    GLuint nr_userclip:3;
    GLuint do_flat_shading:1;
@@ -51,12 +52,10 @@ struct brw_clip_prog_key {
    GLuint fill_ccw:2;		/* includes cull information */
    GLuint offset_cw:1;
    GLuint offset_ccw:1;
-   GLuint pad0:17;
-
    GLuint copy_bfc_cw:1;
    GLuint copy_bfc_ccw:1;
    GLuint clip_mode:3;
-   GLuint pad1:27;
+   GLuint pad1:12;
    
    GLfloat offset_factor;
    GLfloat offset_units;
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index 048ca620fa..6b4da25644 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -29,13 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
index 8458f61c5a..b2cf7b2011 100644
--- a/src/gallium/drivers/i965/brw_clip_point.c
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -29,13 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 234b3744bf..72e27205e2 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -32,7 +32,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
 
 struct brw_clip_unit_key {
    unsigned int total_grf;
@@ -66,8 +65,8 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
    key->nr_urb_entries = brw->urb.nr_clip_entries;
    key->urb_size = brw->urb.vsize;
 
-   /* _NEW_TRANSOFORM */
-   key->depth_clamp = ctx->Transform.DepthClamp;
+   /*  */
+   key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;
 }
 
 static dri_bo *
@@ -175,7 +174,7 @@ static void upload_clip_unit( struct brw_context *brw )
 
 const struct brw_tracked_state brw_clip_unit = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = 0,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
 		BRW_NEW_URB_FENCE),
       .cache = CACHE_NEW_CLIP_PROG
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index 0efd77225e..d8feca6a87 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -29,13 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
index ad1bfa435f..4baff55806 100644
--- a/src/gallium/drivers/i965/brw_clip_unfilled.c
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -29,11 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
 #include "intel_batchbuffer.h"
 
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 5a73abdfee..7a6c46ce07 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -30,13 +30,6 @@
   */
 
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/program.h"
-
-#include "intel_batchbuffer.h"
-
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index c300c33adc..bf0ec89e13 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -52,122 +52,77 @@
 #include "utils.h"
 
 
-/***************************************
- * Mesa's Driver Functions
- ***************************************/
-
-static void brwUseProgram(GLcontext *ctx, GLuint program)
-{
-   _mesa_use_program(ctx, program);
-}
-
-static void brwInitProgFuncs( struct dd_function_table *functions )
-{
-   functions->UseProgram = brwUseProgram;
-}
-static void brwInitDriverFunctions( struct dd_function_table *functions )
-{
-   intelInitDriverFunctions( functions );
-
-   brwInitFragProgFuncs( functions );
-   brwInitProgFuncs( functions );
-   brw_init_queryobj_functions(functions);
-
-   functions->Viewport = intel_viewport;
-}
 
 GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 			    __DRIcontextPrivate *driContextPriv,
 			    void *sharedContextPrivate)
 {
-   struct dd_function_table functions;
    struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
-   struct intel_context *intel = &brw->intel;
-   GLcontext *ctx = &intel->ctx;
 
    if (!brw) {
-      _mesa_printf("%s: failed to alloc context\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   brwInitVtbl( brw );
-   brwInitDriverFunctions( &functions );
-
-   if (!intelInitContext( intel, mesaVis, driContextPriv,
-			  sharedContextPrivate, &functions )) {
-      _mesa_printf("%s: failed to init intel context\n", __FUNCTION__);
-      FREE(brw);
+      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
       return GL_FALSE;
    }
 
-   /* Initialize swrast, tnl driver tables: */
-   intelInitSpanFuncs(ctx);
-
-   TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
-
-   ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
-   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
-   ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
-                                     ctx->Const.MaxTextureImageUnits);
-   ctx->Const.MaxVertexTextureImageUnits = 0; /* no vertex shader textures */
-
-   /* Mesa limits textures to 4kx4k; it would be nice to fix that someday
-    */
-   ctx->Const.MaxTextureLevels = 13;
-   ctx->Const.Max3DTextureLevels = 9;
-   ctx->Const.MaxCubeTextureLevels = 12;
-   ctx->Const.MaxTextureRectSize = (1<<12);
-   
-   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
-
-   /* if conformance mode is set, swrast can handle any size AA point */
-   ctx->Const.MaxPointSizeAA = 255.0;
-
    /* We want the GLSL compiler to emit code that uses condition codes */
    ctx->Shader.EmitCondCodes = GL_TRUE;
    ctx->Shader.EmitNVTempInitialization = GL_TRUE;
 
-   ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
-   ctx->Const.VertexProgram.MaxAluInstructions = 0;
-   ctx->Const.VertexProgram.MaxTexInstructions = 0;
-   ctx->Const.VertexProgram.MaxTexIndirections = 0;
-   ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
-   ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
-   ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
-   ctx->Const.VertexProgram.MaxNativeAttribs = 16;
-   ctx->Const.VertexProgram.MaxNativeTemps = 256;
-   ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
-   ctx->Const.VertexProgram.MaxNativeParameters = 1024;
-   ctx->Const.VertexProgram.MaxEnvParams =
-      MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
-	   ctx->Const.VertexProgram.MaxEnvParams);
-
-   ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
-   ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
-   ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
-   ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
-   ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
-   ctx->Const.FragmentProgram.MaxNativeTemps = 256;
-   ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
-   ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
-   ctx->Const.FragmentProgram.MaxEnvParams =
-      MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
-	   ctx->Const.FragmentProgram.MaxEnvParams);
 
+   brw_init_query( brw );
    brw_init_state( brw );
+   brw_draw_init( brw );
 
    brw->state.dirty.mesa = ~0;
    brw->state.dirty.brw = ~0;
 
    brw->emit_state_always = 0;
 
-   ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
-   ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
-
    make_empty_list(&brw->query.active_head);
 
-   brw_draw_init( brw );
 
    return GL_TRUE;
 }
 
+/**
+ * called from intelDestroyContext()
+ */
+static void brw_destroy_context( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   int i;
+
+   brw_destroy_state(brw);
+   brw_draw_destroy( brw );
+
+   _mesa_free(brw->wm.compile_data);
+
+   for (i = 0; i < brw->state.nr_color_regions; i++)
+      intel_region_release(&brw->state.color_regions[i]);
+   brw->state.nr_color_regions = 0;
+   intel_region_release(&brw->state.depth_region);
+
+   dri_bo_unreference(brw->curbe.curbe_bo);
+   dri_bo_unreference(brw->vs.prog_bo);
+   dri_bo_unreference(brw->vs.state_bo);
+   dri_bo_unreference(brw->vs.bind_bo);
+   dri_bo_unreference(brw->gs.prog_bo);
+   dri_bo_unreference(brw->gs.state_bo);
+   dri_bo_unreference(brw->clip.prog_bo);
+   dri_bo_unreference(brw->clip.state_bo);
+   dri_bo_unreference(brw->clip.vp_bo);
+   dri_bo_unreference(brw->sf.prog_bo);
+   dri_bo_unreference(brw->sf.state_bo);
+   dri_bo_unreference(brw->sf.vp_bo);
+   for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
+      dri_bo_unreference(brw->wm.sdc_bo[i]);
+   dri_bo_unreference(brw->wm.bind_bo);
+   for (i = 0; i < BRW_WM_MAX_SURF; i++)
+      dri_bo_unreference(brw->wm.surf_bo[i]);
+   dri_bo_unreference(brw->wm.sampler_bo);
+   dri_bo_unreference(brw->wm.prog_bo);
+   dri_bo_unreference(brw->wm.state_bo);
+   dri_bo_unreference(brw->cc.prog_bo);
+   dri_bo_unreference(brw->cc.state_bo);
+   dri_bo_unreference(brw->cc.vp_bo);
+}
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index fa3e32c7ff..009e28b227 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -115,7 +115,6 @@
  * Handles blending and (presumably) depth and stencil testing.
  */
 
-#define BRW_FALLBACK_TEXTURE		 0x1
 #define BRW_MAX_CURBE                    (32*16)
 
 struct brw_context;
@@ -450,11 +449,9 @@ struct brw_query_object {
  */
 struct brw_context 
 {
-   struct intel_context intel;  /**< base class, must be first field */
    GLuint primitive;
 
    GLboolean emit_state_always;
-   GLboolean tmp_fallback;
    GLboolean no_batch_wrap;
 
    struct {
@@ -692,7 +689,7 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 /*======================================================================
  * brw_queryobj.c
  */
-void brw_init_queryobj_functions(struct dd_function_table *functions);
+void brw_init_query(struct brw_context *brw);
 void brw_prepare_query_begin(struct brw_context *brw);
 void brw_emit_query_begin(struct brw_context *brw);
 void brw_emit_query_end(struct brw_context *brw);
@@ -730,7 +727,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst);
  * macros used previously:
  */
 static INLINE struct brw_context *
-brw_context( GLcontext *ctx )
+brw_context( struct pipe_context *ctx )
 {
    return (struct brw_context *)ctx;
 }
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 4be6c77aa1..3e32c4983d 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -30,14 +30,6 @@
   */
 
 
-
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/macros.h"
-#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
 #include "intel_batchbuffer.h"
 #include "intel_regions.h"
 #include "brw_context.h"
@@ -64,31 +56,17 @@ static void calculate_curbe_offsets( struct brw_context *brw )
    GLuint nr_clip_regs = 0;
    GLuint total_regs;
 
-   /* _NEW_TRANSFORM */
-   if (ctx->Transform.ClipPlanesEnabled) {
-      GLuint nr_planes = 6 + brw_count_bits(ctx->Transform.ClipPlanesEnabled);
+   /* PIPE_NEW_UCP */
+   if (brw->nr_ucp) {
+      GLuint nr_planes = 6 + brw->nr_ucp;
       nr_clip_regs = (nr_planes * 4 + 15) / 16;
    }
 
 
    total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
 
-   /* This can happen - what to do?  Probably rather than falling
-    * back, the best thing to do is emit programs which code the
-    * constants as immediate values.  Could do this either as a static
-    * cap on WM and VS, or adaptively.
-    *
-    * Unfortunately, this is currently dependent on the results of the
-    * program generation process (in the case of wm), so this would
-    * introduce the need to re-generate programs in the event of a
-    * curbe allocation failure.
-    */
-   /* Max size is 32 - just large enough to
-    * hold the 128 parameters allowed by
-    * the fragment and vertex program
-    * api's.  It's not clear what happens
-    * when both VP and FP want to use 128
-    * parameters, though. 
+   /* When this is > 32, want to use a true constant buffer to hold
+    * the extra constants.
     */
    assert(total_regs <= 32);
 
@@ -113,8 +91,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
       brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
       brw->curbe.total_size = reg;
 
-      if (0)
-	 _mesa_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
+      if (BRW_DEBUG & DEBUG_CURBE)
+	 debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
 		      brw->curbe.wm_start,
 		      brw->curbe.wm_size,
 		      brw->curbe.clip_start,
@@ -129,7 +107,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
 const struct brw_tracked_state brw_curbe_offsets = {
    .dirty = {
-      .mesa = _NEW_TRANSFORM,
+      .mesa = PIPE_NEW_UCP,
       .brw  = BRW_NEW_VERTEX_PROGRAM,
       .cache = CACHE_NEW_WM_PROG
    },
@@ -204,11 +182,13 @@ static void prepare_constant_buffer(struct brw_context *brw)
    if (brw->curbe.wm_size) {
       GLuint offset = brw->curbe.wm_start * 16;
 
-      _mesa_load_state_parameters(ctx, fp->program.Base.Parameters); 
+      /* map fs constant buffer */
 
       /* copy float constants */
       for (i = 0; i < brw->wm.prog_data->nr_params; i++) 
 	 buf[offset + i] = *brw->wm.prog_data->param[i];
+
+      /* unmap fs constant buffer */
    }
 
 
@@ -228,18 +208,15 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 buf[offset + i * 4 + 3] = fixed_plane[i][3];
       }
 
-      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
-       * clip-space:
+      /* Clip planes:
        */
-      assert(MAX_CLIP_PLANES == 6);
-      for (j = 0; j < MAX_CLIP_PLANES; j++) {
-	 if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
-	    buf[offset + i * 4 + 0] = ctx->Transform._ClipUserPlane[j][0];
-	    buf[offset + i * 4 + 1] = ctx->Transform._ClipUserPlane[j][1];
-	    buf[offset + i * 4 + 2] = ctx->Transform._ClipUserPlane[j][2];
-	    buf[offset + i * 4 + 3] = ctx->Transform._ClipUserPlane[j][3];
-	    i++;
-	 }
+      assert(brw->nr_ucp <= 6);
+      for (j = 0; j < brw->nr_ucp; j++) {
+	 buf[offset + i * 4 + 0] = brw->ucp[j][0];
+	 buf[offset + i * 4 + 1] = brw->ucp[j][1];
+	 buf[offset + i * 4 + 2] = brw->ucp[j][2];
+	 buf[offset + i * 4 + 3] = brw->ucp[j][3];
+	 i++;
       }
    }
 
@@ -248,13 +225,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->vs.prog_data->nr_params / 4;
 
-      if (brw->vertex_program->IsNVProgram)
-	 _mesa_load_tracked_matrices(ctx);
-
-      /* Updates the ParamaterValues[i] pointers for all parameters of the
-       * basic type of PROGRAM_STATE_VAR.
-       */
-      _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); 
+      /* map vs constant buffer */
 
       /* XXX just use a memcpy here */
       for (i = 0; i < nr; i++) {
@@ -264,14 +235,16 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 buf[offset + i * 4 + 2] = value[2];
 	 buf[offset + i * 4 + 3] = value[3];
       }
+
+      /* unmap vs constant buffer */
    }
 
    if (0) {
       for (i = 0; i < sz*16; i+=4) 
-	 _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+	 debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
 		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
 
-      _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+      debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
 		   brw->curbe.last_buf, buf,
 		   bufsz, brw->curbe.last_bufsz,
 		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
@@ -282,12 +255,12 @@ static void prepare_constant_buffer(struct brw_context *brw)
        bufsz == brw->curbe.last_bufsz &&
        memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
       /* constants have not changed */
-      _mesa_free(buf);
+      FREE(buf);
    } 
    else {
       /* constants have changed */
       if (brw->curbe.last_buf)
-	 _mesa_free(brw->curbe.last_buf);
+	 FREE(brw->curbe.last_buf);
 
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
@@ -353,15 +326,11 @@ static void emit_constant_buffer(struct brw_context *brw)
    ADVANCE_BATCH();
 }
 
-/* This tracked state is unique in that the state it monitors varies
- * dynamically depending on the parameters tracked by the fragment and
- * vertex programs.  This is the template used as a starting point,
- * each context will maintain a copy of this internally and update as
- * required.
- */
 const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
-      .mesa = _NEW_PROGRAM_CONSTANTS,
+      .mesa = (PIPE_NEW_FS_CONSTANTS |
+	       PIPE_NEW_VS_CONSTANTS |
+	       PIPE_NEW_UCP),
       .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
 	       BRW_NEW_VERTEX_PROGRAM |
 	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 78d457ad2b..282c5b18f4 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -840,8 +840,8 @@
 
 #include "intel_chipset.h"
 
-#define BRW_IS_G4X(brw)         (IS_G4X((brw)->intel.intelScreen->deviceID))
-#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->intel.intelScreen->deviceID))
+#define BRW_IS_G4X(brw)         (IS_G4X((brw)->brw_screen->deviceID))
+#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->brw_screen->deviceID))
 #define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
 #define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
 #define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 9fef230507..a84c581c03 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -27,8 +27,6 @@
 #include <unistd.h>
 #include <stdarg.h>
 
-#include "main/mtypes.h"
-
 #include "brw_context.h"
 #include "brw_defines.h"
 
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 44bb7bd588..8cd117c24f 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -39,14 +39,13 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
 
 #include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BATCH
 
-static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
+static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
    _3DPRIM_POINTLIST,
    _3DPRIM_LINELIST,
    _3DPRIM_LINELOOP,
@@ -60,19 +59,6 @@ static GLuint prim_to_hw_prim[GL_POLYGON+1] = {
 };
 
 
-static const GLenum reduced_prim[GL_POLYGON+1] = {  
-   GL_POINTS,
-   GL_LINES,
-   GL_LINES,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES
-};
-
 
 /* When the primitive changes, set a state bit and re-validate.  Not
  * the nicest and would rather deal with this by having all the
@@ -196,102 +182,6 @@ static void brw_merge_inputs( struct brw_context *brw,
       brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
 }
 
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
- */
-static GLboolean check_fallbacks( struct brw_context *brw,
-				  const struct _mesa_prim *prim,
-				  GLuint nr_prims )
-{
-   GLcontext *ctx = &brw->intel.ctx;
-   GLuint i;
-
-   /* If we don't require strict OpenGL conformance, never 
-    * use fallbacks.  If we're forcing fallbacks, always
-    * use fallfacks.
-    */
-   if (brw->intel.conformance_mode == 0)
-      return GL_FALSE;
-
-   if (brw->intel.conformance_mode == 2)
-      return GL_TRUE;
-
-   if (ctx->Polygon.SmoothFlag) {
-      for (i = 0; i < nr_prims; i++)
-	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
-	    return GL_TRUE;
-   }
-
-   /* BRW hardware will do AA lines, but they are non-conformant it
-    * seems.  TBD whether we keep this fallback:
-    */
-   if (ctx->Line.SmoothFlag) {
-      for (i = 0; i < nr_prims; i++)
-	 if (reduced_prim[prim[i].mode] == GL_LINES) 
-	    return GL_TRUE;
-   }
-
-   /* Stipple -- these fallbacks could be resolved with a little
-    * bit of work?
-    */
-   if (ctx->Line.StippleFlag) {
-      for (i = 0; i < nr_prims; i++) {
-	 /* GS doesn't get enough information to know when to reset
-	  * the stipple counter?!?
-	  */
-	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) 
-	    return GL_TRUE;
-	    
-	 if (prim[i].mode == GL_POLYGON &&
-	     (ctx->Polygon.FrontMode == GL_LINE ||
-	      ctx->Polygon.BackMode == GL_LINE))
-	    return GL_TRUE;
-      }
-   }
-
-   if (ctx->Point.SmoothFlag) {
-      for (i = 0; i < nr_prims; i++)
-	 if (prim[i].mode == GL_POINTS) 
-	    return GL_TRUE;
-   }
-
-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
-   {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-         if (texUnit->Enabled) {
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
-   }
-      
-   /* Nothing stopping us from the fast path now */
-   return GL_FALSE;
-}
-
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
@@ -308,23 +198,12 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    GLboolean retval = GL_FALSE;
    GLboolean warn = GL_FALSE;
    GLboolean first_time = GL_TRUE;
+   uint32_t hw_prim;
    GLuint i;
 
    if (ctx->NewState)
       _mesa_update_state( ctx );
 
-   /* We have to validate the textures *before* checking for fallbacks;
-    * otherwise, the software fallback won't be able to rely on the
-    * texture state, the firstLevel and lastLevel fields won't be
-    * set in the intel texture object (they'll both be 0), and the 
-    * software fallback will segfault if it attempts to access any
-    * texture level other than level 0.
-    */
-   brw_validate_textures( brw );
-
-   if (check_fallbacks(brw, prim, nr_prims))
-      return GL_FALSE;
-
    /* Bind all inputs, derive varying and size information:
     */
    brw_merge_inputs( brw, arrays );
@@ -336,90 +215,30 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    brw->vb.max_index = max_index;
    brw->state.dirty.brw |= BRW_NEW_VERTICES;
 
-   /* Have to validate state quite late.  Will rebuild tnl_program,
-    * which depends on varying information.  
-    * 
-    * Note this is where brw->vs->prog_data.inputs_read is calculated,
-    * so can't access it earlier.
-    */
-
-   LOCK_HARDWARE(intel);
-
-   if (!intel->constant_cliprect && intel->driDrawable->numClipRects == 0) {
-      UNLOCK_HARDWARE(intel);
-      return GL_TRUE;
-   }
-
-   for (i = 0; i < nr_prims; i++) {
-      uint32_t hw_prim;
-
-      /* Flush the batch if it's approaching full, so that we don't wrap while
-       * we've got validated state that needs to be in the same batch as the
-       * primitives.  This fraction is just a guess (minimal full state plus
-       * a primitive is around 512 bytes), and would be better if we had
-       * an upper bound of how much we might emit in a single
-       * brw_try_draw_prims().
-       */
-      intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4,
-				      LOOP_CLIPRECTS);
-
-      hw_prim = brw_set_prim(brw, prim[i].mode);
-
-      if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) {
-	 first_time = GL_FALSE;
-
-	 brw_validate_state(brw);
-
-	 /* Various fallback checks:  */
-	 if (brw->intel.Fallback)
-	    goto out;
-
-	 /* Check that we can fit our state in with our existing batchbuffer, or
-	  * flush otherwise.
-	  */
-	 if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
-					     brw->state.validated_bo_count)) {
-	    static GLboolean warned;
-	    intel_batchbuffer_flush(intel->batch);
-
-	    /* Validate the state after we flushed the batch (which would have
-	     * changed the set of dirty state).  If we still fail to
-	     * check_aperture, warn of what's happening, but attempt to continue
-	     * on since it may succeed anyway, and the user would probably rather
-	     * see a failure and a warning than a fallback.
-	     */
-	    brw_validate_state(brw);
-	    if (!warned &&
-		dri_bufmgr_check_aperture_space(brw->state.validated_bos,
-						brw->state.validated_bo_count)) {
-	       warn = GL_TRUE;
-	       warned = GL_TRUE;
-	    }
-	 }
-
-	 brw_upload_state(brw);
-      }
+   hw_prim = brw_set_prim(brw, prim[i].mode);
 
-      brw_emit_prim(brw, &prim[i], hw_prim);
+   brw_validate_state(brw);
 
-      retval = GL_TRUE;
-   }
+   /* Check that we can fit our state in with our existing batchbuffer, or
+    * flush otherwise.
+    */
+   ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos,
+					 brw->state.validated_bo_count);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_state(brw);
+   if (ret)
+      return ret;
+   
+   ret = brw_emit_prim(brw, &prim[i], hw_prim);
+   if (ret)
+      return ret;
 
    if (intel->always_flush_batch)
       intel_batchbuffer_flush(intel->batch);
- out:
-   UNLOCK_HARDWARE(intel);
-
-   brw_state_cache_check_size(brw);
-
-   if (warn)
-      fprintf(stderr, "i965: Single primitive emit potentially exceeded "
-	      "available aperture space\n");
 
-   if (!retval)
-      DBG("%s failed\n", __FUNCTION__);
-
-   return retval;
+   return 0;
 }
 
 void brw_draw_prims( GLcontext *ctx,
@@ -431,37 +250,26 @@ void brw_draw_prims( GLcontext *ctx,
 		     GLuint min_index,
 		     GLuint max_index )
 {
-   GLboolean retval;
+   enum pipe_error ret;
 
    if (!vbo_all_varyings_in_vbos(arrays)) {
       if (!index_bounds_valid)
 	 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
-
-      /* Decide if we want to rebase.  If so we end up recursing once
-       * only into this function.
-       */
-      if (min_index != 0) {
-	 vbo_rebase_prims(ctx, arrays,
-			  prim, nr_prims,
-			  ib, min_index, max_index,
-			  brw_draw_prims );
-	 return;
-      }
    }
 
    /* Make a first attempt at drawing:
     */
-   retval = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+   ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 
    /* Otherwise, we really are out of memory.  Pass the drawing
     * command to the software tnl module and which will in turn call
     * swrast to do the drawing.
     */
-   if (!retval) {
-       _swsetup_Wakeup(ctx);
-      _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+   if (ret != 0) {
+      intel_batchbuffer_flush(intel->batch);
+      ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+      assert(ret == 0);
    }
-
 }
 
 void brw_draw_init( struct brw_context *brw )
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index a3ff6c58d8..ad3ef6b7dd 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -25,13 +25,9 @@
  * 
  **************************************************************************/
 
+#include "pipe/p_context.h"
 
-#include "main/glheader.h"
-#include "main/bufferobj.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/api_validate.h"
-#include "main/enums.h"
+#include "util/u_upload_mgr.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
@@ -43,303 +39,157 @@
 #include "intel_buffer_objects.h"
 #include "intel_tex.h"
 
-static GLuint double_types[5] = {
-   0,
-   BRW_SURFACEFORMAT_R64_FLOAT,
-   BRW_SURFACEFORMAT_R64G64_FLOAT,
-   BRW_SURFACEFORMAT_R64G64B64_FLOAT,
-   BRW_SURFACEFORMAT_R64G64B64A64_FLOAT
-};
-
-static GLuint float_types[5] = {
-   0,
-   BRW_SURFACEFORMAT_R32_FLOAT,
-   BRW_SURFACEFORMAT_R32G32_FLOAT,
-   BRW_SURFACEFORMAT_R32G32B32_FLOAT,
-   BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
-};
-
-static GLuint uint_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R32_UNORM,
-   BRW_SURFACEFORMAT_R32G32_UNORM,
-   BRW_SURFACEFORMAT_R32G32B32_UNORM,
-   BRW_SURFACEFORMAT_R32G32B32A32_UNORM
-};
-
-static GLuint uint_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R32_USCALED,
-   BRW_SURFACEFORMAT_R32G32_USCALED,
-   BRW_SURFACEFORMAT_R32G32B32_USCALED,
-   BRW_SURFACEFORMAT_R32G32B32A32_USCALED
-};
-
-static GLuint int_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R32_SNORM,
-   BRW_SURFACEFORMAT_R32G32_SNORM,
-   BRW_SURFACEFORMAT_R32G32B32_SNORM,
-   BRW_SURFACEFORMAT_R32G32B32A32_SNORM
-};
-
-static GLuint int_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R32_SSCALED,
-   BRW_SURFACEFORMAT_R32G32_SSCALED,
-   BRW_SURFACEFORMAT_R32G32B32_SSCALED,
-   BRW_SURFACEFORMAT_R32G32B32A32_SSCALED
-};
-
-static GLuint ushort_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R16_UNORM,
-   BRW_SURFACEFORMAT_R16G16_UNORM,
-   BRW_SURFACEFORMAT_R16G16B16_UNORM,
-   BRW_SURFACEFORMAT_R16G16B16A16_UNORM
-};
-
-static GLuint ushort_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R16_USCALED,
-   BRW_SURFACEFORMAT_R16G16_USCALED,
-   BRW_SURFACEFORMAT_R16G16B16_USCALED,
-   BRW_SURFACEFORMAT_R16G16B16A16_USCALED
-};
-
-static GLuint short_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R16_SNORM,
-   BRW_SURFACEFORMAT_R16G16_SNORM,
-   BRW_SURFACEFORMAT_R16G16B16_SNORM,
-   BRW_SURFACEFORMAT_R16G16B16A16_SNORM
-};
-
-static GLuint short_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R16_SSCALED,
-   BRW_SURFACEFORMAT_R16G16_SSCALED,
-   BRW_SURFACEFORMAT_R16G16B16_SSCALED,
-   BRW_SURFACEFORMAT_R16G16B16A16_SSCALED
-};
 
-static GLuint ubyte_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R8_UNORM,
-   BRW_SURFACEFORMAT_R8G8_UNORM,
-   BRW_SURFACEFORMAT_R8G8B8_UNORM,
-   BRW_SURFACEFORMAT_R8G8B8A8_UNORM
-};
 
-static GLuint ubyte_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R8_USCALED,
-   BRW_SURFACEFORMAT_R8G8_USCALED,
-   BRW_SURFACEFORMAT_R8G8B8_USCALED,
-   BRW_SURFACEFORMAT_R8G8B8A8_USCALED
-};
-
-static GLuint byte_types_norm[5] = {
-   0,
-   BRW_SURFACEFORMAT_R8_SNORM,
-   BRW_SURFACEFORMAT_R8G8_SNORM,
-   BRW_SURFACEFORMAT_R8G8B8_SNORM,
-   BRW_SURFACEFORMAT_R8G8B8A8_SNORM
-};
 
-static GLuint byte_types_scale[5] = {
-   0,
-   BRW_SURFACEFORMAT_R8_SSCALED,
-   BRW_SURFACEFORMAT_R8G8_SSCALED,
-   BRW_SURFACEFORMAT_R8G8B8_SSCALED,
-   BRW_SURFACEFORMAT_R8G8B8A8_SSCALED
-};
-
-
-/**
- * Given vertex array type/size/format/normalized info, return
- * the appopriate hardware surface type.
- * Format will be GL_RGBA or possibly GL_BGRA for GLubyte[4] color arrays.
- */
-static GLuint get_surface_type( GLenum type, GLuint size,
-                                GLenum format, GLboolean normalized )
+unsigned brw_translate_surface_format( unsigned id )
 {
-   if (INTEL_DEBUG & DEBUG_VERTS)
-      _mesa_printf("type %s size %d normalized %d\n", 
-		   _mesa_lookup_enum_by_nr(type), size, normalized);
-
-   if (normalized) {
-      switch (type) {
-      case GL_DOUBLE: return double_types[size];
-      case GL_FLOAT: return float_types[size];
-      case GL_INT: return int_types_norm[size];
-      case GL_SHORT: return short_types_norm[size];
-      case GL_BYTE: return byte_types_norm[size];
-      case GL_UNSIGNED_INT: return uint_types_norm[size];
-      case GL_UNSIGNED_SHORT: return ushort_types_norm[size];
-      case GL_UNSIGNED_BYTE:
-         if (format == GL_BGRA) {
-            /* See GL_EXT_vertex_array_bgra */
-            assert(size == 4);
-            return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-         }
-         else {
-            return ubyte_types_norm[size];
-         }
-      default: assert(0); return 0;
-      }      
-   }
-   else {
-      assert(format == GL_RGBA); /* sanity check */
-      switch (type) {
-      case GL_DOUBLE: return double_types[size];
-      case GL_FLOAT: return float_types[size];
-      case GL_INT: return int_types_scale[size];
-      case GL_SHORT: return short_types_scale[size];
-      case GL_BYTE: return byte_types_scale[size];
-      case GL_UNSIGNED_INT: return uint_types_scale[size];
-      case GL_UNSIGNED_SHORT: return ushort_types_scale[size];
-      case GL_UNSIGNED_BYTE: return ubyte_types_scale[size];
-      default: assert(0); return 0;
-      }      
+   switch (id) {
+   case PIPE_FORMAT_R64_FLOAT:
+      return BRW_SURFACEFORMAT_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64_FLOAT;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT:
+      return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      return BRW_SURFACEFORMAT_R32_FLOAT;
+   case PIPE_FORMAT_R32G32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32_FLOAT;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32_FLOAT;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   case PIPE_FORMAT_R32_UNORM:
+      return BRW_SURFACEFORMAT_R32_UNORM;
+   case PIPE_FORMAT_R32G32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32_UNORM;
+   case PIPE_FORMAT_R32G32B32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_UNORM;
+   case PIPE_FORMAT_R32G32B32A32_UNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_UNORM;
+
+   case PIPE_FORMAT_R32_USCALED:
+      return BRW_SURFACEFORMAT_R32_USCALED;
+   case PIPE_FORMAT_R32G32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32_USCALED;
+   case PIPE_FORMAT_R32G32B32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_USCALED;
+   case PIPE_FORMAT_R32G32B32A32_USCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_USCALED;
+
+   case PIPE_FORMAT_R32_SNORM:
+      return BRW_SURFACEFORMAT_R32_SNORM;
+   case PIPE_FORMAT_R32G32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32_SNORM;
+   case PIPE_FORMAT_R32G32B32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32_SNORM;
+   case PIPE_FORMAT_R32G32B32A32_SNORM:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SNORM;
+
+   case PIPE_FORMAT_R32_SSCALED:
+      return BRW_SURFACEFORMAT_R32_SSCALED;
+   case PIPE_FORMAT_R32G32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32_SSCALED;
+   case PIPE_FORMAT_R32G32B32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32_SSCALED;
+   case PIPE_FORMAT_R32G32B32A32_SSCALED:
+      return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED;
+
+   case PIPE_FORMAT_R16_UNORM:
+      return BRW_SURFACEFORMAT_R16_UNORM;
+   case PIPE_FORMAT_R16G16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16_UNORM;
+   case PIPE_FORMAT_R16G16B16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_UNORM;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_UNORM;
+
+   case PIPE_FORMAT_R16_USCALED:
+      return BRW_SURFACEFORMAT_R16_USCALED;
+   case PIPE_FORMAT_R16G16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16_USCALED;
+   case PIPE_FORMAT_R16G16B16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_USCALED;
+   case PIPE_FORMAT_R16G16B16A16_USCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_USCALED;
+
+   case PIPE_FORMAT_R16_SNORM:
+      return BRW_SURFACEFORMAT_R16_SNORM;
+   case PIPE_FORMAT_R16G16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16_SNORM;
+   case PIPE_FORMAT_R16G16B16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16_SNORM;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SNORM;
+
+   case PIPE_FORMAT_R16_SSCALED:
+      return BRW_SURFACEFORMAT_R16_SSCALED;
+   case PIPE_FORMAT_R16G16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16_SSCALED;
+   case PIPE_FORMAT_R16G16B16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16_SSCALED;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED:
+      return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED;
+
+   case PIPE_FORMAT_R8_UNORM:
+      return BRW_SURFACEFORMAT_R8_UNORM;
+   case PIPE_FORMAT_R8G8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8_UNORM;
+   case PIPE_FORMAT_R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_UNORM;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+
+   case PIPE_FORMAT_R8_USCALED:
+      return BRW_SURFACEFORMAT_R8_USCALED;
+   case PIPE_FORMAT_R8G8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8_USCALED;
+   case PIPE_FORMAT_R8G8B8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_USCALED;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_USCALED;
+
+   case PIPE_FORMAT_R8_SNORM:
+      return BRW_SURFACEFORMAT_R8_SNORM;
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+   case PIPE_FORMAT_R8G8B8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8_SNORM;
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   case PIPE_FORMAT_R8_SSCALED:
+      return BRW_SURFACEFORMAT_R8_SSCALED;
+   case PIPE_FORMAT_R8G8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8_SSCALED;
+   case PIPE_FORMAT_R8G8B8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8_SSCALED;
+   case PIPE_FORMAT_R8G8B8A8_SSCALED:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED;
+
+   default:
+      assert(0);
+      return 0;
    }
 }
 
-
-static GLuint get_size( GLenum type )
-{
-   switch (type) {
-   case GL_DOUBLE: return sizeof(GLdouble);
-   case GL_FLOAT: return sizeof(GLfloat);
-   case GL_INT: return sizeof(GLint);
-   case GL_SHORT: return sizeof(GLshort);
-   case GL_BYTE: return sizeof(GLbyte);
-   case GL_UNSIGNED_INT: return sizeof(GLuint);
-   case GL_UNSIGNED_SHORT: return sizeof(GLushort);
-   case GL_UNSIGNED_BYTE: return sizeof(GLubyte);
-   default: return 0;
-   }      
-}
-
-static GLuint get_index_type(GLenum type) 
+static unsigned get_index_type(int type)
 {
    switch (type) {
-   case GL_UNSIGNED_BYTE:  return BRW_INDEX_BYTE;
-   case GL_UNSIGNED_SHORT: return BRW_INDEX_WORD;
-   case GL_UNSIGNED_INT:   return BRW_INDEX_DWORD;
+   case 1: return BRW_INDEX_BYTE;
+   case 2: return BRW_INDEX_WORD;
+   case 4: return BRW_INDEX_DWORD;
    default: assert(0); return 0;
    }
 }
 
-static void wrap_buffers( struct brw_context *brw,
-			  GLuint size )
-{
-   if (size < BRW_UPLOAD_INIT_SIZE)
-      size = BRW_UPLOAD_INIT_SIZE;
-
-   brw->vb.upload.offset = 0;
-
-   if (brw->vb.upload.bo != NULL)
-      dri_bo_unreference(brw->vb.upload.bo);
-   brw->vb.upload.bo = dri_bo_alloc(brw->intel.bufmgr, "temporary VBO",
-				    size, 1);
-
-   /* Set the internal VBO\ to no-backing-store.  We only use them as a
-    * temporary within a brw_try_draw_prims while the lock is held.
-    */
-   /* DON'T DO THIS AS IF WE HAVE TO RE-ORG MEMORY WE NEED SOMEWHERE WITH
-      FAKE TO PUSH THIS STUFF */
-//   if (!brw->intel.ttm)
-//      dri_bo_fake_disable_backing_store(brw->vb.upload.bo, NULL, NULL);
-}
-
-static void get_space( struct brw_context *brw,
-		       GLuint size,
-		       dri_bo **bo_return,
-		       GLuint *offset_return )
-{
-   size = ALIGN(size, 64);
-
-   if (brw->vb.upload.bo == NULL ||
-       brw->vb.upload.offset + size > brw->vb.upload.bo->size) {
-      wrap_buffers(brw, size);
-   }
-
-   assert(*bo_return == NULL);
-   dri_bo_reference(brw->vb.upload.bo);
-   *bo_return = brw->vb.upload.bo;
-   *offset_return = brw->vb.upload.offset;
-   brw->vb.upload.offset += size;
-}
-
-static void
-copy_array_to_vbo_array( struct brw_context *brw,
-			 struct brw_vertex_element *element,
-			 GLuint dst_stride)
-{
-   struct intel_context *intel = &brw->intel;
-   GLuint size = element->count * dst_stride;
-
-   get_space(brw, size, &element->bo, &element->offset);
 
-   if (element->glarray->StrideB == 0) {
-      assert(element->count == 1);
-      element->stride = 0;
-   } else {
-      element->stride = dst_stride;
-   }
-
-   if (dst_stride == element->glarray->StrideB) {
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 memcpy((char *)element->bo->virtual + element->offset,
-		element->glarray->Ptr, size);
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			element->glarray->Ptr);
-      }
-   } else {
-      char *dest;
-      const unsigned char *src = element->glarray->Ptr;
-      int i;
-
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(element->bo);
-	 dest = element->bo->virtual;
-	 dest += element->offset;
-
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
-
-	 drm_intel_gem_bo_unmap_gtt(element->bo);
-      } else {
-	 void *data;
-
-	 data = _mesa_malloc(dst_stride * element->count);
-	 dest = data;
-	 for (i = 0; i < element->count; i++) {
-	    memcpy(dest, src, dst_stride);
-	    src += element->glarray->StrideB;
-	    dest += dst_stride;
-	 }
-
-	 dri_bo_subdata(element->bo,
-			element->offset,
-			size,
-			data);
-
-	 _mesa_free(data);
-      }
-   }
-}
 
-static void brw_prepare_vertices(struct brw_context *brw)
+static boolean brw_prepare_vertices(struct brw_context *brw)
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct intel_context *intel = intel_context(ctx);
@@ -358,123 +208,38 @@ static void brw_prepare_vertices(struct brw_context *brw)
    if (0)
       _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
 
-   /* Accumulate the list of enabled arrays. */
-   brw->vb.nr_enabled = 0;
-   while (vs_inputs) {
-      GLuint i = _mesa_ffsll(vs_inputs) - 1;
-      struct brw_vertex_element *input = &brw->vb.inputs[i];
 
-      vs_inputs &= ~(1 << i);
-      brw->vb.enabled[brw->vb.nr_enabled++] = input;
-   }
-
-   /* XXX: In the rare cases where this happens we fallback all
-    * the way to software rasterization, although a tnl fallback
-    * would be sufficient.  I don't know of *any* real world
-    * cases with > 17 vertex attributes enabled, so it probably
-    * isn't an issue at this point.
-    */
-   if (brw->vb.nr_enabled >= BRW_VEP_MAX) {
-      intel->Fallback = 1;
-      return;
-   }
 
    for (i = 0; i < brw->vb.nr_enabled; i++) {
       struct brw_vertex_element *input = brw->vb.enabled[i];
 
       input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
 
-      if (_mesa_is_bufferobj(input->glarray->BufferObj)) {
-	 struct intel_buffer_object *intel_buffer =
-	    intel_buffer_object(input->glarray->BufferObj);
-
-	 /* Named buffer object: Just reference its contents directly. */
-	 dri_bo_unreference(input->bo);
-	 input->bo = intel_bufferobj_buffer(intel, intel_buffer,
-					    INTEL_READ);
-	 dri_bo_reference(input->bo);
-	 input->offset = (unsigned long)input->glarray->Ptr;
-	 input->stride = input->glarray->StrideB;
-	 input->count = input->glarray->_MaxElement;
-
-	 /* This is a common place to reach if the user mistakenly supplies
-	  * a pointer in place of a VBO offset.  If we just let it go through,
-	  * we may end up dereferencing a pointer beyond the bounds of the
-	  * GTT.  We would hope that the VBO's max_index would save us, but
-	  * Mesa appears to hand us min/max values not clipped to the
-	  * array object's _MaxElement, and _MaxElement frequently appears
-	  * to be wrong anyway.
-	  *
-	  * The VBO spec allows application termination in this case, and it's
-	  * probably a service to the poor programmer to do so rather than
-	  * trying to just not render.
-	  */
-	 assert(input->offset < input->bo->size);
-      } else {
-	 input->count = input->glarray->StrideB ? max_index + 1 - min_index : 1;
-	 if (input->bo != NULL) {
-	    /* Already-uploaded vertex data is present from a previous
-	     * prepare_vertices, but we had to re-validate state due to
-	     * check_aperture failing and a new batch being produced.
-	     */
-	    continue;
-	 }
-
-	 /* Queue the buffer object up to be uploaded in the next pass,
-	  * when we've decided if we're doing interleaved or not.
-	  */
-	 if (input->attrib == VERT_ATTRIB_POS) {
-	    /* Position array not properly enabled:
-	     */
-            if (input->glarray->StrideB == 0) {
-               intel->Fallback = 1;
-               return;
-            }
-
-	    interleave = input->glarray->StrideB;
-	    ptr = input->glarray->Ptr;
-	 }
-	 else if (interleave != input->glarray->StrideB ||
-		  (const unsigned char *)input->glarray->Ptr - ptr < 0 ||
-		  (const unsigned char *)input->glarray->Ptr - ptr > interleave)
-	 {
-	    interleave = 0;
-	 }
-
-	 upload[nr_uploads++] = input;
-	 
-	 /* We rebase drawing to start at element zero only when
-	  * varyings are not in vbos, which means we can end up
-	  * uploading non-varying arrays (stride != 0) when min_index
-	  * is zero.  This doesn't matter as the amount to upload is
-	  * the same for these arrays whether the draw call is rebased
-	  * or not - we just have to upload the one element.
-	  */
-	 assert(min_index == 0 || input->glarray->StrideB == 0);
-      }
-   }
-
-   /* Handle any arrays to be uploaded. */
-   if (nr_uploads > 1 && interleave && interleave <= 256) {
-      /* All uploads are interleaved, so upload the arrays together as
-       * interleaved.  First, upload the contents and set up upload[0].
-       */
-      copy_array_to_vbo_array(brw, upload[0], interleave);
-
-      for (i = 1; i < nr_uploads; i++) {
-	 /* Then, just point upload[i] at upload[0]'s buffer. */
-	 upload[i]->stride = interleave;
-	 upload[i]->offset = upload[0]->offset +
-	    ((const unsigned char *)upload[i]->glarray->Ptr - ptr);
-	 upload[i]->bo = upload[0]->bo;
-	 dri_bo_reference(upload[i]->bo);
+      if (brw_is_user_buffer(vb)) {
+	 u_upload_buffer( brw->upload, 
+			  min_index * vb->stride,
+			  (max_index + 1 - min_index) * vb->stride,
+			  &offset,
+			  &buffer );
       }
-   }
-   else {
-      /* Upload non-interleaved arrays */
-      for (i = 0; i < nr_uploads; i++) {
-          copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size);
+      else
+      {
+	 offset = 0;
+	 buffer = vb->buffer;
+	 count = stride == 0 ? 1 : max_index + 1 - min_index;
       }
+
+      /* Named buffer object: Just reference its contents directly. */
+      dri_bo_unreference(input->bo);
+      input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+					 INTEL_READ);
+      dri_bo_reference(input->bo);
+
+      input->offset = (unsigned long)offset;
+      input->stride = vb->stride;
+      input->count = count;
+
+      assert(input->offset < input->bo->size);
    }
 
    brw_prepare_query_begin(brw);
@@ -632,13 +397,8 @@ static void brw_prepare_indices(struct brw_context *brw)
 
       /* Straight upload
        */
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(bo);
-	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
-	 drm_intel_gem_bo_unmap_gtt(bo);
-      } else {
-	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-      }
+      brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
+
    } else {
       offset = (GLuint) (unsigned long) index_buffer->ptr;
       brw->ib.start_vertex_offset = 0;
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 48c2b9a41c..5ec0c585fe 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -58,7 +58,7 @@ static void compile_gs_prog( struct brw_context *brw,
    /* Need to locate the two positions present in vertex + header.
     * These are currently hardcoded:
     */
-   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attrs = util_count_bits(c.key.attrs);
 
    if (BRW_IS_IGDNG(brw))
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
new file mode 100644
index 0000000000..b351794dce
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -0,0 +1,41 @@
+
+   /* _NEW_COLOR */
+   if (key->logic_op != GL_COPY) {
+      cc.cc2.logicop_enable = 1;
+      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
+   } else if (key->color_blend) {
+      GLenum eqRGB = key->blend_eq_rgb;
+      GLenum eqA = key->blend_eq_a;
+      GLenum srcRGB = key->blend_src_rgb;
+      GLenum dstRGB = key->blend_dst_rgb;
+      GLenum srcA = key->blend_src_a;
+      GLenum dstA = key->blend_dst_a;
+
+      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
+	 srcRGB = dstRGB = GL_ONE;
+      }
+
+      if (eqA == GL_MIN || eqA == GL_MAX) {
+	 srcA = dstA = GL_ONE;
+      }
+
+      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
+      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
+      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
+
+      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
+      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
+      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
+
+      cc.cc3.blend_enable = 1;
+      cc.cc3.ia_blend_enable = (srcA != srcRGB ||
+				dstA != dstRGB ||
+				eqA != eqRGB);
+   }
+
+   if (key->dither) {
+      cc.cc5.dither_enable = 1;
+      cc.cc6.y_dither_offset = 0;
+      cc.cc6.x_dither_offset = 0;
+   }
+
diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c
new file mode 100644
index 0000000000..34d6d4028a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_debug.c
@@ -0,0 +1,2 @@
+   if (INTEL_DEBUG & DEBUG_STATS)
+      cc.cc5.statistics_enable = 1;
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
new file mode 100644
index 0000000000..da29bc8bcb
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -0,0 +1,52 @@
+   /* _NEW_STENCIL */
+   if (key->dsa.stencil[0].enable) {
+      cc.cc0.stencil_enable = 1;
+      cc.cc0.stencil_func =
+	 intel_translate_compare_func(key->stencil_func[0]);
+      cc.cc0.stencil_fail_op =
+	 intel_translate_stencil_op(key->stencil_fail_op[0]);
+      cc.cc0.stencil_pass_depth_fail_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
+      cc.cc0.stencil_pass_depth_pass_op =
+	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
+      cc.cc1.stencil_ref = key->stencil_ref[0];
+      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
+      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
+
+      if (key->stencil_two_side) {
+	 cc.cc0.bf_stencil_enable = 1;
+	 cc.cc0.bf_stencil_func =
+	    intel_translate_compare_func(key->stencil_func[1]);
+	 cc.cc0.bf_stencil_fail_op =
+	    intel_translate_stencil_op(key->stencil_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_fail_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
+	 cc.cc0.bf_stencil_pass_depth_pass_op =
+	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
+	 cc.cc1.bf_stencil_ref = key->stencil_ref[1];
+	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
+	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+      }
+
+      /* Not really sure about this:
+       */
+      if (key->stencil_write_mask[0] ||
+	  (key->stencil_two_side && key->stencil_write_mask[1]))
+	 cc.cc0.stencil_write_enable = 1;
+   }
+
+
+   if (key->alpha_enabled) {
+      cc.cc3.alpha_test = 1;
+      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
+      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+
+      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+   }
+
+   /* _NEW_DEPTH */
+   if (key->depth_test) {
+      cc.cc2.depth_test = 1;
+      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
+      cc.cc2.depth_write_enable = key->depth_write;
+   }
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
new file mode 100644
index 0000000000..d4ae332f46
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -0,0 +1,25 @@
+
+/**
+ * called from intelDrawBuffer()
+ */
+static void brw_set_draw_region( struct intel_context *intel, 
+                                 struct intel_region *color_regions[],
+                                 struct intel_region *depth_region,
+                                 GLuint num_color_regions)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   GLuint i;
+
+   /* release old color/depth regions */
+   if (brw->state.depth_region != depth_region)
+      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
+   for (i = 0; i < brw->state.nr_color_regions; i++)
+       intel_region_release(&brw->state.color_regions[i]);
+   intel_region_release(&brw->state.depth_region);
+
+   /* reference new color/depth regions */
+   for (i = 0; i < num_color_regions; i++)
+       intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
+   intel_region_reference(&brw->state.depth_region, depth_region);
+   brw->state.nr_color_regions = num_color_regions;
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
new file mode 100644
index 0000000000..008f623151
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -0,0 +1,64 @@
+
+/**
+ * called from intel_batchbuffer_flush and children before sending a
+ * batchbuffer off.
+ */
+static void brw_finish_batch(struct intel_context *intel)
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+   brw_emit_query_end(brw);
+}
+
+
+/**
+ * called from intelFlushBatchLocked
+ */
+static void brw_new_batch( struct intel_context *intel )
+{
+   struct brw_context *brw = brw_context(&intel->ctx);
+
+   /* Check that we didn't just wrap our batchbuffer at a bad time. */
+   assert(!brw->no_batch_wrap);
+
+   brw->curbe.need_new_bo = GL_TRUE;
+
+   /* Mark all context state as needing to be re-emitted.
+    * This is probably not as severe as on 915, since almost all of our state
+    * is just in referenced buffers.
+    */
+   brw->state.dirty.brw |= BRW_NEW_CONTEXT;
+
+   brw->state.dirty.mesa |= ~0;
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+
+   /* Move to the end of the current upload buffer so that we'll force choosing
+    * a new buffer next time.
+    */
+   if (brw->vb.upload.bo != NULL) {
+      dri_bo_unreference(brw->vb.upload.bo);
+      brw->vb.upload.bo = NULL;
+      brw->vb.upload.offset = 0;
+   }
+}
+
+
+static void brw_note_fence( struct intel_context *intel, GLuint fence )
+{
+   brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
+}
+
+/* called from intelWaitForIdle() and intelFlush()
+ *
+ * For now, just flush everything.  Could be smarter later.
+ */
+static GLuint brw_flush_cmd( void )
+{
+   struct brw_mi_flush flush;
+   flush.opcode = CMD_MI_FLUSH;
+   flush.pad = 0;
+   flush.flags = BRW_FLUSH_STATE_CACHE;
+   return *(GLuint *)&flush;
+}
+
+
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
new file mode 100644
index 0000000000..d199d0b81a
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -0,0 +1,27 @@
+   /* _NEW_BUFFERS */
+   if (IS_965(intel->intelScreen->deviceID) &&
+       !IS_G4X(intel->intelScreen->deviceID)) {
+      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+	 /* The original gen4 hardware couldn't set up WM surfaces pointing
+	  * at an offset within a tile, which can happen when rendering to
+	  * anything but the base level of a texture or the +X face/0 depth.
+	  * This was fixed with the 4 Series hardware.
+	  *
+	  * For these original chips, you would have to make the depth and
+	  * color destination surfaces include information on the texture
+	  * type, LOD, face, and various limits to use them as a destination.
+	  * I would have done this, but there's also a nasty requirement that
+	  * the depth and the color surfaces all be of the same LOD, which
+	  * may be a worse requirement than this alignment.  (Also, we may
+	  * want to just demote the texture to untiled, instead).
+	  */
+	 if (irb->region && 
+	     irb->region->tiling != I915_TILING_NONE &&
+	     (irb->region->draw_offset & 4095)) {
+	    DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
+	    return GL_TRUE;
+	 }
+      }
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index e1c2c7777b..90513245ee 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -59,9 +59,9 @@ static void compile_sf_prog( struct brw_context *brw,
    brw_init_compile(brw, &c.func);
 
    c.key = *key;
-   c.nr_attrs = brw_count_bits(c.key.attrs);
+   c.nr_attrs = util_count_bits(c.key.attrs);
    c.nr_attr_regs = (c.nr_attrs+1)/2;
-   c.nr_setup_attrs = brw_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS);
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
 
    c.prog_data.urb_read_length = c.nr_attr_regs;
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index ca8f97f9f9..4cc427a935 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -150,7 +150,7 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg ip = brw_ip_reg();
-   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
 
    if (!nr)
@@ -188,7 +188,7 @@ static void do_flatshade_line( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg ip = brw_ip_reg();
-   GLuint nr = brw_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
 
    if (!nr)
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index b817b741e7..6801084616 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -270,7 +270,7 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
 /***********************************************************************
  * Emit all state:
  */
-void brw_validate_state( struct brw_context *brw )
+enum pipe_error brw_validate_state( struct brw_context *brw )
 {
    GLcontext *ctx = &brw->intel.ctx;
    struct intel_context *intel = &brw->intel;
@@ -278,10 +278,6 @@ void brw_validate_state( struct brw_context *brw )
    GLuint i;
 
    brw_clear_validated_bos(brw);
-
-   state->mesa |= brw->intel.NewGLState;
-   brw->intel.NewGLState = 0;
-
    brw_add_validated_bo(brw, intel->batch->buf);
 
    if (brw->emit_state_always) {
@@ -290,36 +286,23 @@ void brw_validate_state( struct brw_context *brw )
       state->cache |= ~0;
    }
 
-   if (brw->fragment_program != ctx->FragmentProgram._Current) {
-      brw->fragment_program = ctx->FragmentProgram._Current;
-      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-   }
-
-   if (brw->vertex_program != ctx->VertexProgram._Current) {
-      brw->vertex_program = ctx->VertexProgram._Current;
-      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-   }
-
    if (state->mesa == 0 &&
        state->cache == 0 &&
        state->brw == 0)
-      return;
+      return 0;
 
    if (brw->state.dirty.brw & BRW_NEW_CONTEXT)
       brw_clear_batch_cache(brw);
 
-   brw->intel.Fallback = 0;
-
    /* do prepare stage for all atoms */
    for (i = 0; i < Elements(atoms); i++) {
       const struct brw_tracked_state *atom = atoms[i];
 
-      if (brw->intel.Fallback)
-         break;
-
       if (check_state(state, &atom->dirty)) {
          if (atom->prepare) {
-            atom->prepare(brw);
+            ret = atom->prepare(brw);
+	    if (ret)
+	       return ret;
         }
       }
    }
@@ -329,17 +312,18 @@ void brw_validate_state( struct brw_context *brw )
     * If this fails, we can experience GPU lock-ups.
     */
    {
-      const struct brw_fragment_program *fp;
-      fp = brw_fragment_program_const(brw->fragment_program);
+      const struct brw_fragment_program *fp = brw->fragment_program;
       if (fp) {
-         assert((fp->tex_units_used & ctx->Texture._EnabledUnits)
-                == fp->tex_units_used);
+         assert(fp->info.max_sampler <= brw->nr_samplers &&
+		fp->info.max_texture <= brw->nr_textures);
       }
    }
+
+   return 0;
 }
 
 
-void brw_upload_state(struct brw_context *brw)
+enum pipe_error brw_upload_state(struct brw_context *brw)
 {
    struct brw_state_flags *state = &brw->state.dirty;
    int i;
@@ -356,7 +340,7 @@ void brw_upload_state(struct brw_context *brw)
       _mesa_memset(&examined, 0, sizeof(examined));
       prev = *state;
 
-      for (i = 0; i < Elements(atoms); i++) {	 
+      for (i = 0; i < Elements(atoms); i++) {
 	 const struct brw_tracked_state *atom = atoms[i];
 	 struct brw_state_flags generated;
 
@@ -364,12 +348,11 @@ void brw_upload_state(struct brw_context *brw)
 		atom->dirty.brw ||
 		atom->dirty.cache);
 
-	 if (brw->intel.Fallback)
-	    break;
-
 	 if (check_state(state, &atom->dirty)) {
 	    if (atom->emit) {
-	       atom->emit( brw );
+	       ret = atom->emit( brw );
+	       if (ret)
+		  return ret;
 	    }
 	 }
 
@@ -388,12 +371,11 @@ void brw_upload_state(struct brw_context *brw)
       for (i = 0; i < Elements(atoms); i++) {	 
 	 const struct brw_tracked_state *atom = atoms[i];
 
-	 if (brw->intel.Fallback)
-	    break;
-
 	 if (check_state(state, &atom->dirty)) {
 	    if (atom->emit) {
-	       atom->emit( brw );
+	       ret = atom->emit( brw );
+	       if (ret)
+		  return ret;
 	    }
 	 }
       }
@@ -407,10 +389,11 @@ void brw_upload_state(struct brw_context *brw)
 	 brw_print_dirty_count(mesa_bits, state->mesa);
 	 brw_print_dirty_count(brw_bits, state->brw);
 	 brw_print_dirty_count(cache_bits, state->cache);
-	 fprintf(stderr, "\n");
+	 debug_printf("\n");
       }
    }
-
-   if (!brw->intel.Fallback)
-      memset(state, 0, sizeof(*state));
+   
+   /* Clear dirty flags:
+    */
+   memset(state, 0, sizeof(*state));
 }
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
new file mode 100644
index 0000000000..6684f442d5
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -0,0 +1,114 @@
+
+/* XXX: could split the primitive list to fallback only on the
+ * non-conformant primitives.
+ */
+static GLboolean check_fallbacks( struct brw_context *brw,
+				  const struct _mesa_prim *prim,
+				  GLuint nr_prims )
+{
+   GLcontext *ctx = &brw->intel.ctx;
+   GLuint i;
+
+   /* If we don't require strict OpenGL conformance, never 
+    * use fallbacks.  If we're forcing fallbacks, always
+    * use fallbacks.
+    */
+   if (brw->intel.conformance_mode == 0)
+      return GL_FALSE;
+
+   if (brw->intel.conformance_mode == 2)
+      return GL_TRUE;
+
+   if (ctx->Polygon.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
+	    return GL_TRUE;
+   }
+
+   /* BRW hardware will do AA lines, but they are non-conformant it
+    * seems.  TBD whether we keep this fallback:
+    */
+   if (ctx->Line.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (reduced_prim[prim[i].mode] == GL_LINES) 
+	    return GL_TRUE;
+   }
+
+   /* Stipple -- these fallbacks could be resolved with a little
+    * bit of work?
+    */
+   if (ctx->Line.StippleFlag) {
+      for (i = 0; i < nr_prims; i++) {
+	 /* GS doesn't get enough information to know when to reset
+	  * the stipple counter?!?
+	  */
+	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) 
+	    return GL_TRUE;
+	    
+	 if (prim[i].mode == GL_POLYGON &&
+	     (ctx->Polygon.FrontMode == GL_LINE ||
+	      ctx->Polygon.BackMode == GL_LINE))
+	    return GL_TRUE;
+      }
+   }
+
+   if (ctx->Point.SmoothFlag) {
+      for (i = 0; i < nr_prims; i++)
+	 if (prim[i].mode == GL_POINTS) 
+	    return GL_TRUE;
+   }
+
+   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
+    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
+    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
+    * we want strict conformance, force the fallback.
+    * Right now, we only do this for 1D, 2D and 3D textures.
+    */
+   {
+      int u;
+      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
+         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
+         if (texUnit->Enabled) {
+            if (texUnit->Enabled & TEXTURE_1D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_2D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+            if (texUnit->Enabled & TEXTURE_3D_BIT) {
+               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
+                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
+                   return GL_TRUE;
+               }
+            }
+         }
+      }
+   }
+
+   /* Exceeding hw limits on number of VS inputs?
+    */
+   if (brw->nr_ve == 0 ||
+       brw->nr_ve >= BRW_VEP_MAX) {
+      return TRUE;
+   }
+
+   /* Position array with zero stride?
+    */
+   if (brw->vs[brw->ve[0]]->stride == 0)
+      return TRUE;
+
+
+      
+   /* Nothing stopping us from the fast path now */
+   return GL_FALSE;
+}
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h
new file mode 100644
index 0000000000..32b62848da
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_types.h
@@ -0,0 +1,11 @@
+#ifndef BRW_TYPES_H
+#define BRW_TYPES_H
+
+typedef GLuint uint32_t;
+typedef GLubyte uint8_t;
+typedef GLushort uint16_t;
+/* no GLenum, translate all away */
+
+typedef GLboolean uint8_t;
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
index ce21aa4869..17f671a8fa 100644
--- a/src/gallium/drivers/i965/brw_util.c
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -35,14 +35,6 @@
 #include "brw_util.h"
 #include "brw_defines.h"
 
-GLuint brw_count_bits( GLuint val )
-{
-   GLuint i;
-   for (i = 0; val ; val >>= 1)
-      if (val & 1)
-	 i++;
-   return i;
-}
 
 
 GLuint brw_translate_blend_equation( GLenum mode )
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index f0c79efbd9..53a5560105 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -61,9 +61,7 @@ static void do_vs_prog( struct brw_context *brw,
    }
 
    if (0)
-      _mesa_print_program(&c.vp->program.Base);
-
-
+      tgsi_dump(&c.vp->tokens, 0);
 
    /* Emit GEN4 code.
     */
@@ -96,9 +94,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
     * the inputs it asks for, whether they are varying or not.
     */
    key.program_string_id = vp->id;
-   key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
-   key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
-			ctx->Polygon.BackMode != GL_FILL);
+   key.nr_userclip = brw->nr_userclip;
+   key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL ||
+			brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL);
 
    /* Make an early check for the key.
     */
@@ -116,7 +114,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_vs_prog = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_POLYGON,
+      .mesa  = PIPE_NEW_UCP | PIPE_NEW_RAST,
       .brw   = BRW_NEW_VERTEX_PROGRAM,
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 1638ef8111..7f20c4baca 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -33,7 +33,7 @@
 #include "main/macros.h"
 #include "shader/program.h"
 #include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
+#include "pipe/p_shader_tokens.h"
 #include "brw_context.h"
 #include "brw_vs.h"
 
@@ -129,6 +129,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 	 reg++;
       }
    }
+
    /* If there are no inputs, we'll still be reading one attribute's worth
     * because it's required -- see urb_read_length setting.
     */
@@ -226,6 +227,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     * vertex urb, so is half the amount:
     */
    c->prog_data.urb_read_length = (c->nr_inputs + 1) / 2;
+
    /* Setting this field to 0 leads to undefined behavior according to the
     * the VS_STATE docs.  Our VUEs will always have at least one attribute
     * sitting in them, even if it's padding.
@@ -960,9 +962,6 @@ static void emit_arl( struct brw_vs_compile *c,
 
 /**
  * Return the brw reg for the given instruction's src argument.
- * Will return mangled results for SWZ op.  The emit_swz() function
- * ignores this result and recalculates taking extended swizzles into
- * account.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
                                const struct prog_instruction *inst,
@@ -1024,74 +1023,6 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
 }
 
 
-static void emit_swz( struct brw_vs_compile *c, 
-		      struct brw_reg dst,
-                      const struct prog_instruction *inst)
-{
-   const GLuint argIndex = 0;
-   const struct prog_src_register src = inst->SrcReg[argIndex];
-   struct brw_compile *p = &c->func;
-   GLuint zeros_mask = 0;
-   GLuint ones_mask = 0;
-   GLuint src_mask = 0;
-   GLubyte src_swz[4];
-   GLboolean need_tmp = (src.Negate &&
-			 dst.file != BRW_GENERAL_REGISTER_FILE);
-   struct brw_reg tmp = dst;
-   GLuint i;
-
-   if (need_tmp)
-      tmp = get_tmp(c);
-
-   for (i = 0; i < 4; i++) {
-      if (dst.dw1.bits.writemask & (1<<i)) {
-	 GLubyte s = GET_SWZ(src.Swizzle, i);
-	 switch (s) {
-	 case SWIZZLE_X:
-	 case SWIZZLE_Y:
-	 case SWIZZLE_Z:
-	 case SWIZZLE_W:
-	    src_mask |= 1<<i;
-	    src_swz[i] = s;
-	    break;
-	 case SWIZZLE_ZERO:
-	    zeros_mask |= 1<<i;
-	    break;
-	 case SWIZZLE_ONE:
-	    ones_mask |= 1<<i;
-	    break;
-	 }
-      }
-   }
-   
-   /* Do src first, in case dst aliases src:
-    */
-   if (src_mask) {
-      struct brw_reg arg0;
-
-      arg0 = get_src_reg(c, inst, argIndex);
-
-      arg0 = brw_swizzle(arg0, 
-			 src_swz[0], src_swz[1], 
-			 src_swz[2], src_swz[3]);
-
-      brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
-   } 
-   
-   if (zeros_mask) 
-      brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
-
-   if (ones_mask) 
-      brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
-
-   if (src.Negate)
-      brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
-   
-   if (need_tmp) {
-      brw_MOV(p, dst, tmp);
-      release_tmp(c, tmp);
-   }
-}
 
 
 /**
@@ -1332,20 +1263,6 @@ void brw_vs_emit(struct brw_vs_compile *c )
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
-   /* Message registers can't be read, so copy the output into GRF register
-      if they are used in source registers */
-   for (insn = 0; insn < nr_insns; insn++) {
-       GLuint i;
-       struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
-       for (i = 0; i < 3; i++) {
-	   struct prog_src_register *src = &inst->SrcReg[i];
-	   GLuint index = src->Index;
-	   GLuint file = src->File;	
-	   if (file == PROGRAM_OUTPUT && index != VERT_RESULT_HPOS)
-	       c->output_regs[index].used_in_src = GL_TRUE;
-       }
-   }
-
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
@@ -1362,18 +1279,14 @@ void brw_vs_emit(struct brw_vs_compile *c )
       _mesa_print_instruction(inst);
 #endif
 
-      /* Get argument regs.  SWZ is special and does this itself.
+      /* Get argument regs.
        */
-      if (inst->Opcode != OPCODE_SWZ)
-	  for (i = 0; i < 3; i++) {
-	      const struct prog_src_register *src = &inst->SrcReg[i];
-	      index = src->Index;
-	      file = src->File;	
-	      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-		  args[i] = c->output_regs[index].reg;
-	      else
-                  args[i] = get_arg(c, inst, i);
-	  }
+      for (i = 0; i < 3; i++) {
+	 const struct prog_src_register *src = &inst->SrcReg[i];
+	 index = src->Index;
+	 file = src->File;	
+	 args[i] = get_arg(c, inst, i);
+      }
 
       /* Get dest regs.  Note that it is possible for a reg to be both
        * dst and arg, given the static allocation of registers.  So
@@ -1381,10 +1294,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
        */ 
       index = inst->DstReg.Index;
       file = inst->DstReg.File;
-      if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src)
-	  dst = c->output_regs[index].reg;
-      else
-	  dst = get_dst(c, inst->DstReg);
+      dst = get_dst(c, inst->DstReg);
 
       if (inst->SaturateMode != SATURATE_OFF) {
 	 _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
@@ -1392,151 +1302,144 @@ void brw_vs_emit(struct brw_vs_compile *c )
       }
 
       switch (inst->Opcode) {
-      case OPCODE_ABS:
+      case TGSI_OPCODE_ABS:
 	 brw_MOV(p, dst, brw_abs(args[0]));
 	 break;
-      case OPCODE_ADD:
+      case TGSI_OPCODE_ADD:
 	 brw_ADD(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_COS:
+      case TGSI_OPCODE_COS:
 	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-      case OPCODE_DP3:
+      case TGSI_OPCODE_DP3:
 	 brw_DP3(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_DP4:
+      case TGSI_OPCODE_DP4:
 	 brw_DP4(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_DPH:
+      case TGSI_OPCODE_DPH:
 	 brw_DPH(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_NRM3:
+      case TGSI_OPCODE_NRM3:
 	 emit_nrm(c, dst, args[0], 3);
 	 break;
-      case OPCODE_NRM4:
+      case TGSI_OPCODE_NRM4:
 	 emit_nrm(c, dst, args[0], 4);
 	 break;
-      case OPCODE_DST:
+      case TGSI_OPCODE_DST:
 	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
 	 break;
-      case OPCODE_EXP:
+      case TGSI_OPCODE_EXP:
 	 unalias1(c, dst, args[0], emit_exp_noalias);
 	 break;
-      case OPCODE_EX2:
+      case TGSI_OPCODE_EX2:
 	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-      case OPCODE_ARL:
+      case TGSI_OPCODE_ARL:
 	 emit_arl(c, dst, args[0]);
 	 break;
-      case OPCODE_FLR:
+      case TGSI_OPCODE_FLR:
 	 brw_RNDD(p, dst, args[0]);
 	 break;
-      case OPCODE_FRC:
+      case TGSI_OPCODE_FRC:
 	 brw_FRC(p, dst, args[0]);
 	 break;
-      case OPCODE_LOG:
+      case TGSI_OPCODE_LOG:
 	 unalias1(c, dst, args[0], emit_log_noalias);
 	 break;
-      case OPCODE_LG2:
+      case TGSI_OPCODE_LG2:
 	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-      case OPCODE_LIT:
+      case TGSI_OPCODE_LIT:
 	 unalias1(c, dst, args[0], emit_lit_noalias);
 	 break;
-      case OPCODE_LRP:
+      case TGSI_OPCODE_LRP:
 	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
 	 break;
-      case OPCODE_MAD:
+      case TGSI_OPCODE_MAD:
 	 brw_MOV(p, brw_acc_reg(), args[2]);
 	 brw_MAC(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_MAX:
+      case TGSI_OPCODE_MAX:
 	 emit_max(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_MIN:
+      case TGSI_OPCODE_MIN:
 	 emit_min(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_MOV:
+      case TGSI_OPCODE_MOV:
 	 brw_MOV(p, dst, args[0]);
 	 break;
-      case OPCODE_MUL:
+      case TGSI_OPCODE_MUL:
 	 brw_MUL(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_POW:
+      case TGSI_OPCODE_POW:
 	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); 
 	 break;
-      case OPCODE_RCP:
+      case TGSI_OPCODE_RCP:
 	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-      case OPCODE_RSQ:
+      case TGSI_OPCODE_RSQ:
 	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-
-      case OPCODE_SEQ:
+      case TGSI_OPCODE_SEQ:
          emit_seq(p, dst, args[0], args[1]);
          break;
-      case OPCODE_SIN:
+      case TGSI_OPCODE_SIN:
 	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
 	 break;
-      case OPCODE_SNE:
+      case TGSI_OPCODE_SNE:
          emit_sne(p, dst, args[0], args[1]);
          break;
-      case OPCODE_SGE:
+      case TGSI_OPCODE_SGE:
 	 emit_sge(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_SGT:
+      case TGSI_OPCODE_SGT:
          emit_sgt(p, dst, args[0], args[1]);
          break;
-      case OPCODE_SLT:
+      case TGSI_OPCODE_SLT:
 	 emit_slt(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_SLE:
+      case TGSI_OPCODE_SLE:
          emit_sle(p, dst, args[0], args[1]);
          break;
-      case OPCODE_SUB:
+      case TGSI_OPCODE_SUB:
 	 brw_ADD(p, dst, args[0], negate(args[1]));
 	 break;
-      case OPCODE_SWZ:
-	 /* The args[0] value can't be used here as it won't have
-	  * correctly encoded the full swizzle:
-	  */
-	 emit_swz(c, dst, inst);
-	 break;
-      case OPCODE_TRUNC:
+      case TGSI_OPCODE_TRUNC:
          /* round toward zero */
 	 brw_RNDZ(p, dst, args[0]);
 	 break;
-      case OPCODE_XPD:
+      case TGSI_OPCODE_XPD:
 	 emit_xpd(p, dst, args[0], args[1]);
 	 break;
-      case OPCODE_IF:
+      case TGSI_OPCODE_IF:
 	 assert(if_depth < MAX_IF_DEPTH);
 	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
 	 /* Note that brw_IF smashes the predicate_control field. */
 	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);
 	 if_depth++;
 	 break;
-      case OPCODE_ELSE:
+      case TGSI_OPCODE_ELSE:
 	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
 	 break;
-      case OPCODE_ENDIF:
+      case TGSI_OPCODE_ENDIF:
          assert(if_depth > 0);
 	 brw_ENDIF(p, if_inst[--if_depth]);
 	 break;			
-      case OPCODE_BGNLOOP:
+      case TGSI_OPCODE_BGNLOOP:
          loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
          break;
-      case OPCODE_BRK:
+      case TGSI_OPCODE_BRK:
 	 brw_set_predicate_control(p, get_predicate(inst));
          brw_BREAK(p);
 	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
          break;
-      case OPCODE_CONT:
+      case TGSI_OPCODE_CONT:
 	 brw_set_predicate_control(p, get_predicate(inst));
          brw_CONT(p);
          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
          break;
-      case OPCODE_ENDLOOP: 
+      case TGSI_OPCODE_ENDLOOP: 
          {
             struct brw_instruction *inst0, *inst1;
 	    GLuint br = 1;
@@ -1550,23 +1453,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
             /* patch all the BREAK/CONT instructions from last BEGINLOOP */
             while (inst0 > loop_inst[loop_depth]) {
                inst0--;
-               if (inst0->header.opcode == BRW_OPCODE_BREAK) {
+               if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
                   inst0->bits3.if_else.pop_count = 0;
                }
-               else if (inst0->header.opcode == BRW_OPCODE_CONTINUE) {
+               else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) {
                   inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
                   inst0->bits3.if_else.pop_count = 0;
                }
             }
          }
          break;
-      case OPCODE_BRA:
+      case TGSI_OPCODE_BRA:
 	 brw_set_predicate_control(p, get_predicate(inst));
          brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
 	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
          break;
-      case OPCODE_CAL:
+      case TGSI_OPCODE_CAL:
 	 brw_set_access_mode(p, BRW_ALIGN_1);
 	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
 	 brw_set_access_mode(p, BRW_ALIGN_16);
@@ -1575,27 +1478,27 @@ void brw_vs_emit(struct brw_vs_compile *c )
          brw_save_call(p, inst->Comment, p->nr_insn);
 	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
          break;
-      case OPCODE_RET:
+      case TGSI_OPCODE_RET:
 	 brw_ADD(p, get_addr_reg(stack_index),
 			 get_addr_reg(stack_index), brw_imm_d(-4));
 	 brw_set_access_mode(p, BRW_ALIGN_1);
          brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
 	 brw_set_access_mode(p, BRW_ALIGN_16);
 	 break;
-      case OPCODE_END:	
+      case TGSI_OPCODE_END:	
          end_offset = p->nr_insn;
          /* this instruction will get patched later to jump past subroutine
           * code, etc.
           */
          brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
          break;
-      case OPCODE_PRINT:
+      case TGSI_OPCODE_PRINT:
          /* no-op */
          break;
-      case OPCODE_BGNSUB:
+      case TGSI_OPCODE_BGNSUB:
          brw_save_label(p, inst->Comment, p->nr_insn);
          break;
-      case OPCODE_ENDSUB:
+      case TGSI_OPCODE_ENDSUB:
          /* no-op */
          break;
       default:
@@ -1618,33 +1521,6 @@ void brw_vs_emit(struct brw_vs_compile *c )
 	 hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
       }
 
-      if ((inst->DstReg.File == PROGRAM_OUTPUT)
-          && (inst->DstReg.Index != VERT_RESULT_HPOS)
-          && c->output_regs[inst->DstReg.Index].used_in_src) {
-         brw_MOV(p, get_dst(c, inst->DstReg), dst);
-      }
-
-      /* Result color clamping.
-       *
-       * When destination register is an output register and
-       * it's primary/secondary front/back color, we have to clamp
-       * the result to [0,1]. This is done by enabling the
-       * saturation bit for the last instruction.
-       *
-       * We don't use brw_set_saturate() as it modifies
-       * p->current->header.saturate, which affects all the subsequent
-       * instructions. Instead, we directly modify the header
-       * of the last (already stored) instruction.
-       */
-      if (inst->DstReg.File == PROGRAM_OUTPUT) {
-         if ((inst->DstReg.Index == VERT_RESULT_COL0)
-             || (inst->DstReg.Index == VERT_RESULT_COL1)
-             || (inst->DstReg.Index == VERT_RESULT_BFC0)
-             || (inst->DstReg.Index == VERT_RESULT_BFC1)) {
-            p->store[p->nr_insn-1].header.saturate = 1;
-         }
-      }
-
       release_tmps(c);
    }
 
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 2292de94c4..20d31880b4 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -269,61 +269,46 @@ static void brw_wm_populate_key( struct brw_context *brw,
 		    uses_depth,
 		    key);
 
+   /* Revisit this, figure out if it's really useful, and either push
+    * it into the state tracker so that everyone benefits (use to
+    * create fs variants with TEX rather than TXP), or discard.
+    */
+   key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/
 
-   /* BRW_NEW_WM_INPUT_DIMENSIONS */
-   key->proj_attrib_mask = brw->wm.input_size_masks[4-1];
-
-   /* _NEW_LIGHT */
-   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+   /* PIPE_NEW_RAST */
+   key->flat_shade = brw->rast.flat_shade;
 
-   /* _NEW_HINT */
-   key->linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+   /* This can be determined by looking at the INTERP mode of each input decl.
+    */
+   key->linear_color = 0;
 
    /* _NEW_TEXTURE */
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
-
-      if (unit->_ReallyEnabled) {
-         const struct gl_texture_object *t = unit->_Current;
-         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+      if (i < brw->nr_textures) {
+	 const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
+	 const struct gl_texture_object *t = unit->_Current;
+	 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
+	 
 	 if (img->InternalFormat == GL_YCBCR_MESA) {
 	    key->yuvtex_mask |= 1 << i;
 	    if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
-		key->yuvtex_swap_mask |= 1 << i;
+	       key->yuvtex_swap_mask |= 1 << i;
 	 }
 
-         key->tex_swizzles[i] = t->_Swizzle;
+	 key->tex_swizzles[i] = t->_Swizzle;
+	 
+	 if (0)
+	    key->shadowtex_mask |= 1<<i;
       }
       else {
          key->tex_swizzles[i] = SWIZZLE_NOOP;
       }
    }
 
-   /* Shadow */
-   key->shadowtex_mask = fp->program.Base.ShadowSamplers;
 
-   /* _NEW_BUFFERS */
-   /*
-    * Include the draw buffer origin and height so that we can calculate
-    * fragment position values relative to the bottom left of the drawable,
-    * from the incoming screen origin relative position we get as part of our
-    * payload.
-    *
-    * We could avoid recompiling by including this as a constant referenced by
-    * our program, but if we were to do that it would also be nice to handle
-    * getting that constant updated at batchbuffer submit time (when we
-    * hold the lock and know where the buffer really is) rather than at emit
-    * time when we don't hold the lock and are just guessing.  We could also
-    * just avoid using this as key data if the program doesn't use
-    * fragment.position.
-    *
-    * This pretty much becomes moot with DRI2 and redirected buffers anyway,
-    * as our origins will always be zero then.
-    */
+   /* _NEW_FRAMEBUFFER */
    if (brw->intel.driDrawable != NULL) {
-      key->origin_x = brw->intel.driDrawable->x;
-      key->origin_y = brw->intel.driDrawable->y;
-      key->drawable_height = brw->intel.driDrawable->h;
+      key->drawable_height = brw->fb.cbufs[0].height;
    }
 
    /* CACHE_NEW_VS_PROG */
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 872b1f3ecf..756a680150 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -76,7 +76,6 @@ struct brw_wm_prog_key {
    GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
 
    GLuint program_string_id:32;
-   GLuint origin_x, origin_y;
    GLuint drawable_height;
    GLuint vp_outputs_written;
 };
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index bf80a2942a..9c47c46a3d 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -125,23 +125,21 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
 {
    struct brw_compile *p = &c->func;
 
-   /* Calculate the pixel offset from window bottom left into destination
-    * X and Y channels.
-    */
    if (mask & WRITEMASK_X) {
-      /* X' = X - origin */
-      brw_ADD(p,
+      /* X' = X */
+      brw_MOV(p,
 	      dst[0],
-	      retype(arg0[0], BRW_REGISTER_TYPE_W),
-	      brw_imm_d(0 - c->key.origin_x));
+	      retype(arg0[0], BRW_REGISTER_TYPE_W));
    }
 
+   /* XXX: is this needed any more, or is this a NOOP?
+    */
    if (mask & WRITEMASK_Y) {
-      /* Y' = height - (Y - origin_y) = height + origin_y - Y */
+      /* Y' = height - 1 - Y */
       brw_ADD(p,
 	      dst[1],
 	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
-	      brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+	      brw_imm_d(c->key.drawable_height - 1));
    }
 }
 
@@ -1376,7 +1374,6 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       case OPCODE_MOV:
-      case OPCODE_SWZ:
 	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
 	 break;
 
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 4e3edfbbff..5f47d86f71 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -30,25 +30,12 @@
   */
                
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
+#include "pipe/p_shader_constants.h"
+
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_util.h"
 
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_statevars.h"
-
-
-/** An invalid texture target */
-#define TEX_TARGET_NONE NUM_TEXTURE_TARGETS
-
-/** An invalid texture unit */
-#define TEX_UNIT_NONE BRW_MAX_TEX_UNIT
-
-#define FIRST_INTERNAL_TEMP MAX_NV_FRAGMENT_PROGRAM_TEMPS
 
 #define X    0
 #define Y    1
@@ -68,11 +55,6 @@ static const char *wm_opcode_strings[] = {
    "FRONTFACING",
 };
 
-#if 0
-static const char *wm_file_strings[] = {   
-   "PAYLOAD"
-};
-#endif
 
 
 /***********************************************************************
@@ -165,13 +147,13 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
    }
 
    c->fp_temp |= 1<<(bit-1);
-   return dst_reg(PROGRAM_TEMPORARY, FIRST_INTERNAL_TEMP+(bit-1));
+   return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
 }
 
 
 static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
 {
-   c->fp_temp &= ~(1 << (temp.Index - FIRST_INTERNAL_TEMP));
+   c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
 }
 
 
@@ -192,58 +174,29 @@ static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
    return inst;
 }
 
-static struct prog_instruction * emit_tex_op(struct brw_wm_compile *c,
-				       GLuint op,
-				       struct prog_dst_register dest,
-				       GLuint saturate,
-				       GLuint tex_src_unit,
-				       GLuint tex_src_target,
-				       GLuint tex_shadow,
-				       struct prog_src_register src0,
-				       struct prog_src_register src1,
-				       struct prog_src_register src2 )
+static struct prog_instruction * emit_op(struct brw_wm_compile *c,
+					 GLuint op,
+					 struct prog_dst_register dest,
+					 GLuint saturate,
+					 struct prog_src_register src0,
+					 struct prog_src_register src1,
+					 struct prog_src_register src2 )
 {
    struct prog_instruction *inst = get_fp_inst(c);
       
-   assert(tex_src_unit < BRW_MAX_TEX_UNIT ||
-          tex_src_unit == TEX_UNIT_NONE);
-   assert(tex_src_target < NUM_TEXTURE_TARGETS ||
-          tex_src_target == TEX_TARGET_NONE);
-
-   /* update mask of which texture units are referenced by this program */
-   if (tex_src_unit != TEX_UNIT_NONE)
-      c->fp->tex_units_used |= (1 << tex_src_unit);
-
    memset(inst, 0, sizeof(*inst));
 
    inst->Opcode = op;
    inst->DstReg = dest;
    inst->SaturateMode = saturate;   
-   inst->TexSrcUnit = tex_src_unit;
-   inst->TexSrcTarget = tex_src_target;
-   inst->TexShadow = tex_shadow;
    inst->SrcReg[0] = src0;
    inst->SrcReg[1] = src1;
    inst->SrcReg[2] = src2;
    return inst;
 }
-   
-
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
-				       GLuint op,
-				       struct prog_dst_register dest,
-				       GLuint saturate,
-				       struct prog_src_register src0,
-				       struct prog_src_register src1,
-				       struct prog_src_register src2 )
-{
-   return emit_tex_op(c, op, dest, saturate,
-                      TEX_UNIT_NONE, TEX_TARGET_NONE, 0,  /* unit, tgt, shadow */
-                      src0, src1, src2);
-}
 
 
-/* Many Mesa opcodes produce the same value across all the result channels.
+/* Many opcodes produce the same value across all the result channels.
  * We'd rather not have to support that splatting in the opcode implementations,
  * and brw_wm_pass*.c wants to optimize them out by shuffling references around
  * anyway.  We can easily get both by emitting the opcode to one channel, and
@@ -267,7 +220,7 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
    other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
    if (other_channel_mask != 0) {
       inst = emit_op(c,
-		     OPCODE_MOV,
+		     TGSI_OPCODE_MOV,
 		     dst_mask(inst0->DstReg, other_channel_mask),
 		     0,
 		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
@@ -356,7 +309,9 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
 }
 
 static void emit_interp( struct brw_wm_compile *c,
-			 GLuint idx )
+			 GLuint semantic,
+			 GLuint semantic_index,
+			 GLuint interp_mode )
 {
    struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
    struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
@@ -366,7 +321,7 @@ static void emit_interp( struct brw_wm_compile *c,
     * multiplied by 1/W in the SF program, and LINTERP on those
     * which have not:
     */
-   switch (idx) {
+   switch (semantic) {
    case FRAG_ATTRIB_WPOS:
       /* Have to treat wpos.xy specially:
        */
@@ -390,8 +345,8 @@ static void emit_interp( struct brw_wm_compile *c,
 	      deltas,
 	      src_undef());
       break;
-   case FRAG_ATTRIB_COL0:
-   case FRAG_ATTRIB_COL1:
+
+   case TGSI_SEMANTIC_COLOR:
       if (c->key.flat_shade) {
 	 emit_op(c,
 		 WM_CINTERP,
@@ -402,25 +357,13 @@ static void emit_interp( struct brw_wm_compile *c,
 		 src_undef());
       }
       else {
-         if (c->key.linear_color) {
-            emit_op(c,
-                    WM_LINTERP,
-                    dst,
-                    0,
-                    interp,
-                    deltas,
-                    src_undef());
-         }
-         else {
-            /* perspective-corrected color interpolation */
-            emit_op(c,
-                    WM_PINTERP,
-                    dst,
-                    0,
-                    interp,
-                    deltas,
-                    get_pixel_w(c));
-         }
+	 emit_op(c,
+		 translate_interp_mode(interp_mode),
+		 dst,
+		 0,
+		 interp,
+		 deltas,
+		 src_undef());
       }
       break;
    case FRAG_ATTRIB_FOGC:
@@ -434,7 +377,7 @@ static void emit_interp( struct brw_wm_compile *c,
 	      get_pixel_w(c));
 
       emit_op(c,
-	      OPCODE_MOV,
+	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, WRITEMASK_YZW),
 	      0,
 	      src_swizzle(interp,
@@ -468,7 +411,7 @@ static void emit_interp( struct brw_wm_compile *c,
 	      get_pixel_w(c));
 
       emit_op(c,
-	      OPCODE_MOV,
+	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, WRITEMASK_ZW),
 	      0,
 	      src_swizzle(interp,
@@ -482,7 +425,7 @@ static void emit_interp( struct brw_wm_compile *c,
 
    default:
       emit_op(c,
-	      WM_PINTERP,
+	      translate_interp_mode(interp_mode),
 	      dst,
 	      0,
 	      interp,
@@ -490,8 +433,6 @@ static void emit_interp( struct brw_wm_compile *c,
 	      get_pixel_w(c));
       break;
    }
-
-   c->fp_interp_emitted |= 1<<idx;
 }
 
 /***********************************************************************
@@ -581,7 +522,7 @@ static void precalc_dst( struct brw_wm_compile *c,
       /* dst.y = mul src0.y, src1.y
        */
       emit_op(c,
-	      OPCODE_MUL,
+	      TGSI_OPCODE_MUL,
 	      dst_mask(dst, WRITEMASK_Y),
 	      inst->SaturateMode,
 	      src0,
@@ -596,7 +537,7 @@ static void precalc_dst( struct brw_wm_compile *c,
       /* dst.xz = swz src0.1zzz
        */
       swz = emit_op(c,
-		    OPCODE_SWZ,
+		    TGSI_OPCODE_MOV,
 		    dst_mask(dst, WRITEMASK_XZ),
 		    inst->SaturateMode,
 		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
@@ -609,7 +550,7 @@ static void precalc_dst( struct brw_wm_compile *c,
       /* dst.w = mov src1.w
        */
       emit_op(c,
-	      OPCODE_MOV,
+	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, WRITEMASK_W),
 	      inst->SaturateMode,
 	      src1,
@@ -631,7 +572,7 @@ static void precalc_lit( struct brw_wm_compile *c,
       /* dst.xw = swz src0.1111
        */
       swz = emit_op(c,
-		    OPCODE_SWZ,
+		    TGSI_OPCODE_MOV,
 		    dst_mask(dst, WRITEMASK_XW),
 		    0,
 		    src_swizzle1(src0, SWIZZLE_ONE),
@@ -643,7 +584,7 @@ static void precalc_lit( struct brw_wm_compile *c,
 
    if (dst.WriteMask & WRITEMASK_YZ) {
       emit_op(c,
-	      OPCODE_LIT,
+	      TGSI_OPCODE_LIT,
 	      dst_mask(dst, WRITEMASK_YZ),
 	      inst->SaturateMode,
 	      src0,
@@ -681,7 +622,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        coord = src_reg_from_dst(tmpcoord);
 
        /* tmpcoord = src0 (i.e.: coord = src0) */
-       out = emit_op(c, OPCODE_MOV,
+       out = emit_op(c, TGSI_OPCODE_MOV,
                      tmpcoord,
                      0,
                      src0,
@@ -691,7 +632,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        out->SrcReg[0].Abs = 1;
 
        /* tmp0 = MAX(coord.X, coord.Y) */
-       emit_op(c, OPCODE_MAX,
+       emit_op(c, TGSI_OPCODE_MAX,
                tmp0,
                0,
                src_swizzle1(coord, X),
@@ -699,7 +640,7 @@ static void precalc_tex( struct brw_wm_compile *c,
                src_undef());
 
        /* tmp1 = MAX(tmp0, coord.Z) */
-       emit_op(c, OPCODE_MAX,
+       emit_op(c, TGSI_OPCODE_MAX,
                tmp1,
                0,
                tmp0src,
@@ -707,7 +648,7 @@ static void precalc_tex( struct brw_wm_compile *c,
                src_undef());
 
        /* tmp0 = 1 / tmp1 */
-       emit_op(c, OPCODE_RCP,
+       emit_op(c, TGSI_OPCODE_RCP,
                dst_mask(tmp0, WRITEMASK_X),
                0,
                tmp1src,
@@ -715,7 +656,7 @@ static void precalc_tex( struct brw_wm_compile *c,
                src_undef());
 
        /* tmpCoord = src0 * tmp0 */
-       emit_op(c, OPCODE_MUL,
+       emit_op(c, TGSI_OPCODE_MUL,
                tmpcoord,
                0,
                src0,
@@ -738,7 +679,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
        */
       emit_op(c,
-	      OPCODE_MUL,
+	      TGSI_OPCODE_MUL,
 	      tmpcoord,
 	      0,
 	      inst->SrcReg[0],
@@ -785,7 +726,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
-                  OPCODE_TEX,
+                  TGSI_OPCODE_TEX,
                   tmp,
                   inst->SaturateMode,
                   unit,
@@ -798,7 +739,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* tmp.xyz =  ADD TMP, C0
        */
       emit_op(c,
-	      OPCODE_ADD,
+	      TGSI_OPCODE_ADD,
 	      dst_mask(tmp, WRITEMASK_XYZ),
 	      0,
 	      tmpsrc,
@@ -809,7 +750,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        */
 
       emit_op(c,
-	      OPCODE_MUL,
+	      TGSI_OPCODE_MUL,
 	      dst_mask(tmp, WRITEMASK_Y),
 	      0,
 	      tmpsrc,
@@ -824,7 +765,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        */
 
       emit_op(c,
-	      OPCODE_MAD,
+	      TGSI_OPCODE_MAD,
 	      dst_mask(dst, WRITEMASK_XYZ),
 	      0,
 	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
@@ -834,7 +775,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
       emit_op(c,
-	      OPCODE_MAD,
+	      TGSI_OPCODE_MAD,
 	      dst_mask(dst, WRITEMASK_Y),
 	      0,
 	      src_swizzle1(tmpsrc, Z),
@@ -846,7 +787,7 @@ static void precalc_tex( struct brw_wm_compile *c,
    else {
       /* ordinary RGBA tex instruction */
       emit_tex_op(c, 
-                  OPCODE_TEX,
+                  TGSI_OPCODE_TEX,
                   inst->DstReg,
                   inst->SaturateMode,
                   unit,
@@ -861,7 +802,7 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
       /* swizzle the result of the TEX instruction */
       struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
-      emit_op(c, OPCODE_SWZ,
+      emit_op(c, TGSI_OPCODE_MOV,
               inst->DstReg,
               SATURATE_OFF, /* saturate already done above */
               src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
@@ -884,7 +825,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
    const struct prog_src_register src = inst->SrcReg[0];
    GLboolean retVal;
 
-   assert(inst->Opcode == OPCODE_TXP);
+   assert(inst->Opcode == TGSI_OPCODE_TXP);
 
    /* Only try to detect the simplest cases.  Could detect (later)
     * cases where we are trying to emit code like RCP {1.0}, MUL x,
@@ -921,7 +862,7 @@ static void precalc_txp( struct brw_wm_compile *c,
       /* tmp0.w = RCP inst.arg[0][3]
        */
       emit_op(c,
-	      OPCODE_RCP,
+	      TGSI_OPCODE_RCP,
 	      dst_mask(tmp, WRITEMASK_W),
 	      0,
 	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
@@ -931,7 +872,7 @@ static void precalc_txp( struct brw_wm_compile *c,
       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
        */
       emit_op(c,
-	      OPCODE_MUL,
+	      TGSI_OPCODE_MUL,
 	      dst_mask(tmp, WRITEMASK_XYZ),
 	      0,
 	      src0,
@@ -1015,6 +956,7 @@ static void validate_src_regs( struct brw_wm_compile *c,
 	 GLuint idx = inst->SrcReg[i].Index;
 	 if (!(c->fp_interp_emitted & (1<<idx))) {
 	    emit_interp(c, idx);
+	    c->fp_interp_emitted |= 1<<idx;
 	 }
       }
    }
@@ -1094,71 +1036,64 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
        */
 
       switch (inst->Opcode) {
-      case OPCODE_SWZ: 
+      case TGSI_OPCODE_ABS:
 	 out = emit_insn(c, inst);
-	 out->Opcode = OPCODE_MOV;
-	 break;
-	 
-      case OPCODE_ABS:
-	 out = emit_insn(c, inst);
-	 out->Opcode = OPCODE_MOV;
+	 out->Opcode = TGSI_OPCODE_MOV;
 	 out->SrcReg[0].Negate = NEGATE_NONE;
 	 out->SrcReg[0].Abs = 1;
 	 break;
 
-      case OPCODE_SUB: 
+      case TGSI_OPCODE_SUB: 
 	 out = emit_insn(c, inst);
-	 out->Opcode = OPCODE_ADD;
+	 out->Opcode = TGSI_OPCODE_ADD;
 	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
 	 break;
 
-      case OPCODE_SCS: 
+      case TGSI_OPCODE_SCS: 
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
 	  */
 	 out->DstReg.WriteMask &= WRITEMASK_XY;
 	 break;
 	 
-      case OPCODE_DST:
+      case TGSI_OPCODE_DST:
 	 precalc_dst(c, inst);
 	 break;
 
-      case OPCODE_LIT:
+      case TGSI_OPCODE_LIT:
 	 precalc_lit(c, inst);
 	 break;
 
-      case OPCODE_TEX:
+      case TGSI_OPCODE_TEX:
 	 precalc_tex(c, inst);
 	 break;
 
-      case OPCODE_TXP:
+      case TGSI_OPCODE_TXP:
 	 precalc_txp(c, inst);
 	 break;
 
-      case OPCODE_TXB:
+      case TGSI_OPCODE_TXB:
 	 out = emit_insn(c, inst);
 	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
          assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
 	 break;
 
-      case OPCODE_XPD: 
+      case TGSI_OPCODE_XPD: 
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
 	  */
 	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
 	 break;
 
-      case OPCODE_KIL: 
+      case TGSI_OPCODE_KIL: 
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
 	  */
 	 out->DstReg.WriteMask = 0;
 	 break;
-      case OPCODE_END:
+      case TGSI_OPCODE_END:
 	 emit_fb_write(c);
 	 break;
-      case OPCODE_PRINT:
-	 break;
       default:
 	 if (brw_wm_is_scalar_result(inst->Opcode))
 	    emit_scalar_insn(c, inst);
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index c9fe1dd8ad..d836e2fb34 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -6,9 +6,6 @@
 #include "brw_eu.h"
 #include "brw_wm.h"
 
-enum _subroutine {
-    SUB_NOISE1, SUB_NOISE2, SUB_NOISE3, SUB_NOISE4
-};
 
 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
                                   const struct prog_instruction *inst,
@@ -32,10 +29,6 @@ GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
 	    case OPCODE_CAL:
 	    case OPCODE_BRK:
 	    case OPCODE_RET:
-	    case OPCODE_NOISE1:
-	    case OPCODE_NOISE2:
-	    case OPCODE_NOISE3:
-	    case OPCODE_NOISE4:
 	    case OPCODE_BGNLOOP:
 		return GL_TRUE; 
 	    default:
@@ -1495,1036 +1488,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
 		   0, 16, 2 );
 }
 
-/* One-, two- and three-dimensional Perlin noise, similar to the description
-   in _Improving Noise_, Ken Perlin, Computer Graphics vol. 35 no. 3. */
-static void noise1_sub( struct brw_wm_compile *c ) {
 
-    struct brw_compile *p = &c->func;
-    struct brw_reg param,
-	x0, x1, /* gradients at each end */       
-	t, tmp[ 2 ], /* float temporaries */
-	itmp[ 5 ]; /* unsigned integer temporaries (aliases of floats above) */
-    int i;
-    int mark = mark_tmps( c );
-
-    x0 = alloc_tmp( c );
-    x1 = alloc_tmp( c );
-    t = alloc_tmp( c );
-    tmp[ 0 ] = alloc_tmp( c );
-    tmp[ 1 ] = alloc_tmp( c );
-    itmp[ 0 ] = retype( tmp[ 0 ], BRW_REGISTER_TYPE_UD );
-    itmp[ 1 ] = retype( tmp[ 1 ], BRW_REGISTER_TYPE_UD );
-    itmp[ 2 ] = retype( x0, BRW_REGISTER_TYPE_UD );
-    itmp[ 3 ] = retype( x1, BRW_REGISTER_TYPE_UD );
-    itmp[ 4 ] = retype( t, BRW_REGISTER_TYPE_UD );
-    
-    param = lookup_tmp( c, mark - 2 );
-
-    brw_set_access_mode( p, BRW_ALIGN_1 );
-
-    brw_MOV( p, itmp[ 2 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
-
-    /* Arrange the two end coordinates into scalars (itmp0/itmp1) to
-       be hashed.  Also compute the remainder (offset within the unit
-       length), interleaved to reduce register dependency penalties. */
-    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param );
-    brw_FRC( p, param, param );
-    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 1 ) );
-    brw_MOV( p, itmp[ 3 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
-    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
-
-    /* We're now ready to perform the hashing.  The two hashes are
-       interleaved for performance.  The hash function used is
-       designed to rapidly achieve avalanche and require only 32x16
-       bit multiplication, and 16-bit swizzles (which we get for
-       free).  We can't use immediate operands in the multiplies,
-       because immediates are permitted only in src1 and the 16-bit
-       factor is permitted only in src0. */
-    for( i = 0; i < 2; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 2 ], itmp[ i ] );
-    for( i = 0; i < 2; i++ )
-       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		high_words( itmp[ i ] ) );
-    for( i = 0; i < 2; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 3 ], itmp[ i ] );
-    for( i = 0; i < 2; i++ )
-       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		high_words( itmp[ i ] ) );
-    for( i = 0; i < 2; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
-    for( i = 0; i < 2; i++ )
-       brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		high_words( itmp[ i ] ) );
-
-    /* Now we want to initialise the two gradients based on the
-       hashes.  Format conversion from signed integer to float leaves
-       everything scaled too high by a factor of pow( 2, 31 ), but
-       we correct for that right at the end. */
-    brw_ADD( p, t, param, brw_imm_f( -1.0 ) );
-    brw_MOV( p, x0, retype( tmp[ 0 ], BRW_REGISTER_TYPE_D ) );
-    brw_MOV( p, x1, retype( tmp[ 1 ], BRW_REGISTER_TYPE_D ) );
-
-    brw_MUL( p, x0, x0, param );
-    brw_MUL( p, x1, x1, t );
-    
-    /* We interpolate between the gradients using the polynomial
-       6t^5 - 15t^4 + 10t^3 (Perlin). */
-    brw_MUL( p, tmp[ 0 ], param, brw_imm_f( 6.0 ) );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
-    brw_ADD( p, x1, x1, negate( x0 ) ); /* unrelated work to fill the
-					   pipeline */
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param );
-    brw_MUL( p, param, tmp[ 0 ], param );
-    brw_MUL( p, x1, x1, param );
-    brw_ADD( p, x0, x0, x1 );    
-    /* scale by pow( 2, -30 ), to compensate for the format conversion
-       above and an extra factor of 2 so that a single gradient covers
-       the [-1,1] range */
-    brw_MUL( p, param, x0, brw_imm_f( 0.000000000931322574615478515625 ) );
-
-    release_tmps( c, mark );
-}
-
-static void emit_noise1( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src, param, dst;
-    GLuint mask = inst->DstReg.WriteMask;
-    int i;
-    int mark = mark_tmps( c );
-
-    assert( mark == 0 );
-    
-    src = get_src_reg( c, inst, 0, 0 );
-
-    param = alloc_tmp( c );
-
-    brw_MOV( p, param, src );
-
-    invoke_subroutine( c, SUB_NOISE1, noise1_sub );
-    
-    /* Fill in the result: */
-    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
-    for (i = 0 ; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    dst = get_dst_reg(c, inst, i);
-	    brw_MOV( p, dst, param );
-	}
-    }
-    if( inst->SaturateMode == SATURATE_ZERO_ONE )
-	brw_set_saturate( p, 0 );
-    
-    release_tmps( c, mark );
-}
-    
-static void noise2_sub( struct brw_wm_compile *c ) {
-
-    struct brw_compile *p = &c->func;
-    struct brw_reg param0, param1,
-	x0y0, x0y1, x1y0, x1y1, /* gradients at each corner */       
-	t, tmp[ 4 ], /* float temporaries */
-	itmp[ 7 ]; /* unsigned integer temporaries (aliases of floats above) */
-    int i;
-    int mark = mark_tmps( c );
-
-    x0y0 = alloc_tmp( c );
-    x0y1 = alloc_tmp( c );
-    x1y0 = alloc_tmp( c );
-    x1y1 = alloc_tmp( c );
-    t = alloc_tmp( c );
-    for( i = 0; i < 4; i++ ) {
-	tmp[ i ] = alloc_tmp( c );
-	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
-    }
-    itmp[ 4 ] = retype( x0y0, BRW_REGISTER_TYPE_UD );
-    itmp[ 5 ] = retype( x0y1, BRW_REGISTER_TYPE_UD );
-    itmp[ 6 ] = retype( x1y0, BRW_REGISTER_TYPE_UD );
-    
-    param0 = lookup_tmp( c, mark - 3 );
-    param1 = lookup_tmp( c, mark - 2 );
-
-    brw_set_access_mode( p, BRW_ALIGN_1 );
-    
-    /* Arrange the four corner coordinates into scalars (itmp0..itmp3) to
-       be hashed.  Also compute the remainders (offsets within the unit
-       square), interleaved to reduce register dependency penalties. */
-    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
-    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
-    brw_FRC( p, param0, param0 );
-    brw_FRC( p, param1, param1 );
-    brw_MOV( p, itmp[ 4 ], brw_imm_ud( 0xBA97 ) ); /* constant used later */
-    brw_ADD( p, high_words( itmp[ 0 ] ), high_words( itmp[ 0 ] ),
-	     low_words( itmp[ 1 ] ) );
-    brw_MOV( p, itmp[ 5 ], brw_imm_ud( 0x79D9 ) ); /* constant used later */
-    brw_MOV( p, itmp[ 6 ], brw_imm_ud( 0xD5B1 ) ); /* constant used later */
-    brw_ADD( p, itmp[ 1 ], itmp[ 0 ], brw_imm_ud( 0x10000 ) );
-    brw_ADD( p, itmp[ 2 ], itmp[ 0 ], brw_imm_ud( 0x1 ) );
-    brw_ADD( p, itmp[ 3 ], itmp[ 0 ], brw_imm_ud( 0x10001 ) );
-
-    /* We're now ready to perform the hashing.  The four hashes are
-       interleaved for performance.  The hash function used is
-       designed to rapidly achieve avalanche and require only 32x16
-       bit multiplication, and 16-bit swizzles (which we get for
-       free).  We can't use immediate operands in the multiplies,
-       because immediates are permitted only in src1 and the 16-bit
-       factor is permitted only in src0. */
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 4 ], itmp[ i ] );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		 high_words( itmp[ i ] ) );
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 5 ], itmp[ i ] );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		 high_words( itmp[ i ] ) );
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, itmp[ i ], itmp[ 6 ], itmp[ i ] );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, low_words( itmp[ i ] ), low_words( itmp[ i ] ),
-		 high_words( itmp[ i ] ) );
-
-    /* Now we want to initialise the four gradients based on the
-       hashes.  Format conversion from signed integer to float leaves
-       everything scaled too high by a factor of pow( 2, 15 ), but
-       we correct for that right at the end. */
-    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
-    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
-    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
-    brw_MOV( p, x1y0, low_words( tmp[ 2 ] ) );
-    brw_MOV( p, x1y1, low_words( tmp[ 3 ] ) );
-    
-    brw_MOV( p, tmp[ 0 ], high_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 1 ], high_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 2 ], high_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 3 ], high_words( tmp[ 3 ] ) );
-    
-    brw_MUL( p, x1y0, x1y0, t );
-    brw_MUL( p, x1y1, x1y1, t );
-    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
-    brw_MUL( p, x0y0, x0y0, param0 );
-    brw_MUL( p, x0y1, x0y1, param0 );
-
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param1 );
-    brw_MUL( p, tmp[ 2 ], tmp[ 2 ], param1 );
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], t );
-    brw_MUL( p, tmp[ 3 ], tmp[ 3 ], t );
-
-    brw_ADD( p, x0y0, x0y0, tmp[ 0 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 2 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 1 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 3 ] );
-    
-    /* We interpolate between the gradients using the polynomial
-       6t^5 - 15t^4 + 10t^3 (Perlin). */
-    brw_MUL( p, tmp[ 0 ], param0, brw_imm_f( 6.0 ) );
-    brw_MUL( p, tmp[ 1 ], param1, brw_imm_f( 6.0 ) );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( -15.0 ) );
-    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( -15.0 ) );
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
-    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work to fill the
-						 pipeline */
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], brw_imm_f( 10.0 ) );
-    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], brw_imm_f( 10.0 ) );
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
-    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work to fill the
-						 pipeline */
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], param0 );
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], param1 );
-    brw_MUL( p, param0, tmp[ 0 ], param0 );
-    brw_MUL( p, param1, tmp[ 1 ], param1 );
-    
-    /* Here we interpolate in the y dimension... */
-    brw_MUL( p, x0y1, x0y1, param1 );
-    brw_MUL( p, x1y1, x1y1, param1 );
-    brw_ADD( p, x0y0, x0y0, x0y1 );
-    brw_ADD( p, x1y0, x1y0, x1y1 );
-
-    /* And now in x.  There are horrible register dependencies here,
-       but we have nothing else to do. */
-    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
-    brw_MUL( p, x1y0, x1y0, param0 );
-    brw_ADD( p, x0y0, x0y0, x1y0 );
-    
-    /* scale by pow( 2, -15 ), as described above */
-    brw_MUL( p, param0, x0y0, brw_imm_f( 0.000030517578125 ) );
-
-    release_tmps( c, mark );
-}
-
-static void emit_noise2( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, param0, param1, dst;
-    GLuint mask = inst->DstReg.WriteMask;
-    int i;
-    int mark = mark_tmps( c );
-
-    assert( mark == 0 );
-    
-    src0 = get_src_reg( c, inst, 0, 0 );
-    src1 = get_src_reg( c, inst, 0, 1 );
-
-    param0 = alloc_tmp( c );
-    param1 = alloc_tmp( c );
-
-    brw_MOV( p, param0, src0 );
-    brw_MOV( p, param1, src1 );
-
-    invoke_subroutine( c, SUB_NOISE2, noise2_sub );
-    
-    /* Fill in the result: */
-    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
-    for (i = 0 ; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    dst = get_dst_reg(c, inst, i);
-	    brw_MOV( p, dst, param0 );
-	}
-    }
-    if( inst->SaturateMode == SATURATE_ZERO_ONE )
-	brw_set_saturate( p, 0 );
-    
-    release_tmps( c, mark );
-}
-
-/**
- * The three-dimensional case is much like the one- and two- versions above,
- * but since the number of corners is rapidly growing we now pack 16 16-bit
- * hashes into each register to extract more parallelism from the EUs.
- */
-static void noise3_sub( struct brw_wm_compile *c ) {
-
-    struct brw_compile *p = &c->func;
-    struct brw_reg param0, param1, param2,
-	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
-	xi, yi, zi, /* interpolation coefficients */
-	t, tmp[ 8 ], /* float temporaries */
-	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
-	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
-    int i;
-    int mark = mark_tmps( c );
-
-    x0y0 = alloc_tmp( c );
-    x0y1 = alloc_tmp( c );
-    x1y0 = alloc_tmp( c );
-    x1y1 = alloc_tmp( c );
-    xi = alloc_tmp( c );
-    yi = alloc_tmp( c );
-    zi = alloc_tmp( c );
-    t = alloc_tmp( c );
-    for( i = 0; i < 8; i++ ) {
-	tmp[ i ] = alloc_tmp( c );
-	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
-	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
-    }
-    
-    param0 = lookup_tmp( c, mark - 4 );
-    param1 = lookup_tmp( c, mark - 3 );
-    param2 = lookup_tmp( c, mark - 2 );
-
-    brw_set_access_mode( p, BRW_ALIGN_1 );
-    
-    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
-       be hashed.  Also compute the remainders (offsets within the unit
-       cube), interleaved to reduce register dependency penalties. */
-    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param0 );
-    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param1 );
-    brw_RNDD( p, retype( itmp[ 2 ], BRW_REGISTER_TYPE_D ), param2 );
-    brw_FRC( p, param0, param0 );
-    brw_FRC( p, param1, param1 );
-    brw_FRC( p, param2, param2 );
-    /* Since we now have only 16 bits of precision in the hash, we must
-       be more careful about thorough mixing to maintain entropy as we
-       squash the input vector into a small scalar. */
-    brw_MUL( p, brw_null_reg(), low_words( itmp[ 0 ] ), brw_imm_uw( 0xBC8F ) );
-    brw_MAC( p, brw_null_reg(), low_words( itmp[ 1 ] ), brw_imm_uw( 0xD0BD ) );
-    brw_MAC( p, low_words( itmp[ 0 ] ), low_words( itmp[ 2 ] ),
-	     brw_imm_uw( 0x9B93 ) );
-    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
-	     brw_imm_uw( 0xBC8F ) );
-
-    /* Temporarily disable the execution mask while we work with ExecSize=16
-       channels (the mask is set for ExecSize=8 and is probably incorrect).
-       Although this might cause execution of unwanted channels, the code
-       writes only to temporary registers and has no side effects, so
-       disabling the mask is harmless. */
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
-    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
-    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
-
-    /* We're now ready to perform the hashing.  The eight hashes are
-       interleaved for performance.  The hash function used is
-       designed to rapidly achieve avalanche and require only 16x16
-       bit multiplication, and 8-bit swizzles (which we get for
-       free). */
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
-		 odd_bytes( wtmp[ i ] ) );
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
-		 odd_bytes( wtmp[ i ] ) );
-    brw_pop_insn_state( p );
-
-    /* Now we want to initialise the four rear gradients based on the
-       hashes.  Format conversion from signed integer to float leaves
-       everything scaled too high by a factor of pow( 2, 15 ), but
-       we correct for that right at the end. */
-    /* x component */
-    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
-    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
-    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
-    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
-    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
-    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
-    brw_pop_insn_state( p );
-    
-    brw_MUL( p, x1y0, x1y0, t );
-    brw_MUL( p, x1y1, x1y1, t );
-    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
-    brw_MUL( p, x0y0, x0y0, param0 );
-    brw_MUL( p, x0y1, x0y1, param0 );
-
-    /* y component */
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-    
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 5 ) );
-    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 5 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    brw_ADD( p, t, param0, brw_imm_f( -1.0 ) );
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
-    
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    
-    /* z component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param2 );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param2 );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param2 );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param2 );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    
-    /* We interpolate between the gradients using the polynomial
-       6t^5 - 15t^4 + 10t^3 (Perlin). */
-    brw_MUL( p, xi, param0, brw_imm_f( 6.0 ) );
-    brw_MUL( p, yi, param1, brw_imm_f( 6.0 ) );
-    brw_MUL( p, zi, param2, brw_imm_f( 6.0 ) );
-    brw_ADD( p, xi, xi, brw_imm_f( -15.0 ) );
-    brw_ADD( p, yi, yi, brw_imm_f( -15.0 ) );
-    brw_ADD( p, zi, zi, brw_imm_f( -15.0 ) );
-    brw_MUL( p, xi, xi, param0 );
-    brw_MUL( p, yi, yi, param1 );
-    brw_MUL( p, zi, zi, param2 );
-    brw_ADD( p, xi, xi, brw_imm_f( 10.0 ) );
-    brw_ADD( p, yi, yi, brw_imm_f( 10.0 ) );
-    brw_ADD( p, zi, zi, brw_imm_f( 10.0 ) );
-    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) ); /* unrelated work */
-    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) ); /* unrelated work */
-    brw_MUL( p, xi, xi, param0 );
-    brw_MUL( p, yi, yi, param1 );
-    brw_MUL( p, zi, zi, param2 );
-    brw_MUL( p, xi, xi, param0 );
-    brw_MUL( p, yi, yi, param1 );
-    brw_MUL( p, zi, zi, param2 );
-    brw_MUL( p, xi, xi, param0 );
-    brw_MUL( p, yi, yi, param1 );
-    brw_MUL( p, zi, zi, param2 );
-    
-    /* Here we interpolate in the y dimension... */
-    brw_MUL( p, x0y1, x0y1, yi );
-    brw_MUL( p, x1y1, x1y1, yi );
-    brw_ADD( p, x0y0, x0y0, x0y1 );
-    brw_ADD( p, x1y0, x1y0, x1y1 );
-
-    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */
-    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
-    brw_MUL( p, x1y0, x1y0, xi );
-    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
-
-    /* Now do the same thing for the front four gradients... */
-    /* x component */
-    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
-    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
-    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
-    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
-    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, x1y0, x1y0, t );
-    brw_MUL( p, x1y1, x1y1, t );
-    brw_ADD( p, t, param1, brw_imm_f( -1.0 ) );
-    brw_MUL( p, x0y0, x0y0, param0 );
-    brw_MUL( p, x0y1, x0y1, param0 );
-
-    /* y component */
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-    
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 5 ) );
-    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 5 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    brw_ADD( p, t, param2, brw_imm_f( -1.0 ) );
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param1 );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param1 );
-    
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    
-    /* z component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    
-    /* The interpolation coefficients are still around from last time, so
-       again interpolate in the y dimension... */
-    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
-    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
-    brw_MUL( p, x0y1, x0y1, yi );
-    brw_MUL( p, x1y1, x1y1, yi );
-    brw_ADD( p, x0y0, x0y0, x0y1 );
-    brw_ADD( p, x1y0, x1y0, x1y1 );
-
-    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
-       time put the front face in tmp[ 1 ] and we're nearly there... */
-    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
-    brw_MUL( p, x1y0, x1y0, xi );
-    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
-
-    /* The final interpolation, in the z dimension: */
-    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], zi );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
-    
-    /* scale by pow( 2, -15 ), as described above */
-    brw_MUL( p, param0, tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
-
-    release_tmps( c, mark );
-}
-
-static void emit_noise3( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, src2, param0, param1, param2, dst;
-    GLuint mask = inst->DstReg.WriteMask;
-    int i;
-    int mark = mark_tmps( c );
-
-    assert( mark == 0 );
-    
-    src0 = get_src_reg( c, inst, 0, 0 );
-    src1 = get_src_reg( c, inst, 0, 1 );
-    src2 = get_src_reg( c, inst, 0, 2 );
-
-    param0 = alloc_tmp( c );
-    param1 = alloc_tmp( c );
-    param2 = alloc_tmp( c );
-
-    brw_MOV( p, param0, src0 );
-    brw_MOV( p, param1, src1 );
-    brw_MOV( p, param2, src2 );
-
-    invoke_subroutine( c, SUB_NOISE3, noise3_sub );
-    
-    /* Fill in the result: */
-    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
-    for (i = 0 ; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    dst = get_dst_reg(c, inst, i);
-	    brw_MOV( p, dst, param0 );
-	}
-    }
-    if( inst->SaturateMode == SATURATE_ZERO_ONE )
-	brw_set_saturate( p, 0 );
-    
-    release_tmps( c, mark );
-}
-    
-/**
- * For the four-dimensional case, the little micro-optimisation benefits
- * we obtain by unrolling all the loops aren't worth the massive bloat it
- * now causes.  Instead, we loop twice around performing a similar operation
- * to noise3, once for the w=0 cube and once for the w=1, with a bit more
- * code to glue it all together.
- */
-static void noise4_sub( struct brw_wm_compile *c )
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg param[ 4 ],
-	x0y0, x0y1, x1y0, x1y1, /* gradients at four of the corners */
-	w0, /* noise for the w=0 cube */
-	floors[ 2 ], /* integer coordinates of base corner of hypercube */
-	interp[ 4 ], /* interpolation coefficients */
-	t, tmp[ 8 ], /* float temporaries */
-	itmp[ 8 ], /* unsigned integer temporaries (aliases of floats above) */
-	wtmp[ 8 ]; /* 16-way unsigned word temporaries (aliases of above) */
-    int i, j;
-    int mark = mark_tmps( c );
-    GLuint loop, origin;
-    
-    x0y0 = alloc_tmp( c );
-    x0y1 = alloc_tmp( c );
-    x1y0 = alloc_tmp( c );
-    x1y1 = alloc_tmp( c );
-    t = alloc_tmp( c );
-    w0 = alloc_tmp( c );    
-    floors[ 0 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
-    floors[ 1 ] = retype( alloc_tmp( c ), BRW_REGISTER_TYPE_UD );
-
-    for( i = 0; i < 4; i++ ) {
-	param[ i ] = lookup_tmp( c, mark - 5 + i );
-	interp[ i ] = alloc_tmp( c );
-    }
-    
-    for( i = 0; i < 8; i++ ) {
-	tmp[ i ] = alloc_tmp( c );
-	itmp[ i ] = retype( tmp[ i ], BRW_REGISTER_TYPE_UD );
-	wtmp[ i ] = brw_uw16_grf( tmp[ i ].nr, 0 );
-    }
-
-    brw_set_access_mode( p, BRW_ALIGN_1 );
-
-    /* We only want 16 bits of precision from the integral part of each
-       co-ordinate, but unfortunately the RNDD semantics would saturate
-       at 16 bits if we performed the operation directly to a 16-bit
-       destination.  Therefore, we round to 32-bit temporaries where
-       appropriate, and then store only the lower 16 bits. */
-    brw_RNDD( p, retype( floors[ 0 ], BRW_REGISTER_TYPE_D ), param[ 0 ] );
-    brw_RNDD( p, retype( itmp[ 0 ], BRW_REGISTER_TYPE_D ), param[ 1 ] );
-    brw_RNDD( p, retype( floors[ 1 ], BRW_REGISTER_TYPE_D ), param[ 2 ] );
-    brw_RNDD( p, retype( itmp[ 1 ], BRW_REGISTER_TYPE_D ), param[ 3 ] );
-    brw_MOV( p, high_words( floors[ 0 ] ), low_words( itmp[ 0 ] ) );
-    brw_MOV( p, high_words( floors[ 1 ] ), low_words( itmp[ 1 ] ) );
-
-    /* Modify the flag register here, because the side effect is useful
-       later (see below).  We know for certain that all flags will be
-       cleared, since the FRC instruction cannot possibly generate
-       negative results.  Even for exceptional inputs (infinities, denormals,
-       NaNs), the architecture guarantees that the L conditional is false. */
-    brw_set_conditionalmod( p, BRW_CONDITIONAL_L );
-    brw_FRC( p, param[ 0 ], param[ 0 ] );
-    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
-    for( i = 1; i < 4; i++ )	
-	brw_FRC( p, param[ i ], param[ i ] );
-    
-    /* Calculate the interpolation coefficients (6t^5 - 15t^4 + 10t^3) first
-       of all. */
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, interp[ i ], param[ i ], brw_imm_f( 6.0 ) );
-    for( i = 0; i < 4; i++ )
-	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( -15.0 ) );
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
-    for( i = 0; i < 4; i++ )
-	brw_ADD( p, interp[ i ], interp[ i ], brw_imm_f( 10.0 ) );
-    for( j = 0; j < 3; j++ )
-	for( i = 0; i < 4; i++ )
-	    brw_MUL( p, interp[ i ], interp[ i ], param[ i ] );
-
-    /* Mark the current address, as it will be a jump destination.  The
-       following code will be executed twice: first, with the flag
-       register clear indicating the w=0 case, and second with flags
-       set for w=1. */
-    loop = p->nr_insn;
-    
-    /* Arrange the eight corner coordinates into scalars (itmp0..itmp3) to
-       be hashed.  Since we have only 16 bits of precision in the hash, we
-       must be careful about thorough mixing to maintain entropy as we
-       squash the input vector into a small scalar. */
-    brw_MUL( p, brw_null_reg(), low_words( floors[ 0 ] ),
-	     brw_imm_uw( 0xBC8F ) );
-    brw_MAC( p, brw_null_reg(), high_words( floors[ 0 ] ),
-	     brw_imm_uw( 0xD0BD ) );
-    brw_MAC( p, brw_null_reg(), low_words( floors[ 1 ] ),
-	     brw_imm_uw( 0x9B93 ) );
-    brw_MAC( p, low_words( itmp[ 0 ] ), high_words( floors[ 1 ] ),
-	     brw_imm_uw( 0xA359 ) );
-    brw_ADD( p, high_words( itmp[ 0 ] ), low_words( itmp[ 0 ] ),
-	     brw_imm_uw( 0xBC8F ) );
-
-    /* Temporarily disable the execution mask while we work with ExecSize=16
-       channels (the mask is set for ExecSize=8 and is probably incorrect).
-       Although this might cause execution of unwanted channels, the code
-       writes only to temporary registers and has no side effects, so
-       disabling the mask is harmless. */
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_ADD( p, wtmp[ 1 ], wtmp[ 0 ], brw_imm_uw( 0xD0BD ) );
-    brw_ADD( p, wtmp[ 2 ], wtmp[ 0 ], brw_imm_uw( 0x9B93 ) );
-    brw_ADD( p, wtmp[ 3 ], wtmp[ 1 ], brw_imm_uw( 0x9B93 ) );
-
-    /* We're now ready to perform the hashing.  The eight hashes are
-       interleaved for performance.  The hash function used is
-       designed to rapidly achieve avalanche and require only 16x16
-       bit multiplication, and 8-bit swizzles (which we get for
-       free). */
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0x28D9 ) );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
-		 odd_bytes( wtmp[ i ] ) );
-    for( i = 0; i < 4; i++ )
-	brw_MUL( p, wtmp[ i ], wtmp[ i ], brw_imm_uw( 0xC6D5 ) );
-    for( i = 0; i < 4; i++ )
-	brw_XOR( p, even_bytes( wtmp[ i ] ), even_bytes( wtmp[ i ] ),
-		 odd_bytes( wtmp[ i ] ) );
-    brw_pop_insn_state( p );
-
-    /* Now we want to initialise the four rear gradients based on the
-       hashes.  Format conversion from signed integer to float leaves
-       everything scaled too high by a factor of pow( 2, 15 ), but
-       we correct for that right at the end. */
-    /* x component */
-    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
-    brw_MOV( p, x0y0, low_words( tmp[ 0 ] ) );
-    brw_MOV( p, x0y1, low_words( tmp[ 1 ] ) );
-    brw_MOV( p, x1y0, high_words( tmp[ 0 ] ) );
-    brw_MOV( p, x1y1, high_words( tmp[ 1 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-    
-    brw_MUL( p, x1y0, x1y0, t );
-    brw_MUL( p, x1y1, x1y1, t );
-    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
-    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
-    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
-
-    /* y component */
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-    
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );    
-    /* prepare t for the w component (used below): w the first time through
-       the loop; w - 1 the second time) */
-    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
-    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
-    p->current->header.predicate_inverse = 1;
-    brw_MOV( p, t, param[ 3 ] );
-    p->current->header.predicate_inverse = 0;
-    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
-    
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    
-    /* z component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 0 ], wtmp[ 0 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 1 ], wtmp[ 1 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 2 ] );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], param[ 2 ] );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 2 ] );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], param[ 2 ] );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
-    /* w component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 1 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 0 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 1 ] ) );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    brw_ADD( p, t, param[ 0 ], brw_imm_f( -1.0 ) );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
-    /* Here we interpolate in the y dimension... */
-    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
-    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
-    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
-    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
-    brw_ADD( p, x0y0, x0y0, x0y1 );
-    brw_ADD( p, x1y0, x1y0, x1y1 );
-
-    /* And now in x.  Leave the result in tmp[ 0 ] (see below)... */
-    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
-    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
-    brw_ADD( p, tmp[ 0 ], x0y0, x1y0 );
-
-    /* Now do the same thing for the front four gradients... */
-    /* x component */
-    brw_MOV( p, x0y0, low_words( tmp[ 2 ] ) );
-    brw_MOV( p, x0y1, low_words( tmp[ 3 ] ) );
-    brw_MOV( p, x1y0, high_words( tmp[ 2 ] ) );
-    brw_MOV( p, x1y1, high_words( tmp[ 3 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, x1y0, x1y0, t );
-    brw_MUL( p, x1y1, x1y1, t );
-    brw_ADD( p, t, param[ 1 ], brw_imm_f( -1.0 ) );
-    brw_MUL( p, x0y0, x0y0, param[ 0 ] );
-    brw_MUL( p, x0y1, x0y1, param[ 0 ] );
-
-    /* y component */
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-    
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    brw_ADD( p, t, param[ 2 ], brw_imm_f( -1.0 ) );
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], param[ 1 ] );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], param[ 1 ] );
-    
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    
-    /* z component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_SHL( p, wtmp[ 2 ], wtmp[ 2 ], brw_imm_uw( 4 ) );
-    brw_SHL( p, wtmp[ 3 ], wtmp[ 3 ], brw_imm_uw( 4 ) );
-    brw_pop_insn_state( p );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    /* prepare t for the w component (used below): w the first time through
-       the loop; w - 1 the second time) */
-    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
-    brw_ADD( p, t, param[ 3 ], brw_imm_f( -1.0 ) );
-    p->current->header.predicate_inverse = 1;
-    brw_MOV( p, t, param[ 3 ] );
-    p->current->header.predicate_inverse = 0;
-    brw_set_predicate_control( p, BRW_PREDICATE_NONE );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
-    /* w component */
-    brw_MOV( p, tmp[ 4 ], low_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 5 ], low_words( tmp[ 3 ] ) );
-    brw_MOV( p, tmp[ 6 ], high_words( tmp[ 2 ] ) );
-    brw_MOV( p, tmp[ 7 ], high_words( tmp[ 3 ] ) );
-
-    brw_MUL( p, tmp[ 4 ], tmp[ 4 ], t );
-    brw_MUL( p, tmp[ 5 ], tmp[ 5 ], t );
-    brw_MUL( p, tmp[ 6 ], tmp[ 6 ], t );
-    brw_MUL( p, tmp[ 7 ], tmp[ 7 ], t );
-    
-    brw_ADD( p, x0y0, x0y0, tmp[ 4 ] );
-    brw_ADD( p, x0y1, x0y1, tmp[ 5 ] );
-    brw_ADD( p, x1y0, x1y0, tmp[ 6 ] );
-    brw_ADD( p, x1y1, x1y1, tmp[ 7 ] );
-
-    /* Interpolate in the y dimension: */
-    brw_ADD( p, x0y1, x0y1, negate( x0y0 ) );
-    brw_ADD( p, x1y1, x1y1, negate( x1y0 ) );
-    brw_MUL( p, x0y1, x0y1, interp[ 1 ] );
-    brw_MUL( p, x1y1, x1y1, interp[ 1 ] );
-    brw_ADD( p, x0y0, x0y0, x0y1 );
-    brw_ADD( p, x1y0, x1y0, x1y1 );
-
-    /* And now in x.  The rear face is in tmp[ 0 ] (see above), so this
-       time put the front face in tmp[ 1 ] and we're nearly there... */
-    brw_ADD( p, x1y0, x1y0, negate( x0y0 ) );
-    brw_MUL( p, x1y0, x1y0, interp[ 0 ] );
-    brw_ADD( p, tmp[ 1 ], x0y0, x1y0 );
-
-    /* Another interpolation, in the z dimension: */
-    brw_ADD( p, tmp[ 1 ], tmp[ 1 ], negate( tmp[ 0 ] ) );    
-    brw_MUL( p, tmp[ 1 ], tmp[ 1 ], interp[ 2 ] );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], tmp[ 1 ] );
-
-    /* Exit the loop if we've computed both cubes... */
-    origin = p->nr_insn;
-    brw_push_insn_state( p );
-    brw_set_predicate_control( p, BRW_PREDICATE_NORMAL );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_ADD( p, brw_ip_reg(), brw_ip_reg(), brw_imm_d( 0 ) );
-    brw_pop_insn_state( p );
-
-    /* Save the result for the w=0 case, and increment the w coordinate: */
-    brw_MOV( p, w0, tmp[ 0 ] );
-    brw_ADD( p, high_words( floors[ 1 ] ), high_words( floors[ 1 ] ),
-	     brw_imm_uw( 1 ) );
-
-    /* Loop around for the other cube.  Explicitly set the flag register
-       (unfortunately we must spend an extra instruction to do this: we
-       can't rely on a side effect of the previous MOV or ADD because
-       conditional modifiers which are normally true might be false in
-       exceptional circumstances, e.g. given a NaN input; the add to
-       brw_ip_reg() is not suitable because the IP is not an 8-vector). */
-    brw_push_insn_state( p );
-    brw_set_mask_control( p, BRW_MASK_DISABLE );
-    brw_MOV( p, brw_flag_reg(), brw_imm_uw( 0xFF ) );
-    brw_ADD( p, brw_ip_reg(), brw_ip_reg(),
-	     brw_imm_d( ( loop - p->nr_insn ) << 4 ) );
-    brw_pop_insn_state( p );
-
-    /* Patch the previous conditional branch now that we know the
-       destination address. */
-    brw_set_src1( p->store + origin,
-		  brw_imm_d( ( p->nr_insn - origin ) << 4 ) );
-
-    /* The very last interpolation. */
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], negate( w0 ) );    
-    brw_MUL( p, tmp[ 0 ], tmp[ 0 ], interp[ 3 ] );
-    brw_ADD( p, tmp[ 0 ], tmp[ 0 ], w0 );
-
-    /* scale by pow( 2, -15 ), as described above */
-    brw_MUL( p, param[ 0 ], tmp[ 0 ], brw_imm_f( 0.000030517578125 ) );
-
-    release_tmps( c, mark );
-}
-
-static void emit_noise4( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
-{
-    struct brw_compile *p = &c->func;
-    struct brw_reg src0, src1, src2, src3, param0, param1, param2, param3, dst;
-    GLuint mask = inst->DstReg.WriteMask;
-    int i;
-    int mark = mark_tmps( c );
-
-    assert( mark == 0 );
-    
-    src0 = get_src_reg( c, inst, 0, 0 );
-    src1 = get_src_reg( c, inst, 0, 1 );
-    src2 = get_src_reg( c, inst, 0, 2 );
-    src3 = get_src_reg( c, inst, 0, 3 );
-
-    param0 = alloc_tmp( c );
-    param1 = alloc_tmp( c );
-    param2 = alloc_tmp( c );
-    param3 = alloc_tmp( c );
-
-    brw_MOV( p, param0, src0 );
-    brw_MOV( p, param1, src1 );
-    brw_MOV( p, param2, src2 );
-    brw_MOV( p, param3, src3 );
-
-    invoke_subroutine( c, SUB_NOISE4, noise4_sub );
-    
-    /* Fill in the result: */
-    brw_set_saturate( p, inst->SaturateMode == SATURATE_ZERO_ONE );
-    for (i = 0 ; i < 4; i++) {
-	if (mask & (1<<i)) {
-	    dst = get_dst_reg(c, inst, i);
-	    brw_MOV( p, dst, param0 );
-	}
-    }
-    if( inst->SaturateMode == SATURATE_ZERO_ONE )
-	brw_set_saturate( p, 0 );
-    
-    release_tmps( c, mark );
-}
     
 static void emit_wpos_xy(struct brw_wm_compile *c,
                          const struct prog_instruction *inst)
@@ -2543,19 +1507,18 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
      * X and Y channels.
      */
     if (mask & WRITEMASK_X) {
-	/* X' = X - origin_x */
-	brw_ADD(p,
+	/* X' = X */
+	brw_MOV(p,
 		dst[0],
-		retype(src0[0], BRW_REGISTER_TYPE_W),
-		brw_imm_d(0 - c->key.origin_x));
+		retype(src0[0], BRW_REGISTER_TYPE_W));
     }
 
     if (mask & WRITEMASK_Y) {
-	/* Y' = height - (Y - origin_y) = height + origin_y - Y */
+	/* Y' = height - 1 - Y */
 	brw_ADD(p,
 		dst[1],
 		negate(retype(src0[1], BRW_REGISTER_TYPE_W)),
-		brw_imm_d(c->key.origin_y + c->key.drawable_height - 1));
+		brw_imm_d(c->key.drawable_height - 1));
     }
 }
 
@@ -2827,7 +1790,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 		emit_trunc(c, inst);
 		break;
 	    case OPCODE_MOV:
-	    case OPCODE_SWZ:
 		emit_mov(c, inst);
 		break;
 	    case OPCODE_DP3:
@@ -2903,18 +1865,6 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
 	    case OPCODE_MAD:
 		emit_mad(c, inst);
 		break;
-	    case OPCODE_NOISE1:
-		emit_noise1(c, inst);
-		break;
-	    case OPCODE_NOISE2:
-		emit_noise2(c, inst);
-		break;
-	    case OPCODE_NOISE3:
-		emit_noise3(c, inst);
-		break;
-	    case OPCODE_NOISE4:
-		emit_noise4(c, inst);
-		break;
 	    case OPCODE_TEX:
 		emit_tex(c, inst);
 		break;
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 6279258339..0c411b57f5 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -422,7 +422,6 @@ void brw_wm_pass0( struct brw_wm_compile *c )
        */      
       switch (inst->Opcode) {
       case OPCODE_MOV: 
-      case OPCODE_SWZ: 
 	 if (!inst->SaturateMode) {
 	    pass0_precalc_mov(c, inst);
 	 }
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index b449394029..d940ec09a9 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -120,7 +120,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       GLuint writemask;
       GLuint read0, read1, read2;
 
-      if (inst->opcode == OPCODE_KIL) {
+      if (inst->opcode == TGSI_OPCODE_KIL) {
 	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
 	 continue;
       }
@@ -154,76 +154,75 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       /* Mark all inputs which contribute to the marked outputs:
        */
       switch (inst->opcode) {
-      case OPCODE_ABS:
-      case OPCODE_FLR:
-      case OPCODE_FRC:
-      case OPCODE_MOV:
-      case OPCODE_SWZ:
-      case OPCODE_TRUNC:
+      case TGSI_OPCODE_ABS:
+      case TGSI_OPCODE_FLR:
+      case TGSI_OPCODE_FRC:
+      case TGSI_OPCODE_MOV:
+      case TGSI_OPCODE_TRUNC:
 	 read0 = writemask;
 	 break;
 
-      case OPCODE_SUB:
-      case OPCODE_SLT:
-      case OPCODE_SLE:
-      case OPCODE_SGE:
-      case OPCODE_SGT:
-      case OPCODE_SEQ:
-      case OPCODE_SNE:
-      case OPCODE_ADD:
-      case OPCODE_MAX:
-      case OPCODE_MIN:
-      case OPCODE_MUL:
+      case TGSI_OPCODE_SUB:
+      case TGSI_OPCODE_SLT:
+      case TGSI_OPCODE_SLE:
+      case TGSI_OPCODE_SGE:
+      case TGSI_OPCODE_SGT:
+      case TGSI_OPCODE_SEQ:
+      case TGSI_OPCODE_SNE:
+      case TGSI_OPCODE_ADD:
+      case TGSI_OPCODE_MAX:
+      case TGSI_OPCODE_MIN:
+      case TGSI_OPCODE_MUL:
 	 read0 = writemask;
 	 read1 = writemask;
 	 break;
 
-      case OPCODE_DDX:
-      case OPCODE_DDY:
+      case TGSI_OPCODE_DDX:
+      case TGSI_OPCODE_DDY:
 	 read0 = writemask;
 	 break;
 
-      case OPCODE_MAD:	
-      case OPCODE_CMP:
-      case OPCODE_LRP:
+      case TGSI_OPCODE_MAD:	
+      case TGSI_OPCODE_CMP:
+      case TGSI_OPCODE_LRP:
 	 read0 = writemask;
 	 read1 = writemask;	
 	 read2 = writemask;	
 	 break;
 
-      case OPCODE_XPD: 
+      case TGSI_OPCODE_XPD: 
 	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	 
 	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	 
 	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
 	 read1 = read0;
 	 break;
 
-      case OPCODE_COS:
-      case OPCODE_EX2:
-      case OPCODE_LG2:
-      case OPCODE_RCP:
-      case OPCODE_RSQ:
-      case OPCODE_SIN:
-      case OPCODE_SCS:
+      case TGSI_OPCODE_COS:
+      case TGSI_OPCODE_EX2:
+      case TGSI_OPCODE_LG2:
+      case TGSI_OPCODE_RCP:
+      case TGSI_OPCODE_RSQ:
+      case TGSI_OPCODE_SIN:
+      case TGSI_OPCODE_SCS:
       case WM_CINTERP:
       case WM_PIXELXY:
 	 read0 = WRITEMASK_X;
 	 break;
 
-      case OPCODE_POW:
+      case TGSI_OPCODE_POW:
 	 read0 = WRITEMASK_X;
 	 read1 = WRITEMASK_X;
 	 break;
 
-      case OPCODE_TEX:
-      case OPCODE_TXP:
+      case TGSI_OPCODE_TEX:
+      case TGSI_OPCODE_TXP:
 	 read0 = get_texcoord_mask(inst->tex_idx);
 
          if (inst->tex_shadow)
 	    read0 |= WRITEMASK_Z;
 	 break;
 
-      case OPCODE_TXB:
+      case TGSI_OPCODE_TXB:
 	 /* Shadow ignored for txb.
 	  */
 	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
@@ -254,28 +253,28 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 read2 = WRITEMASK_W; /* pixel w */
 	 break;
 
-      case OPCODE_DP3:	
+      case TGSI_OPCODE_DP3:	
 	 read0 = WRITEMASK_XYZ;
 	 read1 = WRITEMASK_XYZ;
 	 break;
 
-      case OPCODE_DPH:
+      case TGSI_OPCODE_DPH:
 	 read0 = WRITEMASK_XYZ;
 	 read1 = WRITEMASK_XYZW;
 	 break;
 
-      case OPCODE_DP4:
+      case TGSI_OPCODE_DP4:
 	 read0 = WRITEMASK_XYZW;
 	 read1 = WRITEMASK_XYZW;
 	 break;
 
-      case OPCODE_LIT: 
+      case TGSI_OPCODE_LIT: 
 	 read0 = WRITEMASK_XYW;
 	 break;
 
-      case OPCODE_DST:
+      case TGSI_OPCODE_DST:
       case WM_FRONTFACING:
-      case OPCODE_KIL_NV:
+      case TGSI_OPCODE_KIL_NV:
       default:
 	 break;
       }
diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h
index 3dc8653a73..3c38f1676c 100644
--- a/src/gallium/drivers/i965/intel_chipset.h
+++ b/src/gallium/drivers/i965/intel_chipset.h
@@ -66,7 +66,6 @@
 #define PCI_CHIP_Q45_G                  0x2E12
 #define PCI_CHIP_G45_G                  0x2E22
 #define PCI_CHIP_G41_G                  0x2E32
-#define PCI_CHIP_B43_G                  0x2E42
 
 #define PCI_CHIP_ILD_G                  0x0042
 #define PCI_CHIP_ILM_G                  0x0046
@@ -84,8 +83,7 @@
 #define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \
                                  devid == PCI_CHIP_Q45_G || \
                                  devid == PCI_CHIP_G45_G || \
-                                 devid == PCI_CHIP_G41_G || \
-                                 devid == PCI_CHIP_B43_G)
+                                 devid == PCI_CHIP_G41_G)
 #define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)
 #define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
 
-- 
cgit v1.2.3


From c84a05676497ff7263f3ea8203b868071c4f678f Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Fri, 23 Oct 2009 18:40:13 +0200
Subject: nouveau: nv30: use r5g6b5 as z16 format

---
 src/gallium/drivers/nv30/nv30_fragtex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index f5f17d4071..3dd636f4ee 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -29,7 +29,7 @@ nv30_texture_formats[] = {
 	_(A8_UNORM      , L8      , ZERO, ZERO, ZERO,   S1, X, X, X, X),
 	_(I8_UNORM      , L8      ,   S1,   S1,   S1,   S1, X, X, X, X),
 	_(A8L8_UNORM    , A8L8    ,   S1,   S1,   S1,   S1, X, X, X, Y),
-//	_(Z16_UNORM     , Z16     ,   S1,   S1,   S1,  ONE, X, X, X, X),
+	_(Z16_UNORM     , R5G6B5  ,   S1,   S1,   S1,  ONE, X, X, X, X),
 	_(Z24S8_UNORM   , A8R8G8B8,   S1,   S1,   S1,  ONE, X, X, X, X),
 	_(DXT1_RGB      , DXT1    ,   S1,   S1,   S1,  ONE, X, Y, Z, W),
 	_(DXT1_RGBA     , DXT1    ,   S1,   S1,   S1,   S1, X, Y, Z, W),
-- 
cgit v1.2.3


From d9014a13e72b6682a959217d38050f3252628edb Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Fri, 23 Oct 2009 18:42:21 +0200
Subject: nouveau: nv30: Relax some limits. We can render to z24s8 buffer even
 if color buffer is 16 bits.

---
 src/gallium/drivers/nv30/nv30_screen.c   | 10 ++++++++--
 src/gallium/drivers/nv30/nv30_state_fb.c |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index bb40e1803d..221ae1b5f8 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -102,13 +102,19 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
 	struct pipe_surface *front = ((struct nouveau_winsys *) pscreen->winsys)->front;
 
 	if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
-		return (format == front->format);
+		switch (format) {
+		case PIPE_FORMAT_A8R8G8B8_UNORM:
+		case PIPE_FORMAT_R5G6B5_UNORM:
+			return TRUE;
+		default:
+			break;
+		}
 	} else
 	if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
 		switch (format) {
 		case PIPE_FORMAT_Z24S8_UNORM:
 		case PIPE_FORMAT_Z24X8_UNORM:
-			return (front->format == PIPE_FORMAT_A8R8G8B8_UNORM);
+			return TRUE;
 		case PIPE_FORMAT_Z16_UNORM:
 			return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
 		default:
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index f90681b0f9..4d6a67e56d 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -92,7 +92,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 		assert(0);
 	}
 
-	if (colour_bits != zeta_bits) {
+	if (colour_bits > zeta_bits) {
 		return FALSE;
 	}
 
-- 
cgit v1.2.3


From 6b48fb002257e6f221dd9d8439a5e1aa718ed2cc Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 20:19:14 +0100
Subject: i965: ignore cliprect_mode

---
 src/gallium/drivers/i965/intel_batchbuffer.h | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h
index d4899aab7f..a595d2e0c5 100644
--- a/src/gallium/drivers/i965/intel_batchbuffer.h
+++ b/src/gallium/drivers/i965/intel_batchbuffer.h
@@ -51,8 +51,6 @@ struct intel_batchbuffer
    GLubyte *map;
    GLubyte *ptr;
 
-   enum cliprect_mode cliprect_mode;
-
    GLuint size;
 
    /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
@@ -126,21 +124,10 @@ intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
    if (intel_batchbuffer_space(batch) < sz)
       intel_batchbuffer_flush(batch);
 
-   if ((cliprect_mode == LOOP_CLIPRECTS ||
-	cliprect_mode == REFERENCES_CLIPRECTS) &&
-       batch->intel->constant_cliprect)
-      cliprect_mode = NO_LOOP_CLIPRECTS;
-
-   if (cliprect_mode != IGNORE_CLIPRECTS) {
-      if (batch->cliprect_mode == IGNORE_CLIPRECTS) {
-	 batch->cliprect_mode = cliprect_mode;
-      } else {
-	 if (batch->cliprect_mode != cliprect_mode) {
-	    intel_batchbuffer_flush(batch);
-	    batch->cliprect_mode = cliprect_mode;
-	 }
-      }
-   }
+   /* All commands should be executed once regardless of cliprect
+    * mode.
+    */
+   (void)cliprect_mode;
 }
 
 /* Here are the crusty old macros, to be removed:
-- 
cgit v1.2.3


From 255a90a7bd829904554889dd19a16d86fc7f9274 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 23 Oct 2009 20:05:31 +0200
Subject: nv50: add depth texture formats, and a few others, too

---
 src/gallium/drivers/nv50/nv50_screen.c         | 14 +++++++++
 src/gallium/drivers/nv50/nv50_state_validate.c | 39 +++++++++++-------------
 src/gallium/drivers/nv50/nv50_tex.c            | 42 +++++++++++++++++++-------
 src/gallium/drivers/nv50/nv50_texture.h        | 13 ++++++++
 4 files changed, 76 insertions(+), 32 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 63dce0f4c2..c672ea471a 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -38,6 +38,11 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_X8R8G8B8_UNORM:
 		case PIPE_FORMAT_A8R8G8B8_UNORM:
 		case PIPE_FORMAT_R5G6B5_UNORM:
+		case PIPE_FORMAT_R16G16B16A16_SNORM:
+		case PIPE_FORMAT_R16G16B16A16_UNORM:
+		case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		case PIPE_FORMAT_R16G16_SNORM:
+		case PIPE_FORMAT_R16G16_UNORM:
 			return TRUE;
 		default:
 			break;
@@ -57,6 +62,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 		switch (format) {
 		case PIPE_FORMAT_A8R8G8B8_UNORM:
 		case PIPE_FORMAT_X8R8G8B8_UNORM:
+		case PIPE_FORMAT_A8R8G8B8_SRGB:
+		case PIPE_FORMAT_X8R8G8B8_SRGB:
 		case PIPE_FORMAT_A1R5G5B5_UNORM:
 		case PIPE_FORMAT_A4R4G4B4_UNORM:
 		case PIPE_FORMAT_R5G6B5_UNORM:
@@ -68,6 +75,13 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_DXT1_RGBA:
 		case PIPE_FORMAT_DXT3_RGBA:
 		case PIPE_FORMAT_DXT5_RGBA:
+		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_Z32_FLOAT:
+		case PIPE_FORMAT_R16G16B16A16_SNORM:
+		case PIPE_FORMAT_R16G16B16A16_UNORM:
+		case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		case PIPE_FORMAT_R16G16_SNORM:
+		case PIPE_FORMAT_R16G16_UNORM:
 			return TRUE;
 		default:
 			break;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 012911f41b..956a700615 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -23,6 +23,12 @@
 #include "nv50_context.h"
 #include "nouveau/nouveau_stateobj.h"
 
+#define NV50_CBUF_FORMAT_CASE(n) \
+	case PIPE_FORMAT_##n: so_data(so, NV50TCL_RT_FORMAT_##n); break
+
+#define NV50_ZETA_FORMAT_CASE(n) \
+	case PIPE_FORMAT_##n: so_data(so, NV50TCL_ZETA_FORMAT_##n); break
+
 static void
 nv50_state_validate_fb(struct nv50_context *nv50)
 {
@@ -54,15 +60,14 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 		so_reloc (so, bo, fb->cbufs[i]->offset, NOUVEAU_BO_VRAM |
 			      NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
 		switch (fb->cbufs[i]->format) {
-		case PIPE_FORMAT_A8R8G8B8_UNORM:
-			so_data(so, NV50TCL_RT_FORMAT_A8R8G8B8_UNORM);
-			break;
-		case PIPE_FORMAT_X8R8G8B8_UNORM:
-			so_data(so, NV50TCL_RT_FORMAT_X8R8G8B8_UNORM);
-			break;
-		case PIPE_FORMAT_R5G6B5_UNORM:
-			so_data(so, NV50TCL_RT_FORMAT_R5G6B5_UNORM);
-			break;
+		NV50_CBUF_FORMAT_CASE(A8R8G8B8_UNORM);
+		NV50_CBUF_FORMAT_CASE(X8R8G8B8_UNORM);
+		NV50_CBUF_FORMAT_CASE(R5G6B5_UNORM);
+		NV50_CBUF_FORMAT_CASE(R16G16B16A16_SNORM);
+		NV50_CBUF_FORMAT_CASE(R16G16B16A16_UNORM);
+		NV50_CBUF_FORMAT_CASE(R32G32B32A32_FLOAT);
+		NV50_CBUF_FORMAT_CASE(R16G16_SNORM);
+		NV50_CBUF_FORMAT_CASE(R16G16_UNORM);
 		default:
 			NOUVEAU_ERR("AIIII unknown format %s\n",
 				    pf_name(fb->cbufs[i]->format));
@@ -96,18 +101,10 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 		so_reloc (so, bo, fb->zsbuf->offset, NOUVEAU_BO_VRAM |
 			      NOUVEAU_BO_LOW | NOUVEAU_BO_RDWR, 0, 0);
 		switch (fb->zsbuf->format) {
-		case PIPE_FORMAT_Z32_FLOAT:
-			so_data(so, NV50TCL_ZETA_FORMAT_Z32_FLOAT);
-			break;
-		case PIPE_FORMAT_Z24S8_UNORM:
-			so_data(so, NV50TCL_ZETA_FORMAT_Z24S8_UNORM);
-			break;
-		case PIPE_FORMAT_X8Z24_UNORM:
-			so_data(so, NV50TCL_ZETA_FORMAT_X8Z24_UNORM);
-			break;
-		case PIPE_FORMAT_S8Z24_UNORM:
-			so_data(so, NV50TCL_ZETA_FORMAT_S8Z24_UNORM);
-			break;
+		NV50_ZETA_FORMAT_CASE(S8Z24_UNORM);
+		NV50_ZETA_FORMAT_CASE(X8Z24_UNORM);
+		NV50_ZETA_FORMAT_CASE(Z24S8_UNORM);
+		NV50_ZETA_FORMAT_CASE(Z32_FLOAT);
 		default:
 			NOUVEAU_ERR("AIIII unknown format %s\n",
 				    pf_name(fb->zsbuf->format));
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index e12a6ad648..52ccdaa407 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -25,16 +25,18 @@
 
 #include "nouveau/nouveau_stateobj.h"
 
-#define _(pf, tt, r, g, b, a, tf)                       	\
+#define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f)		\
 {                                                       	\
 	PIPE_FORMAT_##pf,					\
-	NV50TIC_0_0_MAPR_##r | NV50TIC_0_0_TYPER_##tt |		\
-	NV50TIC_0_0_MAPG_##g | NV50TIC_0_0_TYPEG_##tt |		\
-	NV50TIC_0_0_MAPB_##b | NV50TIC_0_0_TYPEB_##tt |		\
-	NV50TIC_0_0_MAPA_##a | NV50TIC_0_0_TYPEA_##tt |		\
-	NV50TIC_0_0_FMT_##tf					\
+	NV50TIC_0_0_MAPR_##cr | NV50TIC_0_0_TYPER_##t0 |	\
+	NV50TIC_0_0_MAPG_##cg | NV50TIC_0_0_TYPEG_##t1 |	\
+	NV50TIC_0_0_MAPB_##cb | NV50TIC_0_0_TYPEB_##t2 |	\
+	NV50TIC_0_0_MAPA_##ca | NV50TIC_0_0_TYPEA_##t3 |	\
+	NV50TIC_0_0_FMT_##f					\
 }
 
+#define _(pf, t, cr, cg, cb, ca, f) _MIXED(pf, t, t, t, t, cr, cg, cb, ca, f)
+
 struct nv50_texture_format {
 	enum pipe_format pf;
 	uint32_t hw;
@@ -46,7 +48,9 @@ struct nv50_texture_format {
 static const struct nv50_texture_format nv50_tex_format_list[] =
 {
 	_(A8R8G8B8_UNORM, UNORM, C2, C1, C0, C3,  8_8_8_8),
+	_(A8R8G8B8_SRGB,  UNORM, C2, C1, C0, C3,  8_8_8_8),
 	_(X8R8G8B8_UNORM, UNORM, C2, C1, C0, ONE, 8_8_8_8),
+	_(X8R8G8B8_SRGB,  UNORM, C2, C1, C0, ONE, 8_8_8_8),
 	_(A1R5G5B5_UNORM, UNORM, C2, C1, C0, C3,  1_5_5_5),
 	_(A4R4G4B4_UNORM, UNORM, C2, C1, C0, C3,  4_4_4_4),
 
@@ -61,16 +65,30 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 	_(DXT1_RGB, UNORM, C0, C1, C2, ONE, DXT1),
 	_(DXT1_RGBA, UNORM, C0, C1, C2, C3, DXT1),
 	_(DXT3_RGBA, UNORM, C0, C1, C2, C3, DXT3),
-	_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5)
+	_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
+
+	_MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+
+	_(R16G16B16A16_SNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
+	_(R16G16B16A16_UNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
+	_(R32G32B32A32_FLOAT, FLOAT, C0, C1, C2, C3, 32_32_32_32),
+
+	_(R16G16_SNORM, SNORM, C0, C1, ZERO, ONE, 16_16),
+	_(R16G16_UNORM, UNORM, C0, C1, ZERO, ONE, 16_16),
+
+	_MIXED(Z32_FLOAT, FLOAT, UINT, UINT, UINT, C0, C0, C0, ONE, 32_DEPTH)
+
 };
 
 #undef _
+#undef _MIXED
 
 static int
 nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 		   struct nv50_miptree *mt, int unit)
 {
 	unsigned i;
+	uint32_t mode;
 
 	for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++)
 		if (nv50_tex_format_list[i].pf == mt->base.base.format)
@@ -78,13 +96,15 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	if (i == NV50_TEX_FORMAT_LIST_SIZE)
                 return 1;
 
+	mode = (nv50->sampler[unit]->normalized ? 0xd0005000 : 0x5001d000) |
+	       (mt->base.bo->tile_mode << 22);
+	if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB)
+		mode |= 0x0400;
+
 	so_data (so, nv50_tex_format_list[i].hw);
 	so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
 		     NOUVEAU_BO_RD, 0, 0);
-	if (nv50->sampler[unit]->normalized)
-		so_data (so, 0xd0005000 | mt->base.bo->tile_mode << 22);
-	else
-		so_data (so, 0x5001d000 | mt->base.bo->tile_mode << 22);
+	so_data (so, mode);
 	so_data (so, 0x00300000);
 	so_data (so, mt->base.base.width[0]);
 	so_data (so, (mt->base.base.last_level << 28) |
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index 13f74c11c6..d531e61132 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -38,18 +38,26 @@
 #define NV50TIC_0_0_TYPEA_MASK                                    0x00038000
 #define NV50TIC_0_0_TYPEA_UNORM                                   0x00010000
 #define NV50TIC_0_0_TYPEA_SNORM                                   0x00008000
+#define NV50TIC_0_0_TYPEA_SINT                                    0x00018000
+#define NV50TIC_0_0_TYPEA_UINT                                    0x00020000
 #define NV50TIC_0_0_TYPEA_FLOAT                                   0x00038000
 #define NV50TIC_0_0_TYPEB_MASK                                    0x00007000
 #define NV50TIC_0_0_TYPEB_UNORM                                   0x00002000
 #define NV50TIC_0_0_TYPEB_SNORM                                   0x00001000
+#define NV50TIC_0_0_TYPEB_SINT                                    0x00003000
+#define NV50TIC_0_0_TYPEB_UINT                                    0x00004000
 #define NV50TIC_0_0_TYPEB_FLOAT                                   0x00007000
 #define NV50TIC_0_0_TYPEG_MASK                                    0x00000e00
 #define NV50TIC_0_0_TYPEG_UNORM                                   0x00000400
 #define NV50TIC_0_0_TYPEG_SNORM                                   0x00000200
+#define NV50TIC_0_0_TYPEG_SINT                                    0x00000600
+#define NV50TIC_0_0_TYPEG_UINT                                    0x00000800
 #define NV50TIC_0_0_TYPEG_FLOAT                                   0x00000e00
 #define NV50TIC_0_0_TYPER_MASK                                    0x000001c0
 #define NV50TIC_0_0_TYPER_UNORM                                   0x00000080
 #define NV50TIC_0_0_TYPER_SNORM                                   0x00000040
+#define NV50TIC_0_0_TYPER_SINT                                    0x000000c0
+#define NV50TIC_0_0_TYPER_UINT                                    0x00000100
 #define NV50TIC_0_0_TYPER_FLOAT                                   0x000001c0
 #define NV50TIC_0_0_FMT_MASK                                      0x0000003f
 #define NV50TIC_0_0_FMT_32_32_32_32                               0x00000001
@@ -57,6 +65,7 @@
 #define NV50TIC_0_0_FMT_32_32                                     0x00000004
 #define NV50TIC_0_0_FMT_8_8_8_8                                   0x00000008
 #define NV50TIC_0_0_FMT_2_10_10_10                                0x00000009
+#define NV50TIC_0_0_FMT_16_16                                     0x0000000c
 #define NV50TIC_0_0_FMT_32                                        0x0000000f
 #define NV50TIC_0_0_FMT_4_4_4_4                                   0x00000012
 /* #define NV50TIC_0_0_FMT_1_5_5_5                                0x00000013 */
@@ -65,12 +74,16 @@
 #define NV50TIC_0_0_FMT_8_8                                       0x00000018
 #define NV50TIC_0_0_FMT_16                                        0x0000001b
 #define NV50TIC_0_0_FMT_8                                         0x0000001d
+#define NV50TIC_0_0_FMT_5_9_9_9                                   0x00000020
 #define NV50TIC_0_0_FMT_10_11_11                                  0x00000021
 #define NV50TIC_0_0_FMT_DXT1                                      0x00000024
 #define NV50TIC_0_0_FMT_DXT3                                      0x00000025
 #define NV50TIC_0_0_FMT_DXT5                                      0x00000026
 #define NV50TIC_0_0_FMT_RGTC1                                     0x00000027
 #define NV50TIC_0_0_FMT_RGTC2                                     0x00000028
+#define NV50TIC_0_0_FMT_24_8                                      0x00000029
+#define NV50TIC_0_0_FMT_32_DEPTH                                  0x0000002f
+#define NV50TIC_0_0_FMT_32_8                                      0x00000030
 
 #define NV50TIC_0_1_OFFSET_LOW_MASK                               0xffffffff
 #define NV50TIC_0_1_OFFSET_LOW_SHIFT                                       0
-- 
cgit v1.2.3


From c738c9ab67859f3d4412417333d0f023dd18dc19 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 23 Oct 2009 22:17:44 +0200
Subject: nv50: fix address reg code

Contained some rather obvious thinking errors before,
and didn't consider offsets from TGSI ADDRESS regs.
---
 src/gallium/drivers/nv50/nv50_program.c | 67 ++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 23 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5c691877e0..ff6ff578f4 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -32,9 +32,11 @@
 #include "nv50_context.h"
 
 #define NV50_SU_MAX_TEMP 64
-#define NV50_SU_MAX_ADDR 7
+#define NV50_SU_MAX_ADDR 4
 //#define NV50_PROGRAM_DUMP
 
+/* $a5 and $a6 always seem to be 0, and using $a7 gives you noise */
+
 /* ARL - gallium craps itself on progs/vp/arl.txt
  *
  * MSB - Like MAD, but MUL+SUB
@@ -470,16 +472,28 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 	e->inst[1] |= (val >> 6) << 2;
 }
 
+static INLINE void
+set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
+{
+	assert(!(e->inst[0] & 0x0c000000));
+	assert(!(e->inst[1] & 0x00000004));
+
+	e->inst[0] |= (a->hw & 3) << 26;
+	e->inst[1] |= (a->hw >> 2) << 2;
+}
+
 static void
-emit_set_addr(struct nv50_pc *pc, struct nv50_reg *dst, unsigned val)
+emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
+		  struct nv50_reg *src0, uint16_t src1_val)
 {
 	struct nv50_program_exec *e = exec(pc);
 
-	assert(val <= 0xffff);
-	e->inst[0] = 0xd0000000 | ((val & 0xffff) << 9);
+	e->inst[0] = 0xd0000000 | (src1_val << 9);
 	e->inst[1] = 0x20000000;
-	e->inst[0] |= dst->hw << 2;
 	set_long(pc, e);
+	e->inst[0] |= dst->hw << 2;
+	if (src0) /* otherwise will add to $a0, which is always 0 */
+		set_addr(e, src0);
 
 	emit(pc, e);
 }
@@ -488,9 +502,10 @@ static struct nv50_reg *
 alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 {
 	int i;
-	struct nv50_reg *a = NULL;
+	struct nv50_reg *a_tgsi = NULL, *a = NULL;
 
 	if (!ref) {
+		/* allocate for TGSI address reg */
 		for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
 			if (pc->r_addr[i].index >= 0)
 				continue;
@@ -506,6 +521,13 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 		return NULL;
 	}
 
+	/* Allocate and set an address reg so we can access 'ref'.
+	 *
+	 * If an r_addr has index < 0, it is not reserved for TGSI,
+	 * and index will be the negative of the TGSI addr index the
+	 * value in rhw is relative to, or -256 if rhw is an offset
+	 * from 0. If rhw < 0, the reg has not been initialized.
+	 */
 	for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
 		if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
 			continue;
@@ -516,17 +538,25 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 		if (!a && pc->r_addr[i].acc != pc->insn_cur)
 			a = &pc->r_addr[i];
 
-		if (ref->hw - pc->r_addr[i].rhw < 128) {
-		/* alloc'd & suitable */
+		if (ref->hw - pc->r_addr[i].rhw >= 128)
+			continue;
+
+		if ((ref->acc >= 0 && pc->r_addr[i].index == -256) ||
+		    (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) {
 			pc->r_addr[i].acc = pc->insn_cur;
 			return &pc->r_addr[i];
 		}
 	}
 	assert(a);
-	emit_set_addr(pc, a, ref->hw * 4);
 
-	a->rhw = ref->hw % 128;
+	if (ref->acc < 0)
+		a_tgsi = pc->addr[ref->index];
+
+	emit_add_addr_imm(pc, a, a_tgsi, (ref->hw & ~0x7f) * 4);
+
+	a->rhw = ref->hw & ~0x7f;
 	a->acc = pc->insn_cur;
+	a->index = a_tgsi ? -ref->index : -256;
 	return a;
 }
 
@@ -563,23 +593,13 @@ emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *iv,
 	emit(pc, e);
 }
 
-static INLINE void
-set_addr(struct nv50_program_exec *e, struct nv50_reg *a)
-{
-	assert(!(e->inst[0] & 0x0c000000));
-	assert(!(e->inst[1] & 0x00000004));
-
-	e->inst[0] |= (a->hw & 3) << 26;
-	e->inst[1] |= (a->hw >> 2) << 2;
-}
-
 static void
 set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	 struct nv50_program_exec *e)
 {
 	set_long(pc, e);
 
-	e->param.index = src->hw;
+	e->param.index = src->hw & 127;
 	e->param.shift = s;
 	e->param.mask = m << (s % 32);
 
@@ -1569,7 +1589,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			swz = tgsi_util_get_src_register_swizzle(
 						 &src->SrcRegisterInd, 0);
 			ctor_reg(r, P_CONST,
-				 src->SrcRegisterInd.Index * 4 + swz, c);
+				 src->SrcRegisterInd.Index * 4 + swz,
+				 src->SrcRegister.Index * 4 + c);
 			r->acc = -1;
 			break;
 		case TGSI_FILE_IMMEDIATE:
@@ -2743,7 +2764,7 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 			return FALSE;
 	}
 	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
-		ctor_reg(&pc->r_addr[i], P_ADDR, -1, i + 1);
+		ctor_reg(&pc->r_addr[i], P_ADDR, -256, i + 1);
 
 	return TRUE;
 }
-- 
cgit v1.2.3


From ad67326f12c0d6298cffc0fc4e421ddc02b3cb07 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 23 Oct 2009 21:38:37 +0200
Subject: nv50: allow all 127 TEMP regs

We should really learn to not waste so many though.
---
 src/gallium/drivers/nv50/nv50_program.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index ff6ff578f4..dd7634c58a 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -31,7 +31,7 @@
 
 #include "nv50_context.h"
 
-#define NV50_SU_MAX_TEMP 64
+#define NV50_SU_MAX_TEMP 127
 #define NV50_SU_MAX_ADDR 4
 //#define NV50_PROGRAM_DUMP
 
@@ -452,6 +452,8 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, dst);
+	if (dst->hw > 63)
+		set_long(pc, e);
 	e->inst[0] |= (dst->hw << 2);
 }
 
@@ -642,6 +644,8 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 		}
 
 		alloc_reg(pc, src);
+		if (src->hw > 63)
+			set_long(pc, e);
 		e->inst[0] |= (src->hw << 9);
 	}
 
@@ -701,6 +705,8 @@ set_src_0_restricted(struct nv50_pc *pc, struct nv50_reg *src,
 	}
 
 	alloc_reg(pc, src);
+	if (src->hw > 63)
+		set_long(pc, e);
 	e->inst[0] |= (src->hw << 9);
 }
 
@@ -719,6 +725,8 @@ set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
+	if (src->hw > 63)
+		set_long(pc, e);
 	e->inst[0] |= (src->hw << 9);
 }
 
@@ -745,6 +753,8 @@ set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	}
 
 	alloc_reg(pc, src);
+	if (src->hw > 63)
+		set_long(pc, e);
 	e->inst[0] |= ((src->hw & 127) << 16);
 }
 
@@ -813,11 +823,12 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 {
 	struct nv50_program_exec *e = exec(pc);
 
-	e->inst[0] |= 0xb0000000;
+	e->inst[0] = 0xb0000000;
 
+	alloc_reg(pc, src1);
 	check_swap_src_0_1(pc, &src0, &src1);
 
-	if (!pc->allow32 || src0->neg || src1->neg) {
+	if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) {
 		set_long(pc, e);
 		e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
 	}
@@ -873,6 +884,7 @@ static INLINE void
 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
 {
+	assert(src0 != src1);
 	src1->neg ^= 1;
 	emit_add(pc, dst, src0, src1);
 	src1->neg ^= 1;
@@ -904,6 +916,7 @@ static INLINE void
 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
 {
+	assert(src2 != src0 && src2 != src1);
 	src2->neg ^= 1;
 	emit_mad(pc, dst, src0, src1, src2);
 	src2->neg ^= 1;
-- 
cgit v1.2.3


From 99e728a13ea8518efc7e27242093b43470f102d6 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 23 Oct 2009 21:57:42 +0200
Subject: nv50: fix saturation outside of tx_insn case

---
 src/gallium/drivers/nv50/nv50_program.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index dd7634c58a..3f834b5736 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1821,7 +1821,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)) || dst[c]->type == P_TEMP)
 				continue;
-			rdst[c] = dst[c];
+			/* rdst[c] = dst[c]; */ /* done above */
 			dst[c] = temp_temp(pc);
 		}
 	}
@@ -2150,8 +2150,10 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			/* in this case we saturate later */
-			if (dst[c]->type == P_TEMP && dst[c]->index < 0)
+			/* In this case we saturate later, and dst[c] won't
+			 * be another temp_temp (and thus lost), since rdst
+			 * already is TEMP (see above). */
+			if (rdst[c]->type == P_TEMP && rdst[c]->index < 0)
 				continue;
 			emit_sat(pc, rdst[c], dst[c]);
 		}
-- 
cgit v1.2.3


From 683722740c85fb6b8c0a930e8a4dce51e1709464 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 23 Oct 2009 22:00:06 +0200
Subject: nv50: do SIGN_SET as one instruction

---
 src/gallium/drivers/nv50/nv50_program.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 3f834b5736..9ccc4f5a16 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -986,7 +986,6 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 /* 0x80 == src is float */
 #define CVT_F32_F32 0xc4
 #define CVT_F32_S32 0x44
-#define CVT_F32_U32 0x64
 #define CVT_S32_F32 0x8c
 #define CVT_S32_S32 0x0c
 #define CVT_NEG     0x20
@@ -1644,11 +1643,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		break;
 	case TGSI_UTIL_SIGN_SET:
 		temp = temp_temp(pc);
-		emit_abs(pc, temp, r);
-		if (neg)
-			temp->neg = 1;
-		else
-			emit_neg(pc, temp, temp);
+		emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG);
 		r = temp;
 		break;
 	default:
-- 
cgit v1.2.3


From 22906f730141a233341f3ec124bbb9dd2e8904e2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 23 Oct 2009 23:27:43 +0100
Subject: i965g: wip on removing GL stuff, trying to get a few files compiling

---
 src/gallium/drivers/i965/Makefile               |  73 ++-----
 src/gallium/drivers/i965/brw_cc.c               |  18 +-
 src/gallium/drivers/i965/brw_clip.c             |   4 +-
 src/gallium/drivers/i965/brw_clip_state.c       |   7 +-
 src/gallium/drivers/i965/brw_context.c          |  64 +++---
 src/gallium/drivers/i965/brw_context.h          | 199 ++++++++-----------
 src/gallium/drivers/i965/brw_curbe.c            |   5 +-
 src/gallium/drivers/i965/brw_draw.c             |  25 +--
 src/gallium/drivers/i965/brw_draw.h             |   7 +-
 src/gallium/drivers/i965/brw_draw_upload.c      |  39 ++--
 src/gallium/drivers/i965/brw_eu_debug.c         |   2 -
 src/gallium/drivers/i965/brw_gs.c               |   8 +-
 src/gallium/drivers/i965/brw_gs_emit.c          |   5 -
 src/gallium/drivers/i965/brw_gs_state.c         |   7 +-
 src/gallium/drivers/i965/brw_misc_state.c       |  22 +-
 src/gallium/drivers/i965/brw_pipe_depth.c       |  18 +-
 src/gallium/drivers/i965/brw_pipe_fb.c          |   4 +-
 src/gallium/drivers/i965/brw_pipe_flush.c       |   8 +-
 src/gallium/drivers/i965/brw_pipe_query.c       | 246 +++++++++++++++++++++++
 src/gallium/drivers/i965/brw_program.c          | 166 ----------------
 src/gallium/drivers/i965/brw_queryobj.c         | 254 ------------------------
 src/gallium/drivers/i965/brw_sf.c               |  12 +-
 src/gallium/drivers/i965/brw_sf.h               |  12 +-
 src/gallium/drivers/i965/brw_sf_emit.c          |  26 +--
 src/gallium/drivers/i965/brw_sf_state.c         |  16 +-
 src/gallium/drivers/i965/brw_state.h            |  30 +--
 src/gallium/drivers/i965/brw_state_batch.c      |   1 -
 src/gallium/drivers/i965/brw_state_cache.c      |  59 +++---
 src/gallium/drivers/i965/brw_state_dump.c       |  12 +-
 src/gallium/drivers/i965/brw_state_upload.c     |   4 +-
 src/gallium/drivers/i965/brw_structs.h          |   1 +
 src/gallium/drivers/i965/brw_swtnl.c            |   1 -
 src/gallium/drivers/i965/brw_tex.c              |   7 -
 src/gallium/drivers/i965/brw_tex_layout.c       |  12 +-
 src/gallium/drivers/i965/brw_types.h            |  15 +-
 src/gallium/drivers/i965/brw_util.c             |   2 -
 src/gallium/drivers/i965/brw_util.h             |   2 +-
 src/gallium/drivers/i965/brw_vs.c               |   5 +-
 src/gallium/drivers/i965/brw_vs_emit.c          |   3 -
 src/gallium/drivers/i965/brw_vs_state.c         |   9 +-
 src/gallium/drivers/i965/brw_vs_surface_state.c |  20 +-
 src/gallium/drivers/i965/brw_wm.c               |   6 +-
 src/gallium/drivers/i965/brw_wm_emit.c          |   1 -
 src/gallium/drivers/i965/brw_wm_glsl.c          |   4 -
 src/gallium/drivers/i965/brw_wm_iz.c            |   1 -
 src/gallium/drivers/i965/brw_wm_sampler_state.c |  15 +-
 src/gallium/drivers/i965/brw_wm_state.c         |  19 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c | 181 ++++++++---------
 src/gallium/drivers/i965/intel_batchbuffer.h    |   7 +-
 src/gallium/drivers/i965/intel_tex_format.c     | 197 ------------------
 src/gallium/drivers/i965/intel_tex_layout.c     |   7 +-
 51 files changed, 634 insertions(+), 1234 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_pipe_query.c
 delete mode 100644 src/gallium/drivers/i965/brw_program.c
 delete mode 100644 src/gallium/drivers/i965/brw_queryobj.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 7a55333e89..480d2efbc5 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -1,38 +1,9 @@
-
-TOP = ../../../../..
+TOP = ../../../..
 include $(TOP)/configs/current
 
-LIBNAME = i965_dri.so
+LIBNAME = i965
 
-DRIVER_SOURCES = \
-	intel_batchbuffer.c \
-	intel_blit.c \
-	intel_buffer_objects.c \
-	intel_buffers.c \
-	intel_clear.c \
-	intel_context.c \
-	intel_decode.c \
-	intel_extensions.c \
-	intel_fbo.c \
-	intel_mipmap_tree.c \
-	intel_regions.c \
-	intel_screen.c \
-	intel_span.c \
-	intel_pixel.c \
-	intel_pixel_bitmap.c \
-	intel_pixel_copy.c \
-	intel_pixel_draw.c \
-	intel_pixel_read.c \
-	intel_state.c \
-	intel_swapbuffers.c \
-	intel_syncobj.c \
-	intel_tex.c \
-	intel_tex_copy.c \
-	intel_tex_format.c \
-	intel_tex_image.c \
-	intel_tex_layout.c \
-	intel_tex_subimage.c \
-	intel_tex_validate.c \
+C_SOURCES = \
 	brw_cc.c \
 	brw_clip.c \
 	brw_clip_line.c \
@@ -50,13 +21,18 @@ DRIVER_SOURCES = \
 	brw_eu_debug.c \
 	brw_eu_emit.c \
 	brw_eu_util.c \
-	brw_fallback.c \
 	brw_gs.c \
 	brw_gs_emit.c \
 	brw_gs_state.c \
 	brw_misc_state.c \
-	brw_program.c \
-	brw_queryobj.c \
+	brw_pipe_blend.c \
+	brw_pipe_debug.c \
+	brw_pipe_depth.c \
+	brw_pipe_fb.c \
+	brw_pipe_flush.c \
+	brw_pipe_query.c \
+	brw_pipe_shader.c \
+	brw_screen_surface.c \
 	brw_sf.c \
 	brw_sf_emit.c \
 	brw_sf_state.c \
@@ -64,41 +40,30 @@ DRIVER_SOURCES = \
 	brw_state_cache.c \
 	brw_state_dump.c \
 	brw_state_upload.c \
+	brw_swtnl.c \
 	brw_tex.c \
 	brw_tex_layout.c \
 	brw_urb.c \
 	brw_util.c \
 	brw_vs.c \
-	brw_vs_constval.c \
 	brw_vs_emit.c \
 	brw_vs_state.c \
 	brw_vs_surface_state.c \
-	brw_vtbl.c \
 	brw_wm.c \
 	brw_wm_debug.c \
 	brw_wm_emit.c \
 	brw_wm_fp.c \
-	brw_wm_iz.c \
 	brw_wm_glsl.c \
+	brw_wm_iz.c \
 	brw_wm_pass0.c \
 	brw_wm_pass1.c \
 	brw_wm_pass2.c \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
-	brw_wm_surface_state.c 
-
-C_SOURCES = \
-	$(COMMON_SOURCES) \
-	$(MINIGLX_SOURCES) \
-	$(DRIVER_SOURCES)
-
-ASM_SOURCES = 
-
-DRIVER_DEFINES = -I../intel -I../intel/server
-
-DRI_LIB_DEPS += -ldrm_intel
-
-include ../Makefile.template
+	brw_wm_surface_state.c \
+	brw_bo.c \
+	intel_batchbuffer.c \
+	intel_tex_format.c \
+	intel_tex_layout.c 
 
-intel_decode.o: ../intel/intel_decode.c
-intel_tex_layout.o: ../intel/intel_tex_layout.c
+include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 9ab5638137..af432b1f52 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -33,13 +33,9 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "brw_util.h"
-#include "main/macros.h"
-#include "main/enums.h"
 
 static void prepare_cc_vp( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_cc_viewport ccv;
 
    memset(&ccv, 0, sizeof(ccv));
@@ -48,13 +44,13 @@ static void prepare_cc_vp( struct brw_context *brw )
    ccv.min_depth = ctx->Viewport.Near;
    ccv.max_depth = ctx->Viewport.Far;
 
-   dri_bo_unreference(brw->cc.vp_bo);
+   brw->sws->bo_unreference(brw->cc.vp_bo);
    brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
 }
 
 const struct brw_tracked_state brw_cc_vp = {
    .dirty = {
-      .mesa = _NEW_VIEWPORT,
+      .mesa = PIPE_NEW_VIEWPORT,
       .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
@@ -71,8 +67,8 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 {
    memset(key, 0, sizeof(*key));
    
-   key->dsa = brw->curr.dsa.base;
-   key->blend = brw->curr.blend.base;
+   key->dsa = brw->dsa;
+   key->blend = brw->blend;
 
    /* Clear non-respected values:
     */
@@ -82,11 +78,11 @@ cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 /**
  * Creates the state cache entry for the given CC unit key.
  */
-static dri_bo *
+static struct brw_winsys_buffer *
 cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 {
    struct brw_cc_unit_state cc;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&cc, 0, sizeof(cc));
 
@@ -124,7 +120,7 @@ static void prepare_cc_unit( struct brw_context *brw )
 
    cc_unit_populate_key(brw, &key);
 
-   dri_bo_unreference(brw->cc.state_bo);
+   brw->sws->bo_unreference(brw->cc.state_bo);
    brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
 				       &key, sizeof(key),
 				       &brw->cc.vp_bo, 1,
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index df1b3718d0..d82ebeb9a9 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -129,7 +129,7 @@ static void compile_clip_prog( struct brw_context *brw,
 
    /* Upload
     */
-   dri_bo_unreference(brw->clip.prog_bo);
+   brw->sws->bo_unreference(brw->clip.prog_bo);
    brw->clip.prog_bo = brw_upload_cache( &brw->cache,
 					 BRW_CLIP_PROG,
 					 &c.key, sizeof(c.key),
@@ -199,7 +199,7 @@ static void upload_clip_prog(struct brw_context *brw)
       }
    }
 
-   dri_bo_unreference(brw->clip.prog_bo);
+   brw->sws->bo_unreference(brw->clip.prog_bo);
    brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
 					&key, sizeof(key),
 					NULL, 0,
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 72e27205e2..0ea7ce5734 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -49,7 +49,6 @@ struct brw_clip_unit_key {
 static void
 clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_CLIP_PROG */
@@ -69,12 +68,12 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
    key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;
 }
 
-static dri_bo *
+static struct brw_winsys_buffer *
 clip_unit_create_from_key(struct brw_context *brw,
 			  struct brw_clip_unit_key *key)
 {
    struct brw_clip_unit_state clip;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&clip, 0, sizeof(clip));
 
@@ -162,7 +161,7 @@ static void upload_clip_unit( struct brw_context *brw )
 
    clip_unit_populate_key(brw, &key);
 
-   dri_bo_unreference(brw->clip.state_bo);
+   brw->sws->bo_unreference(brw->clip.state_bo);
    brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
 					 &key, sizeof(key),
 					 &brw->clip.prog_bo, 1,
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index bf0ec89e13..063ada5772 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -30,32 +30,21 @@
   */
 
 
-#include "main/imports.h"
-#include "main/api_noop.h"
-#include "main/macros.h"
-#include "main/vtxfmt.h"
-#include "main/simple_list.h"
-#include "shader/shader_api.h"
+#include "pipe/p_context.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_draw.h"
 #include "brw_state.h"
 #include "brw_vs.h"
-#include "intel_tex.h"
-#include "intel_blit.h"
+#include "brw_screen_tex.h"
 #include "intel_batchbuffer.h"
-#include "intel_pixel.h"
-#include "intel_span.h"
-#include "tnl/t_pipeline.h"
 
-#include "utils.h"
 
 
-GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate)
+struct pipe_context *brw_create_context( struct pipe_screen *screen,
+					 void *priv )
 {
    struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
 
@@ -87,9 +76,8 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
 /**
  * called from intelDestroyContext()
  */
-static void brw_destroy_context( struct intel_context *intel )
+static void brw_destroy_context( struct brw_context *brw )
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
    int i;
 
    brw_destroy_state(brw);
@@ -102,27 +90,27 @@ static void brw_destroy_context( struct intel_context *intel )
    brw->state.nr_color_regions = 0;
    intel_region_release(&brw->state.depth_region);
 
-   dri_bo_unreference(brw->curbe.curbe_bo);
-   dri_bo_unreference(brw->vs.prog_bo);
-   dri_bo_unreference(brw->vs.state_bo);
-   dri_bo_unreference(brw->vs.bind_bo);
-   dri_bo_unreference(brw->gs.prog_bo);
-   dri_bo_unreference(brw->gs.state_bo);
-   dri_bo_unreference(brw->clip.prog_bo);
-   dri_bo_unreference(brw->clip.state_bo);
-   dri_bo_unreference(brw->clip.vp_bo);
-   dri_bo_unreference(brw->sf.prog_bo);
-   dri_bo_unreference(brw->sf.state_bo);
-   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sws->bo_unreference(brw->curbe.curbe_bo);
+   brw->sws->bo_unreference(brw->vs.prog_bo);
+   brw->sws->bo_unreference(brw->vs.state_bo);
+   brw->sws->bo_unreference(brw->vs.bind_bo);
+   brw->sws->bo_unreference(brw->gs.prog_bo);
+   brw->sws->bo_unreference(brw->gs.state_bo);
+   brw->sws->bo_unreference(brw->clip.prog_bo);
+   brw->sws->bo_unreference(brw->clip.state_bo);
+   brw->sws->bo_unreference(brw->clip.vp_bo);
+   brw->sws->bo_unreference(brw->sf.prog_bo);
+   brw->sws->bo_unreference(brw->sf.state_bo);
+   brw->sws->bo_unreference(brw->sf.vp_bo);
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
-      dri_bo_unreference(brw->wm.sdc_bo[i]);
-   dri_bo_unreference(brw->wm.bind_bo);
+      brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
+   brw->sws->bo_unreference(brw->wm.bind_bo);
    for (i = 0; i < BRW_WM_MAX_SURF; i++)
-      dri_bo_unreference(brw->wm.surf_bo[i]);
-   dri_bo_unreference(brw->wm.sampler_bo);
-   dri_bo_unreference(brw->wm.prog_bo);
-   dri_bo_unreference(brw->wm.state_bo);
-   dri_bo_unreference(brw->cc.prog_bo);
-   dri_bo_unreference(brw->cc.state_bo);
-   dri_bo_unreference(brw->cc.vp_bo);
+      brw->sws->bo_unreference(brw->wm.surf_bo[i]);
+   brw->sws->bo_unreference(brw->wm.sampler_bo);
+   brw->sws->bo_unreference(brw->wm.prog_bo);
+   brw->sws->bo_unreference(brw->wm.state_bo);
+   brw->sws->bo_unreference(brw->cc.prog_bo);
+   brw->sws->bo_unreference(brw->cc.state_bo);
+   brw->sws->bo_unreference(brw->cc.vp_bo);
 }
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 009e28b227..0fcb75a440 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -33,9 +33,9 @@
 #ifndef BRWCONTEXT_INC
 #define BRWCONTEXT_INC
 
-#include "intel_context.h"
 #include "brw_structs.h"
-#include "main/imports.h"
+#include "brw_winsys.h"
+#include "pipe/p_state.h"
 
 
 /* Glossary:
@@ -119,6 +119,19 @@
 
 struct brw_context;
 
+#define PIPE_NEW_DEPTH_STENCIL_ALPHA    0x1
+#define PIPE_NEW_RAST                   0x2
+#define PIPE_NEW_BLEND                  0x2
+#define PIPE_NEW_VIEWPORT               0x2
+#define PIPE_NEW_FRAMEBUFFER            0x2
+#define PIPE_NEW_VERTEX_BUFFER          0x2
+#define PIPE_NEW_VERTEX_ELEMENT         0x2
+#define PIPE_NEW_FRAGMENT_SHADER        0x2
+#define PIPE_NEW_VERTEX_SHADER          0x2
+#define PIPE_NEW_FRAGMENT_CONSTS        0x2
+#define PIPE_NEW_VERTEX_CONSTS          0x2
+
+
 #define BRW_NEW_URB_FENCE               0x1
 #define BRW_NEW_FRAGMENT_PROGRAM        0x2
 #define BRW_NEW_VERTEX_PROGRAM          0x4
@@ -156,26 +169,23 @@ struct brw_state_flags {
 };
 
 
-/** Subclass of Mesa vertex program */
 struct brw_vertex_program {
-   struct gl_vertex_program program;
+   const struct tgsi_token *tokens;
    GLuint id;
-   dri_bo *const_buffer;    /** Program constant buffer/surface */
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
 };
 
 
 /** Subclass of Mesa fragment program */
 struct brw_fragment_program {
-   struct gl_fragment_program program;
+   const struct tgsi_token *tokens;
+
    GLuint id;  /**< serial no. to identify frag progs, never re-used */
-   GLboolean isGLSL;  /**< really, any IF/LOOP/CONT/BREAK instructions */
+   GLboolean isGLSL;  /**< any IF/LOOP/CONT/BREAK instructions */
 
-   dri_bo *const_buffer;    /** Program constant buffer/surface */
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
-
-   /** for debugging, which texture units are referenced */
-   GLbitfield tex_units_used;
 };
 
 
@@ -244,7 +254,7 @@ struct brw_vs_prog_data {
 /* Size == 0 if output either not written, or always [0,0,0,1]
  */
 struct brw_vs_ouput_sizes {
-   GLubyte output_size[VERT_RESULT_MAX];
+   GLubyte output_size[PIPE_MAX_SHADER_OUTPUTS];
 };
 
 
@@ -312,10 +322,10 @@ struct brw_cache_item {
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
    const void *key;
-   dri_bo **reloc_bufs;
+   struct brw_winsys_buffer **reloc_bufs;
    GLuint nr_reloc_bufs;
 
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    GLuint data_size;
 
    struct brw_cache_item *next;
@@ -336,7 +346,7 @@ struct brw_cache {
    /* Record of the last BOs chosen for each cache_id.  Used to set
     * brw->state.dirty.cache when a new cache item is chosen.
     */
-   dri_bo *last_bo[BRW_MAX_CACHE];
+   struct brw_winsys_buffer *last_bo[BRW_MAX_CACHE];
 };
 
 
@@ -384,56 +394,22 @@ struct brw_cached_batch_item {
 /* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
  * be easier if C allowed arrays of packed elements?
  */
-#define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
-
-struct brw_vertex_element {
-   const struct gl_client_array *glarray;
-
-   /** The corresponding Mesa vertex attribute */
-   gl_vert_attrib attrib;
-   /** Size of a complete element */
-   GLuint element_size;
-   /** Number of uploaded elements for this input. */
-   GLuint count;
-   /** Byte stride between elements in the uploaded array */
-   GLuint stride;
-   /** Offset of the first element within the buffer object */
-   unsigned int offset;
-   /** Buffer object containing the uploaded vertex data */
-   dri_bo *bo;
-};
-
-
-
-struct brw_vertex_info {
-   GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
-};
+#define VS_INPUT_BITMASK_DWORDS  ((PIPE_MAX_SHADER_INPUTS+31)/32)
 
 
-/* Cache for TNL programs.
- */
-struct brw_tnl_cache_item {
-   GLuint hash;
-   void *key;
-   void *data;
-   struct brw_tnl_cache_item *next;
+struct brw_vertex_info {
+   GLuint sizes[VS_INPUT_BITMASK_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
 };
 
-struct brw_tnl_cache {
-   struct brw_tnl_cache_item **items;
-   GLuint size, n_items;
-};
 
 struct brw_query_object {
-   struct gl_query_object Base;
-
    /** Doubly linked list of active query objects in the context. */
    struct brw_query_object *prev, *next;
 
    /** Last query BO associated with this query. */
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    /** First index in bo with query data for this object. */
    int first_index;
    /** Last index in bo with query data for this object. */
@@ -445,22 +421,29 @@ struct brw_query_object {
 
 
 /**
- * brw_context is derived from intel_context.
+ * brw_context is derived from pipe_context
  */
 struct brw_context 
 {
+   struct pipe_context *pipe;
+   struct pipe_screen *screen;
+   
+   struct brw_winsys_screen *sws;
+
    GLuint primitive;
 
    GLboolean emit_state_always;
    GLboolean no_batch_wrap;
 
+   /* Active vertex program: 
+    */
+   const struct gl_vertex_program *vertex_program;
+   const struct gl_fragment_program *fragment_program;
+   struct pipe_framebuffer_state fb;
+
    struct {
       struct brw_state_flags dirty;
 
-      GLuint nr_color_regions;
-      struct intel_region *color_regions[MAX_DRAW_BUFFERS];
-      struct intel_region *depth_region;
-
       /**
        * List of buffers accumulated in brw_validate_state to receive
        * dri_bo_check_aperture treatment before exec, so we can know if we
@@ -471,7 +454,7 @@ struct brw_context
        * consisting of the vertex buffers, pipelined state pointers,
        * the CURBE, the depth buffer, and a query BO.
        */
-      dri_bo *validated_bos[VERT_ATTRIB_MAX + 16];
+      struct brw_winsys_buffer *validated_bos[PIPE_MAX_SHADER_INPUTS + 16];
       int validated_bo_count;
    } state;
 
@@ -480,18 +463,14 @@ struct brw_context
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
-      struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
+      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_element;
+      unsigned num_vertex_buffer;
 
-      struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
-      GLuint nr_enabled;
-
-#define BRW_NR_UPLOAD_BUFS 17
-#define BRW_UPLOAD_INIT_SIZE (128*1024)
-
-      struct {
-	 dri_bo *bo;
-	 GLuint offset;
-      } upload;
+      struct u_upload_mgr *upload_vertex;
+      struct u_upload_mgr *upload_index;
+      
 
       /* Summary of size and varying of active arrays, so we can check
        * for changes to this state:
@@ -509,7 +488,7 @@ struct brw_context
       const struct _mesa_index_buffer *ib;
 
       /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
-      dri_bo *bo;
+      struct brw_winsys_buffer *bo;
       unsigned int offset;
       unsigned int size;
       /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
@@ -519,16 +498,6 @@ struct brw_context
       unsigned int start_vertex_offset;
    } ib;
 
-   /* Active vertex program: 
-    */
-   const struct gl_vertex_program *vertex_program;
-   const struct gl_fragment_program *fragment_program;
-
-
-   /* For populating the gtt:
-    */
-   GLuint next_free_page;
-
 
    /* BRW_NEW_URB_ALLOCATIONS:
     */
@@ -545,12 +514,6 @@ struct brw_context
       GLuint nr_sf_entries;
       GLuint nr_cs_entries;
 
-/*       GLuint vs_size; */
-/*       GLuint gs_size; */
-/*       GLuint clip_size; */
-/*       GLuint sf_size; */
-/*       GLuint cs_size; */
-
       GLuint vs_start;
       GLuint gs_start;
       GLuint clip_start;
@@ -570,7 +533,7 @@ struct brw_context
       GLuint vs_size;
       GLuint total_size;
 
-      dri_bo *curbe_bo;
+      struct brw_winsys_buffer *curbe_bo;
       /** Offset within curbe_bo of space for current curbe entry */
       GLuint curbe_offset;
       /** Offset within curbe_bo of space for next curbe entry */
@@ -588,12 +551,12 @@ struct brw_context
    struct {
       struct brw_vs_prog_data *prog_data;
 
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
 
       /** Binding table of pointers to surf_bo entries */
-      dri_bo *bind_bo;
-      dri_bo *surf_bo[BRW_VS_MAX_SURF];
+      struct brw_winsys_buffer *bind_bo;
+      struct brw_winsys_buffer *surf_bo[BRW_VS_MAX_SURF];
       GLuint nr_surfaces;      
    } vs;
 
@@ -601,25 +564,25 @@ struct brw_context
       struct brw_gs_prog_data *prog_data;
 
       GLboolean prog_active;
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
    } gs;
 
    struct {
       struct brw_clip_prog_data *prog_data;
 
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
-      dri_bo *vp_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+      struct brw_winsys_buffer *vp_bo;
    } clip;
 
 
    struct {
       struct brw_sf_prog_data *prog_data;
 
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
-      dri_bo *vp_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+      struct brw_winsys_buffer *vp_bo;
    } sf;
 
    struct {
@@ -629,38 +592,38 @@ struct brw_context
       /** Input sizes, calculated from active vertex program.
        * One bit per fragment program input attribute.
        */
-      GLbitfield input_size_masks[4];
+      //GLbitfield input_size_masks[4];
 
       /** Array of surface default colors (texture border color) */
-      dri_bo *sdc_bo[BRW_MAX_TEX_UNIT];
+      struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
       GLuint nr_surfaces;      
 
       GLuint max_threads;
-      dri_bo *scratch_bo;
+      struct brw_winsys_buffer *scratch_bo;
 
       GLuint sampler_count;
-      dri_bo *sampler_bo;
+      struct brw_winsys_buffer *sampler_bo;
 
       /** Binding table of pointers to surf_bo entries */
-      dri_bo *bind_bo;
-      dri_bo *surf_bo[BRW_WM_MAX_SURF];
+      struct brw_winsys_buffer *bind_bo;
+      struct brw_winsys_buffer *surf_bo[PIPE_MAX_COLOR_BUFS];
 
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
    } wm;
 
 
    struct {
-      dri_bo *prog_bo;
-      dri_bo *state_bo;
-      dri_bo *vp_bo;
+      struct brw_winsys_buffer *prog_bo;
+      struct brw_winsys_buffer *state_bo;
+      struct brw_winsys_buffer *vp_bo;
    } cc;
 
    struct {
       struct brw_query_object active_head;
-      dri_bo *bo;
+      struct brw_winsys_buffer *bo;
       int index;
       GLboolean active;
    } query;
@@ -679,12 +642,6 @@ struct brw_context
  */
 void brwInitVtbl( struct brw_context *brw );
 
-/*======================================================================
- * brw_context.c
- */
-GLboolean brwCreateContext( const __GLcontextModes *mesaVis,
-			    __DRIcontextPrivate *driContextPriv,
-			    void *sharedContextPrivate);
 
 /*======================================================================
  * brw_queryobj.c
@@ -697,7 +654,7 @@ void brw_emit_query_end(struct brw_context *brw);
 /*======================================================================
  * brw_state_dump.c
  */
-void brw_debug_batch(struct intel_context *intel);
+void brw_debug_batch(struct brw_context *intel);
 
 /*======================================================================
  * brw_tex.c
@@ -706,9 +663,9 @@ void brw_validate_textures( struct brw_context *brw );
 
 
 /*======================================================================
- * brw_program.c
+ * brw_pipe_shader.c
  */
-void brwInitFragProgFuncs( struct dd_function_table *functions );
+void brw_init_shader_funcs( struct brw_context *brw );
 
 
 /* brw_urb.c
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 3e32c4983d..33ea9a00f7 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -47,7 +47,6 @@
  */
 static void calculate_curbe_offsets( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    /* CACHE_NEW_WM_PROG */
    const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
    
@@ -157,7 +156,6 @@ static GLfloat fixed_plane[6][4] = {
  */
 static void prepare_constant_buffer(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    const struct brw_vertex_program *vp =
       brw_vertex_program_const(brw->vertex_program);
    const struct brw_fragment_program *fp =
@@ -269,7 +267,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	  (brw->curbe.need_new_bo ||
 	   brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
       {
-	 dri_bo_unreference(brw->curbe.curbe_bo);
+	 brw->sws->bo_unreference(brw->curbe.curbe_bo);
 	 brw->curbe.curbe_bo = NULL;
       }
 
@@ -310,7 +308,6 @@ static void prepare_constant_buffer(struct brw_context *brw)
 
 static void emit_constant_buffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    GLuint sz = brw->curbe.total_size;
 
    BEGIN_BATCH(2, IGNORE_CLIPRECTS);
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 8cd117c24f..856999f3ef 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -26,15 +26,6 @@
  **************************************************************************/
 
 
-#include "main/glheader.h"
-#include "main/context.h"
-#include "main/state.h"
-#include "main/enums.h"
-#include "tnl/tnl.h"
-#include "vbo/vbo_context.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
@@ -67,7 +58,6 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
  */
 static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
 {
-   GLcontext *ctx = &brw->intel.ctx;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
       _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
@@ -110,7 +100,6 @@ static void brw_emit_prim(struct brw_context *brw,
 			  uint32_t hw_prim)
 {
    struct brw_3d_primitive prim_packet;
-   struct intel_context *intel = &brw->intel;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
       _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
@@ -163,7 +152,7 @@ static void brw_merge_inputs( struct brw_context *brw,
    GLuint i;
 
    for (i = 0; i < VERT_ATTRIB_MAX; i++)
-      dri_bo_unreference(brw->vb.inputs[i].bo);
+      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
 
    memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
    memset(&brw->vb.info, 0, sizeof(brw->vb.info));
@@ -185,7 +174,7 @@ static void brw_merge_inputs( struct brw_context *brw,
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
-static GLboolean brw_try_draw_prims( GLcontext *ctx,
+static GLboolean brw_try_draw_prims( struct brw_context *brw,
 				     const struct gl_client_array *arrays[],
 				     const struct _mesa_prim *prim,
 				     GLuint nr_prims,
@@ -193,7 +182,6 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
 				     GLuint min_index,
 				     GLuint max_index )
 {
-   struct intel_context *intel = intel_context(ctx);
    struct brw_context *brw = brw_context(ctx);
    GLboolean retval = GL_FALSE;
    GLboolean warn = GL_FALSE;
@@ -241,7 +229,7 @@ static GLboolean brw_try_draw_prims( GLcontext *ctx,
    return 0;
 }
 
-void brw_draw_prims( GLcontext *ctx,
+void brw_draw_prims( struct brw_context *brw,
 		     const struct gl_client_array *arrays[],
 		     const struct _mesa_prim *prim,
 		     GLuint nr_prims,
@@ -274,7 +262,6 @@ void brw_draw_prims( GLcontext *ctx,
 
 void brw_draw_init( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct vbo_context *vbo = vbo_context(ctx);
 
    /* Register our drawing function: 
@@ -287,15 +274,15 @@ void brw_draw_destroy( struct brw_context *brw )
    int i;
 
    if (brw->vb.upload.bo != NULL) {
-      dri_bo_unreference(brw->vb.upload.bo);
+      brw->sws->bo_unreference(brw->vb.upload.bo);
       brw->vb.upload.bo = NULL;
    }
 
    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      dri_bo_unreference(brw->vb.inputs[i].bo);
+      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
       brw->vb.inputs[i].bo = NULL;
    }
 
-   dri_bo_unreference(brw->ib.bo);
+   brw->sws->bo_unreference(brw->ib.bo);
    brw->ib.bo = NULL;
 }
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
index 2a14db217f..dc7ca8731d 100644
--- a/src/gallium/drivers/i965/brw_draw.h
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -28,13 +28,12 @@
 #ifndef BRW_DRAW_H
 #define BRW_DRAW_H
 
-#include "main/mtypes.h"		/* for GLcontext... */
-#include "vbo/vbo.h"
+#include "brw_types.h"
 
 struct brw_context;
 
 
-void brw_draw_prims( GLcontext *ctx,
+void brw_draw_prims( struct brw_context *brw,
 		     const struct gl_client_array *arrays[],
 		     const struct _mesa_prim *prims,
 		     GLuint nr_prims,
@@ -48,7 +47,7 @@ void brw_draw_destroy( struct brw_context *brw );
 
 /* brw_draw_current.c
  */
-void brw_init_current_values(GLcontext *ctx,
+void brw_init_current_values(struct brw_context *brw,
 			     struct gl_client_array *arrays);
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index ad3ef6b7dd..dce015d79f 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -191,8 +191,6 @@ static unsigned get_index_type(int type)
 
 static boolean brw_prepare_vertices(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = intel_context(ctx);
    GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; 
    GLuint i;
    const unsigned char *ptr = NULL;
@@ -210,15 +208,17 @@ static boolean brw_prepare_vertices(struct brw_context *brw)
 
 
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
+   for (i = 0; i < brw->vb.num_vertex_buffer; i++) {
+      struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i];
+      unsigned size = (vb->stride == 0 ? 
+		       vb->size :
+		       vb->stride * (max_index + 1 - min_index));
 
-      input->element_size = get_size(input->glarray->Type) * input->glarray->Size;
 
       if (brw_is_user_buffer(vb)) {
-	 u_upload_buffer( brw->upload, 
+	 u_upload_buffer( brw->upload_vertex, 
 			  min_index * vb->stride,
-			  (max_index + 1 - min_index) * vb->stride,
+			  size,
 			  &offset,
 			  &buffer );
       }
@@ -226,20 +226,20 @@ static boolean brw_prepare_vertices(struct brw_context *brw)
       {
 	 offset = 0;
 	 buffer = vb->buffer;
-	 count = stride == 0 ? 1 : max_index + 1 - min_index;
       }
-
-      /* Named buffer object: Just reference its contents directly. */
-      dri_bo_unreference(input->bo);
-      input->bo = intel_bufferobj_buffer(intel, intel_buffer,
-					 INTEL_READ);
-      dri_bo_reference(input->bo);
-
+      
+      /* Set up post-upload info about this vertex buffer:
+       */
       input->offset = (unsigned long)offset;
       input->stride = vb->stride;
       input->count = count;
+      brw->sws->bo_unreference(input->bo);
+      input->bo = intel_bufferobj_buffer(intel, intel_buffer,
+					 INTEL_READ);
+      brw->sws->bo_reference(input->bo);
 
       assert(input->offset < input->bo->size);
+      assert(input->offset + size <= input->bo->size);
    }
 
    brw_prepare_query_begin(brw);
@@ -253,8 +253,6 @@ static boolean brw_prepare_vertices(struct brw_context *brw)
 
 static void brw_emit_vertices(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = intel_context(ctx);
    GLuint i;
 
    brw_emit_query_begin(brw);
@@ -370,11 +368,9 @@ const struct brw_tracked_state brw_vertices = {
 
 static void brw_prepare_indices(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
    GLuint ib_size;
-   dri_bo *bo = NULL;
+   struct brw_winsys_buffer *bo = NULL;
    struct gl_buffer_object *bufferobj;
    GLuint offset;
    GLuint ib_type_size;
@@ -421,7 +417,7 @@ static void brw_prepare_indices(struct brw_context *brw)
        } else {
 	  bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
 				      INTEL_READ);
-	  dri_bo_reference(bo);
+	  brw->sws->bo_reference(bo);
 
 	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
 	   * the index buffer state when we're just moving the start index
@@ -461,7 +457,6 @@ const struct brw_tracked_state brw_indices = {
 
 static void brw_emit_index_buffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
 
    if (index_buffer == NULL)
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
index 29f3f6d02f..ad7ec36e86 100644
--- a/src/gallium/drivers/i965/brw_eu_debug.c
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -30,8 +30,6 @@
   */
     
 
-#include "main/mtypes.h"
-#include "main/imports.h"
 #include "brw_eu.h"
 
 void brw_print_reg( struct brw_reg hwreg )
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 5ec0c585fe..58930e7964 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -29,10 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
       
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
 #include "intel_batchbuffer.h"
 
 #include "brw_defines.h"
@@ -124,7 +120,7 @@ static void compile_gs_prog( struct brw_context *brw,
 
    /* Upload
     */
-   dri_bo_unreference(brw->gs.prog_bo);
+   brw->sws->bo_unreference(brw->gs.prog_bo);
    brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
 				       &c.key, sizeof(c.key),
 				       NULL, 0,
@@ -180,7 +176,7 @@ static void prepare_gs_prog(struct brw_context *brw)
    }
 
    if (brw->gs.prog_active) {
-      dri_bo_unreference(brw->gs.prog_bo);
+      brw->sws->bo_unreference(brw->gs.prog_bo);
       brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
 					 &key, sizeof(key),
 					 NULL, 0,
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
index a9b2aa2eac..9ec206d7e8 100644
--- a/src/gallium/drivers/i965/brw_gs_emit.c
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -30,11 +30,6 @@
   */
  
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
-#include "shader/program.h"
 #include "intel_batchbuffer.h"
 
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index ed9d2ffe60..6d03d72d96 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -34,7 +34,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
 
 struct brw_gs_unit_key {
    unsigned int total_grf;
@@ -69,11 +68,11 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    key->urb_size = brw->urb.vsize;
 }
 
-static dri_bo *
+static struct brw_winsys_buffer *
 gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 {
    struct brw_gs_unit_state gs;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&gs, 0, sizeof(gs));
 
@@ -128,7 +127,7 @@ static void prepare_gs_unit(struct brw_context *brw)
 
    gs_unit_populate_key(brw, &key);
 
-   dri_bo_unreference(brw->gs.state_bo);
+   brw->sws->bo_unreference(brw->gs.state_bo);
    brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
 				       &key, sizeof(key),
 				       &brw->gs.prog_bo, 1,
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index ea71857548..d33bf40a01 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -48,7 +48,6 @@
 
 static void upload_blend_constant_color(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_blend_constant_color bcc;
 
    memset(&bcc, 0, sizeof(bcc));      
@@ -75,17 +74,11 @@ const struct brw_tracked_state brw_blend_constant_color = {
 /* Constant single cliprect for framebuffer object or DRI2 drawing */
 static void upload_drawing_rect(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
-   GLcontext *ctx = &intel->ctx;
-
-   if (!intel->constant_cliprect)
-      return;
-
    BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
    OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
-   OUT_BATCH(0); /* xmin, ymin */
-   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
-	    ((ctx->DrawBuffer->Height - 1) << 16));
+   OUT_BATCH(0);
+   OUT_BATCH(((brw->fb.width - 1) & 0xffff) |
+	    ((brw->fb.height - 1) << 16));
    OUT_BATCH(0);
    ADVANCE_BATCH();
 }
@@ -114,8 +107,6 @@ static void prepare_binding_table_pointers(struct brw_context *brw)
  */
 static void upload_binding_table_pointers(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
-
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
    if (brw->vs.bind_bo != NULL)
@@ -148,8 +139,6 @@ const struct brw_tracked_state brw_binding_table_pointers = {
  */
 static void upload_pipelined_state_pointers(struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
-
    BEGIN_BATCH(7, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
    OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
@@ -210,7 +199,6 @@ static void prepare_depthbuffer(struct brw_context *brw)
 
 static void emit_depthbuffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    struct intel_region *region = brw->state.depth_region;
    unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
 
@@ -287,7 +275,6 @@ const struct brw_tracked_state brw_depthbuffer = {
 
 static void upload_polygon_stipple(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_polygon_stipple bps;
    GLuint i;
 
@@ -401,7 +388,6 @@ const struct brw_tracked_state brw_aa_line_parameters = {
 
 static void upload_line_stipple(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_line_stipple bls;
    GLfloat tmp;
    GLint tmpi;
@@ -507,8 +493,6 @@ const struct brw_tracked_state brw_invarient_state = {
  */
 static void upload_state_base_address( struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
-
    /* Output the structure (brw_state_base_address) directly to the
     * batchbuffer, so we can emit relocations inline.
     */
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
index da29bc8bcb..29f135d37a 100644
--- a/src/gallium/drivers/i965/brw_pipe_depth.c
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -1,5 +1,9 @@
-   /* _NEW_STENCIL */
-   if (key->dsa.stencil[0].enable) {
+
+static void *
+brw_create_depth_stencil( struct pipe_context *pipe,
+			  const struct pipe_depth_stencil_alpha_state *tmpl )
+{
+   if (tmpl->stencil[0].enable) {
       cc.cc0.stencil_enable = 1;
       cc.cc0.stencil_func =
 	 intel_translate_compare_func(key->stencil_func[0]);
@@ -13,7 +17,7 @@
       cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
       cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
 
-      if (key->stencil_two_side) {
+      if (tmpl->stencil[1].enable) {
 	 cc.cc0.bf_stencil_enable = 1;
 	 cc.cc0.bf_stencil_func =
 	    intel_translate_compare_func(key->stencil_func[1]);
@@ -30,9 +34,8 @@
 
       /* Not really sure about this:
        */
-      if (key->stencil_write_mask[0] ||
-	  (key->stencil_two_side && key->stencil_write_mask[1]))
-	 cc.cc0.stencil_write_enable = 1;
+      cc.cc0.stencil_write_enable = (cc.cc1.stencil_write_mask ||
+				     cc.cc2.bf_stencil_write_mask);
    }
 
 
@@ -50,3 +53,6 @@
       cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
       cc.cc2.depth_write_enable = key->depth_write;
    }
+
+
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index d4ae332f46..dbf97a0544 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -2,12 +2,12 @@
 /**
  * called from intelDrawBuffer()
  */
-static void brw_set_draw_region( struct intel_context *intel, 
+static void brw_set_draw_region( struct pipe_context *pipe, 
                                  struct intel_region *color_regions[],
                                  struct intel_region *depth_region,
                                  GLuint num_color_regions)
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
+   struct brw_context *brw = brw_context(pipe);
    GLuint i;
 
    /* release old color/depth regions */
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 008f623151..d5b7bd3b83 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -13,10 +13,8 @@ static void brw_finish_batch(struct intel_context *intel)
 /**
  * called from intelFlushBatchLocked
  */
-static void brw_new_batch( struct intel_context *intel )
+static void brw_new_batch( struct brw_context *brw )
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
-
    /* Check that we didn't just wrap our batchbuffer at a bad time. */
    assert(!brw->no_batch_wrap);
 
@@ -36,14 +34,14 @@ static void brw_new_batch( struct intel_context *intel )
     * a new buffer next time.
     */
    if (brw->vb.upload.bo != NULL) {
-      dri_bo_unreference(brw->vb.upload.bo);
+      brw->sws->bo_unreference(brw->vb.upload.bo);
       brw->vb.upload.bo = NULL;
       brw->vb.upload.offset = 0;
    }
 }
 
 
-static void brw_note_fence( struct intel_context *intel, GLuint fence )
+static void brw_note_fence( struct brw_context *brw, GLuint fence )
 {
    brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
new file mode 100644
index 0000000000..0b9ba0c0ed
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file support for ARB_query_object
+ *
+ * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
+ * execution on the completion of previous depth tests, and write the
+ * current PS_DEPTH_COUNT to a buffer object.
+ *
+ * We use before and after counts when drawing during a query so that
+ * we don't pick up other clients' query data in ours.  To reduce overhead,
+ * a single BO is used to record the query data for all active queries at
+ * once.  This also gives us a simple bound on how much batchbuffer space is
+ * required for handling queries, so that we can be sure that we won't
+ * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
+ */
+#include "util/u_simple_list.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "intel_batchbuffer.h"
+#include "intel_reg.h"
+
+/** Waits on the query object's BO, totals the results for this query, then drops the BO. */
+static void
+brw_queryobj_get_results(struct brw_query_object *query)
+{
+   int i;
+   uint64_t *results;
+
+   if (query->bo == NULL)
+      return;
+
+   /* Each index owns a pair of uint64 slots (2*i, 2*i+1); add their difference. */
+   dri_bo_map(query->bo, GL_FALSE);
+   results = query->bo->virtual;
+   for (i = query->first_index; i <= query->last_index; i++) {
+      query->Base.Result += results[i * 2 + 1] - results[i * 2];
+   }
+   dri_bo_unmap(query->bo);
+
+   /* NOTE(review): `brw` is not declared in this function -- confirm how it is meant to be obtained. */
+   brw->sws->bo_unreference(query->bo);
+   query->bo = NULL;
+}
+
+static struct pipe_query *
+brw_query_create(struct pipe_context *pipe, unsigned type )
+{
+   struct brw_query_object *query;
+   /* Dispatch on the requested type; `query` is unallocated until the case below. */
+   switch (type) {
+   case PIPE_QUERY_OCCLUSION_COUNTER:
+      query = CALLOC_STRUCT( brw_query_object );
+      if (query == NULL)
+	 return NULL;
+      return &query->Base;
+
+   default:
+      return NULL;   /* any other query type is rejected */
+   }
+}
+
+static void
+brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);   /* needed for brw->sws below */
+   struct brw_query_object *query = (struct brw_query_object *)q;
+   brw->sws->bo_unreference(query->bo);   /* drop the BO reference before freeing */
+   FREE(query);
+}
+
+static void
+brw_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Reset our tracking: drop any previous BO and mark the index range unset. */
+   brw->sws->bo_unreference(query->bo);
+   query->bo = NULL;
+   query->first_index = -1;
+   query->last_index = -1;
+   /* Put the query on the active list, bump stats_wm and flag PIPE_NEW_QUERY. */
+   insert_at_head(&brw->query.active_head, query);
+   brw->stats_wm++;
+   brw->dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+static void
+brw_end_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
+
+   /* Flush the batchbuffer in case it has writes to our query BO.
+    * Have later queries write to a new query BO so that further rendering
+    * doesn't delay the collection of our results.
+    */
+   if (query->bo) {
+      brw_emit_query_end(brw);
+      intel_batchbuffer_flush(brw->batch);
+      /* Clearing brw->query.bo makes brw_prepare_query_begin() allocate a new one. */
+      brw->sws->bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+   }
+   /* Take the query off the active list, drop stats_wm and flag PIPE_NEW_QUERY. */
+   remove_from_list(query);
+   brw->stats_wm--;
+   brw->dirty.mesa |= PIPE_NEW_QUERY;
+}
+
+static void brw_wait_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+   /* Total the results unconditionally, then mark the query as ready. */
+   brw_queryobj_get_results(query);
+   query->Base.Ready = GL_TRUE;
+}
+
+static void brw_check_query(struct pipe_context *pipe, struct pipe_query *q)
+{
+   struct brw_query_object *query = (struct brw_query_object *)q;
+   /* Collect only when no BO is attached or the attached BO is no longer busy. */
+   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
+      brw_queryobj_get_results(query);
+      query->Base.Ready = GL_TRUE;
+   }
+}
+
+/** Called to set up the query BO and account for its aperture space */
+void
+brw_prepare_query_begin(struct brw_context *brw)
+{
+   /* Skip if we're not doing any queries. */
+   if (is_empty_list(&brw->query.active_head))
+      return;
+
+   /* Get a new 4096-byte BO if there is none or the next slot pair would not fit. */
+   if (brw->query.bo == NULL ||
+       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
+      brw->sws->bo_unreference(brw->query.bo);
+      brw->query.bo = NULL;
+      /* Indexing restarts at 0 in the freshly allocated BO. */
+      brw->query.bo = dri_bo_alloc(brw->bufmgr, "query", 4096, 1);
+      brw->query.index = 0;
+   }
+
+   brw_add_validated_bo(brw, brw->query.bo);
+}
+
+/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
+void
+brw_emit_query_begin(struct brw_context *brw)
+{
+   struct brw_query_object *query;
+
+   /* Skip if we're not doing any queries, or we've emitted the start. */
+   if (brw->query.active || is_empty_list(&brw->query.active_head))
+      return;
+   /* Emit a PIPE_CONTROL whose target is slot 2*index of the shared query BO. */
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   /* This object could be mapped cacheable, but we don't have an exposed
+    * mechanism to support that.  Since it's going uncached, tell GEM that
+    * we're writing to it.  The usual clflush should be all that's required
+    * to pick up the results.
+    */
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   /* Point every active query at the shared BO and extend its index range. */
+   foreach(query, &brw->query.active_head) {
+      if (query->bo != brw->query.bo) {
+	 if (query->bo != NULL)
+	    brw_queryobj_get_results(query);
+	 brw->sws->bo_reference(brw->query.bo);
+	 query->bo = brw->query.bo;
+	 query->first_index = brw->query.index;
+      }
+      query->last_index = brw->query.index;
+   }
+   brw->query.active = GL_TRUE;
+}
+
+/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
+void
+brw_emit_query_end(struct brw_context *brw)
+{
+   if (!brw->query.active)
+      return;
+   /* Emit a PIPE_CONTROL whose target is slot 2*index+1 of the shared query BO. */
+   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
+	     PIPE_CONTROL_DEPTH_STALL |
+	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
+   OUT_RELOC(brw->query.bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
+	     ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   /* The begin/end pair is complete; the next begin uses the following index. */
+   brw->query.active = GL_FALSE;
+   brw->query.index++;
+}
+
+void brw_init_queryobj_functions(struct dd_function_table *functions)
+{
+   functions->NewQueryObject = brw_new_query_object;   /* NOTE(review): not defined in this file -- brw_query_create? confirm */
+   functions->DeleteQuery = brw_delete_query;   /* NOTE(review): not defined in this file -- brw_query_destroy? confirm */
+   functions->BeginQuery = brw_begin_query;
+   functions->EndQuery = brw_end_query;
+   functions->CheckQuery = brw_check_query;
+   functions->WaitQuery = brw_wait_query;
+}
diff --git a/src/gallium/drivers/i965/brw_program.c b/src/gallium/drivers/i965/brw_program.c
deleted file mode 100644
index bac69187c1..0000000000
--- a/src/gallium/drivers/i965/brw_program.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-  
-#include "main/imports.h"
-#include "main/enums.h"
-#include "shader/prog_parameter.h"
-#include "shader/program.h"
-#include "shader/programopt.h"
-#include "tnl/tnl.h"
-
-#include "brw_context.h"
-#include "brw_util.h"
-#include "brw_wm.h"
-
-static void brwBindProgram( GLcontext *ctx,
-			    GLenum target, 
-			    struct gl_program *prog )
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: 
-      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      break;
-   }
-}
-
-static struct gl_program *brwNewProgram( GLcontext *ctx,
-				      GLenum target, 
-				      GLuint id )
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: {
-      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
-      if (prog) {
-	 prog->id = brw->program_id++;
-
-	 return _mesa_init_vertex_program( ctx, &prog->program,
-					     target, id );
-      }
-      else
-	 return NULL;
-   }
-
-   case GL_FRAGMENT_PROGRAM_ARB: {
-      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
-      if (prog) {
-	 prog->id = brw->program_id++;
-
-	 return _mesa_init_fragment_program( ctx, &prog->program,
-					     target, id );
-      }
-      else
-	 return NULL;
-   }
-
-   default:
-      return _mesa_new_program(ctx, target, id);
-   }
-}
-
-static void brwDeleteProgram( GLcontext *ctx,
-			      struct gl_program *prog )
-{
-   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      dri_bo_unreference(brw_fprog->const_buffer);
-   }
-
-   _mesa_delete_program( ctx, prog );
-}
-
-
-static GLboolean brwIsProgramNative( GLcontext *ctx,
-				     GLenum target, 
-				     struct gl_program *prog )
-{
-   return GL_TRUE;
-}
-
-static void brwProgramStringNotify( GLcontext *ctx,
-				    GLenum target,
-				    struct gl_program *prog )
-{
-   struct brw_context *brw = brw_context(ctx);
-
-   if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
-      const struct brw_fragment_program *curFP =
-         brw_fragment_program_const(brw->fragment_program);
-
-      if (fprog->FogOption) {
-         _mesa_append_fog_code(ctx, fprog);
-         fprog->FogOption = GL_NONE;
-      }
-
-      if (newFP == curFP)
-	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      newFP->id = brw->program_id++;      
-      newFP->isGLSL = brw_wm_is_glsl(fprog);
-   }
-   else if (target == GL_VERTEX_PROGRAM_ARB) {
-      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
-      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
-      const struct brw_vertex_program *curVP =
-         brw_vertex_program_const(brw->vertex_program);
-
-      if (newVP == curVP)
-	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-      if (newVP->program.IsPositionInvariant) {
-	 _mesa_insert_mvp_code(ctx, &newVP->program);
-      }
-      newVP->id = brw->program_id++;      
-
-      /* Also tell tnl about it:
-       */
-      _tnl_program_string(ctx, target, prog);
-   }
-}
-
-void brwInitFragProgFuncs( struct dd_function_table *functions )
-{
-   assert(functions->ProgramStringNotify == _tnl_program_string); 
-
-   functions->BindProgram = brwBindProgram;
-   functions->NewProgram = brwNewProgram;
-   functions->DeleteProgram = brwDeleteProgram;
-   functions->IsProgramNative = brwIsProgramNative;
-   functions->ProgramStringNotify = brwProgramStringNotify;
-}
-
diff --git a/src/gallium/drivers/i965/brw_queryobj.c b/src/gallium/drivers/i965/brw_queryobj.c
deleted file mode 100644
index a195bc32b0..0000000000
--- a/src/gallium/drivers/i965/brw_queryobj.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright © 2008 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-/** @file support for ARB_query_object
- *
- * ARB_query_object is implemented by using the PIPE_CONTROL command to stall
- * execution on the completion of previous depth tests, and write the
- * current PS_DEPTH_COUNT to a buffer object.
- *
- * We use before and after counts when drawing during a query so that
- * we don't pick up other clients' query data in ours.  To reduce overhead,
- * a single BO is used to record the query data for all active queries at
- * once.  This also gives us a simple bound on how much batchbuffer space is
- * required for handling queries, so that we can be sure that we won't
- * have to emit a batchbuffer without getting the ending PS_DEPTH_COUNT.
- */
-#include "main/simple_list.h"
-#include "main/imports.h"
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "intel_batchbuffer.h"
-#include "intel_reg.h"
-
-/** Waits on the query object's BO and totals the results for this query */
-static void
-brw_queryobj_get_results(struct brw_query_object *query)
-{
-   int i;
-   uint64_t *results;
-
-   if (query->bo == NULL)
-      return;
-
-   /* Map and count the pixels from the current query BO */
-   dri_bo_map(query->bo, GL_FALSE);
-   results = query->bo->virtual;
-   for (i = query->first_index; i <= query->last_index; i++) {
-      query->Base.Result += results[i * 2 + 1] - results[i * 2];
-   }
-   dri_bo_unmap(query->bo);
-
-   dri_bo_unreference(query->bo);
-   query->bo = NULL;
-}
-
-static struct gl_query_object *
-brw_new_query_object(GLcontext *ctx, GLuint id)
-{
-   struct brw_query_object *query;
-
-   query = _mesa_calloc(sizeof(struct brw_query_object));
-
-   query->Base.Id = id;
-   query->Base.Result = 0;
-   query->Base.Active = GL_FALSE;
-   query->Base.Ready = GL_TRUE;
-
-   return &query->Base;
-}
-
-static void
-brw_delete_query(GLcontext *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   dri_bo_unreference(query->bo);
-   _mesa_free(query);
-}
-
-static void
-brw_begin_query(GLcontext *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_context *intel = intel_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   /* Reset our driver's tracking of query state. */
-   dri_bo_unreference(query->bo);
-   query->bo = NULL;
-   query->first_index = -1;
-   query->last_index = -1;
-
-   insert_at_head(&brw->query.active_head, query);
-   intel->stats_wm++;
-}
-
-/**
- * Begin the ARB_occlusion_query query on a query object.
- */
-static void
-brw_end_query(GLcontext *ctx, struct gl_query_object *q)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct intel_context *intel = intel_context(ctx);
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   /* Flush the batchbuffer in case it has writes to our query BO.
-    * Have later queries write to a new query BO so that further rendering
-    * doesn't delay the collection of our results.
-    */
-   if (query->bo) {
-      brw_emit_query_end(brw);
-      intel_batchbuffer_flush(intel->batch);
-
-      dri_bo_unreference(brw->query.bo);
-      brw->query.bo = NULL;
-   }
-
-   remove_from_list(query);
-
-   intel->stats_wm--;
-}
-
-static void brw_wait_query(GLcontext *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   brw_queryobj_get_results(query);
-   query->Base.Ready = GL_TRUE;
-}
-
-static void brw_check_query(GLcontext *ctx, struct gl_query_object *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
-      brw_queryobj_get_results(query);
-      query->Base.Ready = GL_TRUE;
-   }
-}
-
-/** Called to set up the query BO and account for its aperture space */
-void
-brw_prepare_query_begin(struct brw_context *brw)
-{
-   struct intel_context *intel = &brw->intel;
-
-   /* Skip if we're not doing any queries. */
-   if (is_empty_list(&brw->query.active_head))
-      return;
-
-   /* Get a new query BO if we're going to need it. */
-   if (brw->query.bo == NULL ||
-       brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
-      dri_bo_unreference(brw->query.bo);
-      brw->query.bo = NULL;
-
-      brw->query.bo = dri_bo_alloc(intel->bufmgr, "query", 4096, 1);
-      brw->query.index = 0;
-   }
-
-   brw_add_validated_bo(brw, brw->query.bo);
-}
-
-/** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
-void
-brw_emit_query_begin(struct brw_context *brw)
-{
-   struct intel_context *intel = &brw->intel;
-   struct brw_query_object *query;
-
-   /* Skip if we're not doing any queries, or we've emitted the start. */
-   if (brw->query.active || is_empty_list(&brw->query.active_head))
-      return;
-
-   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-	     PIPE_CONTROL_DEPTH_STALL |
-	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   /* This object could be mapped cacheable, but we don't have an exposed
-    * mechanism to support that.  Since it's going uncached, tell GEM that
-    * we're writing to it.  The usual clflush should be all that's required
-    * to pick up the results.
-    */
-   OUT_RELOC(brw->query.bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
-	     ((brw->query.index * 2) * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   foreach(query, &brw->query.active_head) {
-      if (query->bo != brw->query.bo) {
-	 if (query->bo != NULL)
-	    brw_queryobj_get_results(query);
-	 dri_bo_reference(brw->query.bo);
-	 query->bo = brw->query.bo;
-	 query->first_index = brw->query.index;
-      }
-      query->last_index = brw->query.index;
-   }
-   brw->query.active = GL_TRUE;
-}
-
-/** Called at batchbuffer flush to get an ending PS_DEPTH_COUNT */
-void
-brw_emit_query_end(struct brw_context *brw)
-{
-   struct intel_context *intel = &brw->intel;
-
-   if (!brw->query.active)
-      return;
-
-   BEGIN_BATCH(4, IGNORE_CLIPRECTS);
-   OUT_BATCH(_3DSTATE_PIPE_CONTROL |
-	     PIPE_CONTROL_DEPTH_STALL |
-	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
-   OUT_RELOC(brw->query.bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
-	     ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-
-   brw->query.active = GL_FALSE;
-   brw->query.index++;
-}
-
-void brw_init_queryobj_functions(struct dd_function_table *functions)
-{
-   functions->NewQueryObject = brw_new_query_object;
-   functions->DeleteQuery = brw_delete_query;
-   functions->BeginQuery = brw_begin_query;
-   functions->EndQuery = brw_end_query;
-   functions->CheckQuery = brw_check_query;
-   functions->WaitQuery = brw_wait_query;
-}
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 90513245ee..0115f77c08 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -30,10 +30,6 @@
   */
   
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
 #include "intel_batchbuffer.h"
 
 #include "brw_defines.h"
@@ -46,7 +42,6 @@
 static void compile_sf_prog( struct brw_context *brw,
 			     struct brw_sf_prog_key *key )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -116,7 +111,7 @@ static void compile_sf_prog( struct brw_context *brw,
 
    /* Upload
     */
-   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sws->bo_unreference(brw->sf.prog_bo);
    brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
 				       &c.key, sizeof(c.key),
 				       NULL, 0,
@@ -129,7 +124,6 @@ static void compile_sf_prog( struct brw_context *brw,
  */
 static void upload_sf_prog(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_sf_prog_key key;
 
    memset(&key, 0, sizeof(key));
@@ -167,7 +161,7 @@ static void upload_sf_prog(struct brw_context *brw)
    key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
 
    /* _NEW_HINT */
-   key.linear_color = (ctx->Hint.PerspectiveCorrection == GL_FASTEST);
+   key.linear_color = 0;
 
    /* _NEW_POLYGON */
    if (key.do_twoside_color) {
@@ -179,7 +173,7 @@ static void upload_sf_prog(struct brw_context *brw)
       key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
    }
 
-   dri_bo_unreference(brw->sf.prog_bo);
+   brw->sws->bo_unreference(brw->sf.prog_bo);
    brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
 				      &key, sizeof(key),
 				      NULL, 0,
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
index 6426b6df9f..26c2e8891a 100644
--- a/src/gallium/drivers/i965/brw_sf.h
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -45,19 +45,23 @@
 #define SF_UNFILLED_TRIS   3
 
 struct brw_sf_prog_key {
-   GLuint attrs:32;
+
+   /* Bitmask of linear and perspective interpolated inputs, 0..nr
+    */
+   GLuint persp_attrs:32;
+   GLuint linear_attrs:32;
+
    GLuint primitive:2;
    GLuint do_twoside_color:1;
    GLuint do_flat_shading:1;
    GLuint frontface_ccw:1;
    GLuint do_point_sprite:1;
-   GLuint linear_color:1;  /**< linear interp vs. perspective interp */
+   GLuint sprite_origin_lower_left:1;
    GLuint pad:25;
-   GLenum SpriteOrigin;
 };
 
 struct brw_sf_point_tex {
-	GLboolean CoordReplace;	
+   GLboolean CoordReplace;	
 };
 
 struct brw_sf_compile {
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 4cc427a935..c98d7ec13a 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -30,10 +30,6 @@
   */
    
 
-#include "main/glheader.h"
-#include "main/macros.h"
-#include "main/enums.h"
-
 #include "intel_batchbuffer.h"
 
 #include "brw_defines.h"
@@ -305,6 +301,10 @@ static void invert_det( struct brw_sf_compile *c)
 }
 
 
+/* Two attributes packed into a wide register.  Figure out if either
+ * or both of them need linear/perspective interpolation.  Constant
+ * regs are left as-is.
+ */
 static GLboolean calculate_masks( struct brw_sf_compile *c,
 				  GLuint reg,
 				  GLushort *pc,
@@ -312,20 +312,8 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
 				  GLushort *pc_linear)
 {
    GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
-   GLuint persp_mask;
-   GLuint linear_mask;
-
-   if (c->key.do_flat_shading || c->key.linear_color)
-      persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS |
-                                    FRAG_BIT_COL0 |
-                                    FRAG_BIT_COL1);
-   else
-      persp_mask = c->key.attrs & ~(FRAG_BIT_WPOS);
-
-   if (c->key.do_flat_shading)
-      linear_mask = c->key.attrs & ~(FRAG_BIT_COL0|FRAG_BIT_COL1);
-   else
-      linear_mask = c->key.attrs;
+   GLuint persp_mask = c->key.persp_attrs;
+   GLuint linear_mask = c->key.linear_attrs;
 
    *pc_persp = 0;
    *pc_linear = 0;
@@ -570,7 +558,7 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
       {
 	 brw_set_predicate_control_flag_value(p, pc); 
 	 if (tex->CoordReplace) {
-	     if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
+	     if (c->key.sprite_origin_lower_left) {
 		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
 		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
 	     }
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index bc0f076073..5e1229d22f 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -34,12 +34,9 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
-#include "intel_fbo.h"
 
 static void upload_sf_vp(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    struct brw_sf_viewport sfv;
    GLfloat y_scale, y_bias;
@@ -92,7 +89,7 @@ static void upload_sf_vp(struct brw_context *brw)
       sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
    }
 
-   dri_bo_unreference(brw->sf.vp_bo);
+   brw->sws->bo_unreference(brw->sf.vp_bo);
    brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
 }
 
@@ -126,7 +123,6 @@ struct brw_sf_unit_key {
 static void
 sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_SF_PROG */
@@ -159,12 +155,12 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
    key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 }
 
-static dri_bo *
+static struct brw_winsys_buffer *
 sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
-			dri_bo **reloc_bufs)
+			struct brw_winsys_buffer **reloc_bufs)
 {
    struct brw_sf_unit_state sf;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
@@ -332,14 +328,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 static void upload_sf_unit( struct brw_context *brw )
 {
    struct brw_sf_unit_key key;
-   dri_bo *reloc_bufs[2];
+   struct brw_winsys_buffer *reloc_bufs[2];
 
    sf_unit_populate_key(brw, &key);
 
    reloc_bufs[0] = brw->sf.prog_bo;
    reloc_bufs[1] = brw->sf.vp_bo;
 
-   dri_bo_unreference(brw->sf.state_bo);
+   brw->sws->bo_unreference(brw->sf.state_bo);
    brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
 				       &key, sizeof(key),
 				       reloc_bufs, 2,
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index d639656b9d..a007d542d0 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -36,12 +36,12 @@
 #include "brw_context.h"
 
 static inline void
-brw_add_validated_bo(struct brw_context *brw, dri_bo *bo)
+brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
 {
    assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
 
    if (bo != NULL) {
-      dri_bo_reference(bo);
+      brw->sws->bo_reference(bo);
       brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
    }
 };
@@ -95,9 +95,9 @@ const struct brw_tracked_state brw_index_buffer;
  * Use same key for WM and VS surfaces.
  */
 struct brw_surface_key {
-   GLenum target, depthmode;
-   dri_bo *bo;
-   GLint format, internal_format;
+   unsigned target;
+   struct brw_winsys_buffer *bo;
+   GLint format;
    GLint first_level, last_level;
    GLint width, height, depth;
    GLint pitch, cpp;
@@ -116,42 +116,42 @@ void brw_destroy_state(struct brw_context *brw);
 /***********************************************************************
  * brw_state_cache.c
  */
-dri_bo *brw_cache_data(struct brw_cache *cache,
+struct brw_winsys_buffer *brw_cache_data(struct brw_cache *cache,
 		       enum brw_cache_id cache_id,
 		       const void *data,
-		       dri_bo **reloc_bufs,
+		       struct brw_winsys_buffer **reloc_bufs,
 		       GLuint nr_reloc_bufs);
 
-dri_bo *brw_cache_data_sz(struct brw_cache *cache,
+struct brw_winsys_buffer *brw_cache_data_sz(struct brw_cache *cache,
 			  enum brw_cache_id cache_id,
 			  const void *data,
 			  GLuint data_size,
-			  dri_bo **reloc_bufs,
+			  struct brw_winsys_buffer **reloc_bufs,
 			  GLuint nr_reloc_bufs);
 
-dri_bo *brw_upload_cache( struct brw_cache *cache,
+struct brw_winsys_buffer *brw_upload_cache( struct brw_cache *cache,
 			  enum brw_cache_id cache_id,
 			  const void *key,
 			  GLuint key_sz,
-			  dri_bo **reloc_bufs,
+			  struct brw_winsys_buffer **reloc_bufs,
 			  GLuint nr_reloc_bufs,
 			  const void *data,
 			  GLuint data_sz,
 			  const void *aux,
 			  void *aux_return );
 
-dri_bo *brw_search_cache( struct brw_cache *cache,
+struct brw_winsys_buffer *brw_search_cache( struct brw_cache *cache,
 			  enum brw_cache_id cache_id,
 			  const void *key,
 			  GLuint key_size,
-			  dri_bo **reloc_bufs,
+			  struct brw_winsys_buffer **reloc_bufs,
 			  GLuint nr_reloc_bufs,
 			  void *aux_return);
 void brw_state_cache_check_size( struct brw_context *brw );
 
 void brw_init_caches( struct brw_context *brw );
 void brw_destroy_caches( struct brw_context *brw );
-void brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo);
+void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo);
 
 /***********************************************************************
  * brw_state_batch.c
@@ -166,7 +166,7 @@ void brw_destroy_batch_cache( struct brw_context *brw );
 void brw_clear_batch_cache( struct brw_context *brw );
 
 /* brw_wm_surface_state.c */
-dri_bo *
+struct brw_winsys_buffer *
 brw_create_constant_surface( struct brw_context *brw,
                              struct brw_surface_key *key );
 
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
index 7821898cf9..9568794625 100644
--- a/src/gallium/drivers/i965/brw_state_batch.c
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -33,7 +33,6 @@
 
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
-#include "main/imports.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index c262e1db8b..91d0f80297 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -56,7 +56,6 @@
  * incorrect program is run for the other instance.
  */
 
-#include "main/imports.h"
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 
@@ -72,7 +71,7 @@
 
 static GLuint
 hash_key(const void *key, GLuint key_size,
-         dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+         struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs)
 {
    GLuint *ikey = (GLuint *)key;
    GLuint hash = 0, i;
@@ -88,7 +87,7 @@ hash_key(const void *key, GLuint key_size,
 
    /* Include the BO pointers as key data as well */
    ikey = (GLuint *)reloc_bufs;
-   key_size = nr_reloc_bufs * sizeof(dri_bo *);
+   key_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *);
    for (i = 0; i < key_size/4; i++) {
       hash ^= ikey[i];
       hash = (hash << 5) | (hash >> 27);
@@ -103,14 +102,14 @@ hash_key(const void *key, GLuint key_size,
  */
 static void
 update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
-		  dri_bo *bo)
+		  struct brw_winsys_buffer *bo)
 {
    if (bo == cache->last_bo[cache_id])
       return; /* no change */
 
-   dri_bo_unreference(cache->last_bo[cache_id]);
+   cache->brw->sws->bo_unreference(cache->last_bo[cache_id]); /* no local brw here: go via cache->brw */
    cache->last_bo[cache_id] = bo;
-   dri_bo_reference(cache->last_bo[cache_id]);
+   cache->brw->sws->bo_reference(cache->last_bo[cache_id]);
    cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
@@ -118,7 +117,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
 static struct brw_cache_item *
 search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 	     GLuint hash, const void *key, GLuint key_size,
-	     dri_bo **reloc_bufs, GLuint nr_reloc_bufs)
+	     struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs)
 {
    struct brw_cache_item *c;
 
@@ -139,7 +138,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 	  memcmp(c->key, key, key_size) == 0 &&
 	  c->nr_reloc_bufs == nr_reloc_bufs &&
 	  memcmp(c->reloc_bufs, reloc_bufs,
-		 nr_reloc_bufs * sizeof(dri_bo *)) == 0)
+		 nr_reloc_bufs * sizeof(struct brw_winsys_buffer *)) == 0)
 	 return c;
    }
 
@@ -173,12 +172,12 @@ rehash(struct brw_cache *cache)
 /**
  * Returns the buffer object matching cache_id and key, or NULL.
  */
-dri_bo *
+struct brw_winsys_buffer *
 brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key,
                  GLuint key_size,
-                 dri_bo **reloc_bufs, GLuint nr_reloc_bufs,
+                 struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs,
                  void *aux_return)
 {
    struct brw_cache_item *item;
@@ -195,17 +194,17 @@ brw_search_cache(struct brw_cache *cache,
 
    update_cache_last(cache, cache_id, item->bo);
 
-   dri_bo_reference(item->bo);
+   brw->sws->bo_reference(item->bo);
    return item->bo;
 }
 
 
-dri_bo *
+struct brw_winsys_buffer *
 brw_upload_cache( struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *key,
 		  GLuint key_size,
-		  dri_bo **reloc_bufs,
+		  struct brw_winsys_buffer **reloc_bufs,
 		  GLuint nr_reloc_bufs,
 		  const void *data,
 		  GLuint data_size,
@@ -214,10 +213,10 @@ brw_upload_cache( struct brw_cache *cache,
 {
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
-   GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *);
+   GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *);
    GLuint aux_size = cache->aux_size[cache_id];
    void *tmp;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    int i;
 
    /* Create the buffer object to contain the data */
@@ -233,7 +232,7 @@ brw_upload_cache( struct brw_cache *cache,
    memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
    for (i = 0; i < nr_reloc_bufs; i++) {
       if (reloc_bufs[i] != NULL)
-	 dri_bo_reference(reloc_bufs[i]);
+	 brw->sws->bo_reference(reloc_bufs[i]);
    }
 
    item->cache_id = cache_id;
@@ -244,7 +243,7 @@ brw_upload_cache( struct brw_cache *cache,
    item->nr_reloc_bufs = nr_reloc_bufs;
 
    item->bo = bo;
-   dri_bo_reference(bo);
+   brw->sws->bo_reference(bo);
    item->data_size = data_size;
 
    if (cache->n_items > cache->size * 1.5)
@@ -277,15 +276,15 @@ brw_upload_cache( struct brw_cache *cache,
 /**
  * This doesn't really work with aux data.  Use search/upload instead
  */
-dri_bo *
+struct brw_winsys_buffer *
 brw_cache_data_sz(struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *data,
 		  GLuint data_size,
-		  dri_bo **reloc_bufs,
+		  struct brw_winsys_buffer **reloc_bufs,
 		  GLuint nr_reloc_bufs)
 {
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    struct brw_cache_item *item;
    GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
 
@@ -293,7 +292,7 @@ brw_cache_data_sz(struct brw_cache *cache,
 		       reloc_bufs, nr_reloc_bufs);
    if (item) {
       update_cache_last(cache, cache_id, item->bo);
-      dri_bo_reference(item->bo);
+      brw->sws->bo_reference(item->bo);
       return item->bo;
    }
 
@@ -314,11 +313,11 @@ brw_cache_data_sz(struct brw_cache *cache,
  * better to use, as the potentially changing offsets in the data-used-as-key
  * will result in excessive cache misses.
  */
-dri_bo *
+struct brw_winsys_buffer *
 brw_cache_data(struct brw_cache *cache,
 	       enum brw_cache_id cache_id,
 	       const void *data,
-	       dri_bo **reloc_bufs,
+	       struct brw_winsys_buffer **reloc_bufs,
 	       GLuint nr_reloc_bufs)
 {
    return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
@@ -497,8 +496,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 
 	 next = c->next;
 	 for (j = 0; j < c->nr_reloc_bufs; j++)
-	    dri_bo_unreference(c->reloc_bufs[j]);
-	 dri_bo_unreference(c->bo);
+	    brw->sws->bo_unreference(c->reloc_bufs[j]);
+	 brw->sws->bo_unreference(c->bo);
 	 free((void *)c->key);
 	 free(c);
       }
@@ -523,7 +522,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
  * at the cost of walking the entire hash table.
  */
 void
-brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
+brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
 {
    struct brw_cache_item **prev;
    GLuint i;
@@ -535,14 +534,14 @@ brw_state_cache_bo_delete(struct brw_cache *cache, dri_bo *bo)
       for (prev = &cache->items[i]; *prev;) {
 	 struct brw_cache_item *c = *prev;
 
-	 if (drm_intel_bo_references(c->bo, bo)) {
+	 if (cache->sws->bo_references(c->bo, bo)) {
 	    int j;
 
 	    *prev = c->next;
 
 	    for (j = 0; j < c->nr_reloc_bufs; j++)
-	       dri_bo_unreference(c->reloc_bufs[j]);
-	    dri_bo_unreference(c->bo);
+	       brw->sws->bo_unreference(c->reloc_bufs[j]);
+	    brw->sws->bo_unreference(c->bo);
 	    free((void *)c->key);
 	    free(c);
 	    cache->n_items--;
@@ -580,7 +579,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 
    brw_clear_cache(brw, cache);
    for (i = 0; i < BRW_MAX_CACHE; i++) {
-      dri_bo_unreference(cache->last_bo[i]);
+      brw->sws->bo_unreference(cache->last_bo[i]);
       free(cache->name[i]);
    }
    free(cache->items);
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
index e94fa7d2b4..1bc83fb9c1 100644
--- a/src/gallium/drivers/i965/brw_state_dump.c
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -25,8 +25,6 @@
  *
  */
 
-#include "main/mtypes.h"
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@@ -55,7 +53,7 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index,
 
 /** Generic, undecoded state buffer debug printout */
 static void
-state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size)
+state_struct_out(const char *name, struct brw_winsys_buffer *buffer, unsigned int state_size)
 {
    int i;
 
@@ -102,7 +100,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
    int i;
 
    for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      dri_bo *surf_bo = brw->wm.surf_bo[i];
+      struct brw_winsys_buffer *surf_bo = brw->wm.surf_bo[i];
       unsigned int surfoff;
       struct brw_surface_state *surf;
       char name[20];
@@ -162,7 +160,7 @@ static void dump_sf_viewport_state(struct brw_context *brw)
    dri_bo_unmap(brw->sf.vp_bo);
 }
 
-static void brw_debug_prog(const char *name, dri_bo *prog)
+static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog)
 {
    unsigned int i;
    uint32_t *data;
@@ -202,10 +200,8 @@ static void brw_debug_prog(const char *name, dri_bo *prog)
  * The buffer offsets printed rely on the buffer containing the last offset
  * it was validated at.
  */
-void brw_debug_batch(struct intel_context *intel)
+void brw_debug_batch(struct brw_context *brw)
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
-
    state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
    dump_wm_surface_state(brw);
 
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index 6801084616..b68b6cb21a 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -149,7 +149,7 @@ brw_clear_validated_bos(struct brw_context *brw)
 
    /* Clear the last round of validated bos */
    for (i = 0; i < brw->state.validated_bo_count; i++) {
-      dri_bo_unreference(brw->state.validated_bos[i]);
+      brw->sws->bo_unreference(brw->state.validated_bos[i]);
       brw->state.validated_bos[i] = NULL;
    }
    brw->state.validated_bo_count = 0;
@@ -272,8 +272,6 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
  */
 enum pipe_error brw_validate_state( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
    struct brw_state_flags *state = &brw->state.dirty;
    GLuint i;
 
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index 66d4127271..27d264c3de 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -33,6 +33,7 @@
 #ifndef BRW_STRUCTS_H
 #define BRW_STRUCTS_H
 
+#include "brw_types.h"
 
 /** Number of general purpose registers (VS, WM, etc) */
 #define BRW_MAX_GRF 128
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
index 6684f442d5..83f138f67a 100644
--- a/src/gallium/drivers/i965/brw_swtnl.c
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -6,7 +6,6 @@ static GLboolean check_fallbacks( struct brw_context *brw,
 				  const struct _mesa_prim *prim,
 				  GLuint nr_prims )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    GLuint i;
 
    /* If we don't require strict OpenGL conformance, never 
diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c
index e911b105b2..c33c19ee51 100644
--- a/src/gallium/drivers/i965/brw_tex.c
+++ b/src/gallium/drivers/i965/brw_tex.c
@@ -30,11 +30,6 @@
   */
         
 
-#include "main/glheader.h"
-#include "main/mtypes.h"
-#include "main/teximage.h"
-
-#include "intel_context.h"
 #include "intel_regions.h"
 #include "intel_tex.h"
 #include "brw_context.h"
@@ -45,8 +40,6 @@
  */
 void brw_validate_textures( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   struct intel_context *intel = &brw->intel;
    int i;
 
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
index 5986cbffad..75cdc18912 100644
--- a/src/gallium/drivers/i965/brw_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_tex_layout.c
@@ -34,13 +34,11 @@
 
 #include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
-#include "intel_context.h"
-#include "main/macros.h"
 #include "intel_chipset.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
-GLboolean brw_miptree_layout(struct intel_context *intel,
+GLboolean brw_miptree_layout(struct brw_context *brw,
 			     struct intel_mipmap_tree *mt,
 			     uint32_t tiling)
 {
@@ -67,7 +65,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
               mt->pitch = ALIGN(mt->width0, align_w);
           }
 
-          if (mt->first_level != mt->last_level) {
+          if (mt->last_level != 0) {
               GLuint mip1_width;
 
               if (mt->compressed) {
@@ -93,7 +91,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
               mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
           }
 
-          for (level = mt->first_level; level <= mt->last_level; level++) {
+          for (level = 0; level <= mt->last_level; level++) {
               GLuint img_height;
               GLuint nr_images = 6;
               GLuint q = 0;
@@ -109,7 +107,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
               else
                   img_height = ALIGN(height, align_h);
 
-              if (level == mt->first_level + 1) {
+              if (level == 1) {
                   x += ALIGN(width, align_w);
               }
               else {
@@ -147,7 +145,7 @@ GLboolean brw_miptree_layout(struct intel_context *intel,
       pack_x_pitch = width;
       pack_x_nr = 1;
 
-      for (level = mt->first_level ; level <= mt->last_level ; level++) {
+      for (level = 0 ; level <= mt->last_level ; level++) {
 	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
 	 GLint x = 0;
 	 GLint y = 0;
diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h
index 32b62848da..87dae13d94 100644
--- a/src/gallium/drivers/i965/brw_types.h
+++ b/src/gallium/drivers/i965/brw_types.h
@@ -1,11 +1,18 @@
 #ifndef BRW_TYPES_H
 #define BRW_TYPES_H
 
-typedef GLuint uint32_t;
-typedef GLubyte uint8_t;
-typedef GLushort uint16_t;
+#include "pipe/p_compiler.h"
+
+typedef uint32_t GLuint;
+typedef uint8_t GLubyte;
+typedef uint16_t GLushort;
+typedef int32_t GLint;
+typedef int8_t GLbyte;
+typedef int16_t GLshort;
+typedef float GLfloat;
+
 /* no GLenum, translate all away */
 
-typedef GLboolean uint8_t;
+typedef uint8_t GLboolean;
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
index 17f671a8fa..c5244e58ab 100644
--- a/src/gallium/drivers/i965/brw_util.c
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -30,8 +30,6 @@
   */
          
 
-#include "main/mtypes.h"
-#include "shader/prog_parameter.h"
 #include "brw_util.h"
 #include "brw_defines.h"
 
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
index 33e7cd87e4..37c3acbc11 100644
--- a/src/gallium/drivers/i965/brw_util.h
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -33,7 +33,7 @@
 #ifndef BRW_UTIL_H
 #define BRW_UTIL_H
 
-#include "main/mtypes.h"
+#include "brw_types.h"
 
 extern GLuint brw_count_bits( GLuint val );
 extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 53a5560105..97e523c3ee 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -71,7 +71,7 @@ static void do_vs_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c.func, &program_size);
 
-   dri_bo_unreference(brw->vs.prog_bo);
+   brw->sws->bo_unreference(brw->vs.prog_bo);
    brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
 				       &c.key, sizeof(c.key),
 				       NULL, 0,
@@ -83,7 +83,6 @@ static void do_vs_prog( struct brw_context *brw,
 
 static void brw_upload_vs_prog(struct brw_context *brw)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_vs_prog_key key;
    struct brw_vertex_program *vp = 
       (struct brw_vertex_program *)brw->vertex_program;
@@ -100,7 +99,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
 
    /* Make an early check for the key.
     */
-   dri_bo_unreference(brw->vs.prog_bo);
+   brw->sws->bo_unreference(brw->vs.prog_bo);
    brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
 				      &key, sizeof(key),
 				      NULL, 0,
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 7f20c4baca..6adb743017 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -30,9 +30,6 @@
   */
             
 
-#include "main/macros.h"
-#include "shader/program.h"
-#include "shader/prog_parameter.h"
 #include "pipe/p_shader_tokens.h"
 #include "brw_context.h"
 #include "brw_vs.h"
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index d790ab6555..1717223e49 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -34,7 +34,6 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
-#include "main/macros.h"
 
 struct brw_vs_unit_key {
    unsigned int total_grf;
@@ -51,8 +50,6 @@ struct brw_vs_unit_key {
 static void
 vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_VS_PROG */
@@ -79,11 +76,11 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    }
 }
 
-static dri_bo *
+static struct brw_winsys_buffer *
 vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 {
    struct brw_vs_unit_state vs;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
    int chipset_max_threads;
 
    memset(&vs, 0, sizeof(vs));
@@ -163,7 +160,7 @@ static void prepare_vs_unit(struct brw_context *brw)
 
    vs_unit_populate_key(brw, &key);
 
-   dri_bo_unreference(brw->vs.state_bo);
+   brw->sws->bo_unreference(brw->vs.state_bo);
    brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
 				       &key, sizeof(key),
 				       &brw->vs.prog_bo, 1,
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index 89f47522a1..6446e8e761 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -29,11 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "main/mtypes.h"
-#include "main/texformat.h"
-#include "main/texstore.h"
-#include "shader/prog_parameter.h"
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@@ -47,7 +42,6 @@
 static drm_intel_bo *
 brw_vs_update_constant_buffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_vertex_program *vp =
       (struct brw_vertex_program *) brw->vertex_program;
    const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
@@ -73,7 +67,7 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
  * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
  */
 static void
-brw_update_vs_constant_surface( GLcontext *ctx,
+brw_update_vs_constant_surface( struct brw_context *brw,
                                 GLuint surf)
 {
    struct brw_context *brw = brw_context(ctx);
@@ -87,7 +81,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
    /* If we're in this state update atom, we need to update VS constants, so
     * free the old buffer and create a new one for the new contents.
     */
-   dri_bo_unreference(vp->const_buffer);
+   brw->sws->bo_unreference(vp->const_buffer);
    vp->const_buffer = brw_vs_update_constant_buffer(brw);
 
    /* If there's no constant buffer, then no surface BO is needed to point at
@@ -101,8 +95,7 @@ brw_update_vs_constant_surface( GLcontext *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   key.format = MESA_FORMAT_RGBA_FLOAT32;
-   key.internal_format = GL_RGBA;
+   key.format = PIPE_FORMAT_R32G32B32A32_FLOAT;
    key.bo = vp->const_buffer;
    key.depthmode = GL_NONE;
    key.pitch = params->NumParameters;
@@ -132,10 +125,10 @@ brw_update_vs_constant_surface( GLcontext *ctx,
 /**
  * Constructs the binding table for the VS surface state.
  */
-static dri_bo *
+static struct brw_winsys_buffer *
 brw_vs_get_binding_table(struct brw_context *brw)
 {
-   dri_bo *bind_bo;
+   struct brw_winsys_buffer *bind_bo;
 
    bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
 			      NULL, 0,
@@ -186,7 +179,6 @@ brw_vs_get_binding_table(struct brw_context *brw)
  */
 static void prepare_vs_surfaces(struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    int i;
    int nr_surfaces = 0;
 
@@ -208,7 +200,7 @@ static void prepare_vs_surfaces(struct brw_context *brw )
     * just slightly increases our working set size.
     */
    if (brw->vs.nr_surfaces != 0) {
-      dri_bo_unreference(brw->vs.bind_bo);
+      brw->sws->bo_unreference(brw->vs.bind_bo);
       brw->vs.bind_bo = brw_vs_get_binding_table(brw);
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 20d31880b4..32b8900bac 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -29,7 +29,6 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
              
-#include "main/texformat.h"
 #include "brw_context.h"
 #include "brw_util.h"
 #include "brw_wm.h"
@@ -186,7 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
     */
    program = brw_get_program(&c->func, &program_size);
 
-   dri_bo_unreference(brw->wm.prog_bo);
+   brw->sws->bo_unreference(brw->wm.prog_bo);
    brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
 				       &c->key, sizeof(c->key),
 				       NULL, 0,
@@ -200,7 +199,6 @@ static void do_wm_prog( struct brw_context *brw,
 static void brw_wm_populate_key( struct brw_context *brw,
 				 struct brw_wm_prog_key *key )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    /* BRW_NEW_FRAGMENT_PROGRAM */
    const struct brw_fragment_program *fp = 
       (struct brw_fragment_program *)brw->fragment_program;
@@ -329,7 +327,7 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
 
    /* Make an early check for the key.
     */
-   dri_bo_unreference(brw->wm.prog_bo);
+   brw->sws->bo_unreference(brw->wm.prog_bo);
    brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
 				      &key, sizeof(key),
 				      NULL, 0,
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 9c47c46a3d..fec33f74eb 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -30,7 +30,6 @@
   */
                
 
-#include "main/macros.h"
 #include "brw_context.h"
 #include "brw_wm.h"
 
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index d836e2fb34..c4f0711793 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1,7 +1,3 @@
-#include "main/macros.h"
-#include "shader/prog_parameter.h"
-#include "shader/prog_print.h"
-#include "shader/prog_optimize.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
diff --git a/src/gallium/drivers/i965/brw_wm_iz.c b/src/gallium/drivers/i965/brw_wm_iz.c
index 5e399ac62a..6f1e9fcc3c 100644
--- a/src/gallium/drivers/i965/brw_wm_iz.c
+++ b/src/gallium/drivers/i965/brw_wm_iz.c
@@ -30,7 +30,6 @@
   */
                 
 
-#include "main/mtypes.h"
 #include "brw_wm.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index dff466587a..a8993f9312 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -34,9 +34,6 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
-#include "main/macros.h"
-
-
 
 /* Samplers aren't strictly wm state from the hardware's perspective,
  * but that is the only situation in which we use them in this driver.
@@ -79,7 +76,7 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits)
 }
 
 
-static dri_bo *upload_default_color( struct brw_context *brw,
+static struct brw_winsys_buffer *upload_default_color( struct brw_context *brw,
 				     const GLfloat *color )
 {
    struct brw_sampler_default_color sdc;
@@ -102,7 +99,7 @@ struct wm_sampler_key {
       float max_aniso;
       GLenum minfilter, magfilter;
       GLenum comparemode, comparefunc;
-      dri_bo *sdc_bo;
+      struct brw_winsys_buffer *sdc_bo;
 
       /** If target is cubemap, take context setting.
        */
@@ -115,7 +112,7 @@ struct wm_sampler_key {
  * entry.
  */
 static void brw_update_sampler_state(struct wm_sampler_entry *key,
-				     dri_bo *sdc_bo,
+				     struct brw_winsys_buffer *sdc_bo,
 				     struct brw_sampler_state *sampler)
 {
    _mesa_memset(sampler, 0, sizeof(*sampler));
@@ -240,7 +237,6 @@ static void
 brw_wm_sampler_populate_key(struct brw_context *brw,
 			    struct wm_sampler_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    int unit;
 
    memset(key, 0, sizeof(*key));
@@ -272,7 +268,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	 entry->comparemode = texObj->CompareMode;
          entry->comparefunc = texObj->CompareFunc;
 
-	 dri_bo_unreference(brw->wm.sdc_bo[unit]);
+	 brw->sws->bo_unreference(brw->wm.sdc_bo[unit]);
 	 if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
 	    float bordercolor[4] = {
 	       texObj->BorderColor[0],
@@ -300,7 +296,6 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
  */
 static void upload_wm_samplers( struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct wm_sampler_key key;
    int i;
 
@@ -311,7 +306,7 @@ static void upload_wm_samplers( struct brw_context *brw )
       brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
    }
 
-   dri_bo_unreference(brw->wm.sampler_bo);
+   brw->sws->bo_unreference(brw->wm.sampler_bo);
    brw->wm.sampler_bo = NULL;
    if (brw->wm.sampler_count == 0)
       return;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 361f91292b..958c00d3e0 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -60,10 +60,8 @@ struct brw_wm_unit_key {
 static void
 wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
-   GLcontext *ctx = &brw->intel.ctx;
    const struct gl_fragment_program *fp = brw->fragment_program;
    const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
-   struct intel_context *intel = &brw->intel;
 
    memset(key, 0, sizeof(*key));
 
@@ -121,7 +119,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    /* temporary sanity check assertion */
    ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
 
-   /* _NEW_DEPTH */
+   /* _NEW_QUERY */
    key->stats_wm = intel->stats_wm;
 
    /* _NEW_LINE */
@@ -136,12 +134,12 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 /**
  * Setup wm hardware state.  See page 225 of Volume 2
  */
-static dri_bo *
+static struct brw_winsys_buffer *
 wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
-			dri_bo **reloc_bufs)
+			struct brw_winsys_buffer **reloc_bufs)
 {
    struct brw_wm_unit_state wm;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&wm, 0, sizeof(wm));
 
@@ -257,9 +255,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
 static void upload_wm_unit( struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_key key;
-   dri_bo *reloc_bufs[3];
+   struct brw_winsys_buffer *reloc_bufs[3];
    wm_unit_populate_key(brw, &key);
 
    /* Allocate the necessary scratch space if we haven't already.  Don't
@@ -271,7 +268,7 @@ static void upload_wm_unit( struct brw_context *brw )
       GLuint total = key.total_scratch * key.max_threads;
 
       if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-	 dri_bo_unreference(brw->wm.scratch_bo);
+	 brw->sws->bo_unreference(brw->wm.scratch_bo);
 	 brw->wm.scratch_bo = NULL;
       }
       if (brw->wm.scratch_bo == NULL) {
@@ -286,7 +283,7 @@ static void upload_wm_unit( struct brw_context *brw )
    reloc_bufs[1] = brw->wm.scratch_bo;
    reloc_bufs[2] = brw->wm.sampler_bo;
 
-   dri_bo_unreference(brw->wm.state_bo);
+   brw->sws->bo_unreference(brw->wm.state_bo);
    brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
 				       &key, sizeof(key),
 				       reloc_bufs, 3,
@@ -302,7 +299,7 @@ const struct brw_tracked_state brw_wm_unit = {
 	       _NEW_POLYGONSTIPPLE | 
 	       _NEW_LINE | 
 	       _NEW_COLOR |
-	       _NEW_DEPTH),
+	       _NEW_QUERY),
 
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
 	      BRW_NEW_CURBE_OFFSETS |
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index f7cc5153a8..86dcb74b5b 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -30,11 +30,6 @@
   */
                    
 
-#include "main/mtypes.h"
-#include "main/texformat.h"
-#include "main/texstore.h"
-#include "shader/prog_parameter.h"
-
 #include "intel_mipmap_tree.h"
 #include "intel_batchbuffer.h"
 #include "intel_tex.h"
@@ -70,90 +65,87 @@ static GLuint translate_tex_target( GLenum target )
 }
 
 
-static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
+static GLuint translate_tex_format( GLuint mesa_format, 
 				    GLenum depth_mode )
 {
-   switch( mesa_format ) {
-   case MESA_FORMAT_L8:
+   switch( pipe_format ) {
+   case PIPE_FORMAT_L8_UNORM:
       return BRW_SURFACEFORMAT_L8_UNORM;
 
-   case MESA_FORMAT_I8:
+   case PIPE_FORMAT_I8_UNORM:
       return BRW_SURFACEFORMAT_I8_UNORM;
 
-   case MESA_FORMAT_A8:
+   case PIPE_FORMAT_A8_UNORM:
       return BRW_SURFACEFORMAT_A8_UNORM; 
 
-   case MESA_FORMAT_AL88:
+   case PIPE_FORMAT_A8L8_UNORM:
       return BRW_SURFACEFORMAT_L8A8_UNORM;
 
-   case MESA_FORMAT_RGB888:
-      assert(0);		/* not supported for sampling */
-      return BRW_SURFACEFORMAT_R8G8B8_UNORM;      
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_R8G8B8A8_UNORM:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 
-   case MESA_FORMAT_ARGB8888:
-      if (internal_format == GL_RGB)
-	 return BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-      else
-	 return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
 
-   case MESA_FORMAT_RGBA8888_REV:
-      if (internal_format == GL_RGB)
-	 return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
-      else
-	 return BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
+   case PIPE_FORMAT_:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 
-   case MESA_FORMAT_RGB565:
+   case PIPE_FORMAT_RGB565:
       return BRW_SURFACEFORMAT_B5G6R5_UNORM;
 
-   case MESA_FORMAT_ARGB1555:
+   case PIPE_FORMAT_ARGB1555:
       return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
 
-   case MESA_FORMAT_ARGB4444:
+   case PIPE_FORMAT_ARGB4444:
       return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
 
-   case MESA_FORMAT_YCBCR_REV:
+
+   case PIPE_FORMAT_L16_UNORM:
+      return BRW_SURFACEFORMAT_L16_UNORM;
+
+   case PIPE_FORMAT_I16_UNORM:
+      return BRW_SURFACEFORMAT_I16_UNORM;
+
+   case PIPE_FORMAT_A16_UNORM:
+      return BRW_SURFACEFORMAT_A16_UNORM; 
+
+   case PIPE_FORMAT_YCBCR_REV:
       return BRW_SURFACEFORMAT_YCRCB_NORMAL;
 
-   case MESA_FORMAT_YCBCR:
+   case PIPE_FORMAT_YCBCR:
       return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
 
-   case MESA_FORMAT_RGB_FXT1:
-   case MESA_FORMAT_RGBA_FXT1:
+   case PIPE_FORMAT_RGB_FXT1:
+   case PIPE_FORMAT_RGBA_FXT1:
       return BRW_SURFACEFORMAT_FXT1;
 
-   case MESA_FORMAT_Z16:
-      if (depth_mode == GL_INTENSITY) 
-	  return BRW_SURFACEFORMAT_I16_UNORM;
-      else if (depth_mode == GL_ALPHA)
-	  return BRW_SURFACEFORMAT_A16_UNORM;
-      else
-	  return BRW_SURFACEFORMAT_L16_UNORM;
-
-   case MESA_FORMAT_RGB_DXT1:
+   case PIPE_FORMAT_RGB_DXT1:
        return BRW_SURFACEFORMAT_DXT1_RGB;
 
-   case MESA_FORMAT_RGBA_DXT1:
+   case PIPE_FORMAT_RGBA_DXT1:
        return BRW_SURFACEFORMAT_BC1_UNORM;
        
-   case MESA_FORMAT_RGBA_DXT3:
+   case PIPE_FORMAT_RGBA_DXT3:
        return BRW_SURFACEFORMAT_BC2_UNORM;
        
-   case MESA_FORMAT_RGBA_DXT5:
+   case PIPE_FORMAT_RGBA_DXT5:
        return BRW_SURFACEFORMAT_BC3_UNORM;
 
-   case MESA_FORMAT_SARGB8:
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
       return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
 
-   case MESA_FORMAT_SLA8:
+   case PIPE_FORMAT_A8L8_SRGB:
       return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
 
-   case MESA_FORMAT_SL8:
+   case PIPE_FORMAT_L8_SRGB:
       return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
 
-   case MESA_FORMAT_SRGB_DXT1:
+   case PIPE_FORMAT_SRGB_DXT1:
       return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
 
-   case MESA_FORMAT_S8_Z24:
+   case PIPE_FORMAT_S8_Z24:
       /* XXX: these different surface formats don't seem to
        * make any difference for shadow sampler/compares.
        */
@@ -164,10 +156,10 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format,
       else
          return BRW_SURFACEFORMAT_L24X8_UNORM;
 
-   case MESA_FORMAT_DUDV8:
+   case PIPE_FORMAT_DUDV8:
       return BRW_SURFACEFORMAT_R8G8_SNORM;
 
-   case MESA_FORMAT_SIGNED_RGBA8888_REV:
+   case PIPE_FORMAT_SIGNED_RGBA8888_REV:
       return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
 
    default:
@@ -195,12 +187,12 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
    }
 }
 
-static dri_bo *
+static struct brw_winsys_buffer *
 brw_create_texture_surface( struct brw_context *brw,
 			    struct brw_surface_key *key )
 {
    struct brw_surface_state surf;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&surf, 0, sizeof(surf));
 
@@ -234,7 +226,7 @@ brw_create_texture_surface( struct brw_context *brw,
    else
       surf.ss1.base_addr = key->offset;
 
-   surf.ss2.mip_count = key->last_level - key->first_level;
+   surf.ss2.mip_count = key->last_level;
    surf.ss2.width = key->width - 1;
    surf.ss2.height = key->height - 1;
    brw_set_surface_tiling(&surf, key->tiling);
@@ -270,41 +262,30 @@ brw_create_texture_surface( struct brw_context *brw,
 }
 
 static void
-brw_update_texture_surface( GLcontext *ctx, GLuint unit )
+brw_update_texture_surface( struct brw_context *brw, GLuint unit )
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
-   struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel];
+   struct pipe_texture *tex = brw->texture[unit];
    struct brw_surface_key key;
    const GLuint surf = SURF_INDEX_TEXTURE(unit);
 
    memset(&key, 0, sizeof(key));
 
-   if (intelObj->imageOverride) {
-      key.pitch = intelObj->pitchOverride / intelObj->mt->cpp;
-      key.depth = intelObj->depthOverride;
-      key.bo = NULL;
-      key.offset = intelObj->textureOffset;
-   } else {
-      key.format = firstImage->TexFormat->MesaFormat;
-      key.internal_format = firstImage->InternalFormat;
-      key.pitch = intelObj->mt->pitch;
-      key.depth = firstImage->Depth;
-      key.bo = intelObj->mt->region->buffer;
-      key.offset = 0;
-   }
-
-   key.target = tObj->Target;
-   key.depthmode = tObj->DepthMode;
-   key.first_level = intelObj->firstLevel;
-   key.last_level = intelObj->lastLevel;
-   key.width = firstImage->Width;
-   key.height = firstImage->Height;
-   key.cpp = intelObj->mt->cpp;
-   key.tiling = intelObj->mt->region->tiling;
-
-   dri_bo_unreference(brw->wm.surf_bo[surf]);
+   key.format = tex->base.format;
+   key.pitch = tex->pitch;
+   key.depth = tex->base.depth[0];
+   key.bo = tex->buffer;
+   key.offset = 0;
+
+   key.target = tObj->target;	/* translated to BRW enum */
+   /* key.depthmode = tObj->DepthMode; */ /* XXX: add this to gallium? or the state tracker? */
+   key.first_level = 0;
+   key.last_level = tex->base.last_level;
+   key.width = tex->base.depth[0];
+   key.height = tex->base.height[0];
+   key.cpp = tex->cpp;
+   key.tiling = tex->tiling;
+
+   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
@@ -321,13 +302,13 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit )
  * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
  */
-dri_bo *
+struct brw_winsys_buffer *
 brw_create_constant_surface( struct brw_context *brw,
                              struct brw_surface_key *key )
 {
    const GLint w = key->width - 1;
    struct brw_surface_state surf;
-   dri_bo *bo;
+   struct brw_winsys_buffer *bo;
 
    memset(&surf, 0, sizeof(surf));
 
@@ -374,7 +355,6 @@ brw_create_constant_surface( struct brw_context *brw,
 static drm_intel_bo *
 brw_wm_update_constant_buffer(struct brw_context *brw)
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_fragment_program *fp =
       (struct brw_fragment_program *) brw->fragment_program;
    const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
@@ -399,7 +379,7 @@ brw_wm_update_constant_buffer(struct brw_context *brw)
  * The constant buffer will be (re)allocated here if needed.
  */
 static void
-brw_update_wm_constant_surface( GLcontext *ctx,
+brw_update_wm_constant_surface( struct brw_context *brw,
                                 GLuint surf)
 {
    struct brw_context *brw = brw_context(ctx);
@@ -412,7 +392,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
    /* If we're in this state update atom, we need to update WM constants, so
     * free the old buffer and create a new one for the new contents.
     */
-   dri_bo_unreference(fp->const_buffer);
+   brw->sws->bo_unreference(fp->const_buffer);
    fp->const_buffer = brw_wm_update_constant_buffer(brw);
 
    /* If there's no constant buffer, then no surface BO is needed to point at
@@ -426,7 +406,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
 
    memset(&key, 0, sizeof(key));
 
-   key.format = MESA_FORMAT_RGBA_FLOAT32;
+   key.format = PIPE_FORMAT_RGBA_FLOAT32;
    key.internal_format = GL_RGBA;
    key.bo = fp->const_buffer;
    key.depthmode = GL_NONE;
@@ -442,7 +422,7 @@ brw_update_wm_constant_surface( GLcontext *ctx,
           key.width, key.height, key.depth, key.cpp, key.pitch);
    */
 
-   dri_bo_unreference(brw->wm.surf_bo[surf]);
+   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
                                             &key, sizeof(key),
@@ -464,7 +444,6 @@ brw_update_wm_constant_surface( GLcontext *ctx,
  */
 static void prepare_wm_constant_surface(struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    struct brw_fragment_program *fp =
       (struct brw_fragment_program *) brw->fragment_program;
    GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
@@ -507,8 +486,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 				struct gl_renderbuffer *rb,
 				unsigned int unit)
 {
-   GLcontext *ctx = &brw->intel.ctx;
-   dri_bo *region_bo = NULL;
+   struct brw_winsys_buffer *region_bo = NULL;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    struct intel_region *region = irb ? irb->region : NULL;
    struct {
@@ -528,16 +506,16 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 
       key.surface_type = BRW_SURFACE_2D;
       switch (irb->texformat->MesaFormat) {
-      case MESA_FORMAT_ARGB8888:
+      case PIPE_FORMAT_ARGB8888:
 	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 	 break;
-      case MESA_FORMAT_RGB565:
+      case PIPE_FORMAT_RGB565:
 	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
 	 break;
-      case MESA_FORMAT_ARGB1555:
+      case PIPE_FORMAT_ARGB1555:
 	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
 	 break;
-      case MESA_FORMAT_ARGB4444:
+      case PIPE_FORMAT_ARGB4444:
 	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
 	 break;
       default:
@@ -569,7 +547,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
    key.color_blend = (!ctx->Color._LogicOpEnabled &&
 		      ctx->Color.BlendEnabled);
 
-   dri_bo_unreference(brw->wm.surf_bo[unit]);
+   brw->sws->bo_unreference(brw->wm.surf_bo[unit]);
    brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
 					    BRW_SS_SURFACE,
 					    &key, sizeof(key),
@@ -646,10 +624,10 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
  * Constructs the binding table for the WM surface state, which maps unit
  * numbers to surface state objects.
  */
-static dri_bo *
+static struct brw_winsys_buffer *
 brw_wm_get_binding_table(struct brw_context *brw)
 {
-   dri_bo *bind_bo;
+   struct brw_winsys_buffer *bind_bo;
 
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
 
@@ -692,7 +670,6 @@ brw_wm_get_binding_table(struct brw_context *brw)
 
 static void prepare_wm_surfaces(struct brw_context *brw )
 {
-   GLcontext *ctx = &brw->intel.ctx;
    GLuint i;
    int old_nr_surfaces;
 
@@ -724,12 +701,12 @@ static void prepare_wm_surfaces(struct brw_context *brw )
 	 brw_update_texture_surface(ctx, i);
 	 brw->wm.nr_surfaces = surf + 1;
       } else {
-         dri_bo_unreference(brw->wm.surf_bo[surf]);
+         brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
          brw->wm.surf_bo[surf] = NULL;
       }
    }
 
-   dri_bo_unreference(brw->wm.bind_bo);
+   brw->sws->bo_unreference(brw->wm.bind_bo);
    brw->wm.bind_bo = brw_wm_get_binding_table(brw);
 
    if (brw->wm.nr_surfaces != old_nr_surfaces)
diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h
index a595d2e0c5..be04656aec 100644
--- a/src/gallium/drivers/i965/intel_batchbuffer.h
+++ b/src/gallium/drivers/i965/intel_batchbuffer.h
@@ -1,9 +1,6 @@
 #ifndef INTEL_BATCHBUFFER_H
 #define INTEL_BATCHBUFFER_H
 
-#include "main/mtypes.h"
-
-#include "intel_context.h"
 #include "intel_bufmgr.h"
 #include "intel_reg.h"
 
@@ -44,7 +41,7 @@ struct intel_batchbuffer
 {
    struct intel_context *intel;
 
-   dri_bo *buf;
+   struct brw_winsys_buffer *buf;
 
    GLubyte *buffer;
 
@@ -89,7 +86,7 @@ void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
                                      GLuint bytes);
 
 GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
-                                       dri_bo *buffer,
+                                       struct brw_winsys_buffer *buffer,
 				       uint32_t read_domains,
 				       uint32_t write_domain,
 				       uint32_t offset);
diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c
index 3322a71130..c62ecdadf0 100644
--- a/src/gallium/drivers/i965/intel_tex_format.c
+++ b/src/gallium/drivers/i965/intel_tex_format.c
@@ -1,206 +1,9 @@
 #include "intel_context.h"
 #include "intel_tex.h"
 #include "intel_chipset.h"
-#include "main/texformat.h"
-#include "main/enums.h"
 
 
-/**
- * Choose hardware texture format given the user's glTexImage parameters.
- *
- * It works out that this function is fine for all the supported
- * hardware.  However, there is still a need to map the formats onto
- * hardware descriptors.
- *
- * Note that the i915 can actually support many more formats than
- * these if we take the step of simply swizzling the colors
- * immediately after sampling...
- */
-const struct gl_texture_format *
-intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
-                         GLenum format, GLenum type)
-{
-   struct intel_context *intel = intel_context(ctx);
-   const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
-
-#if 0
-   printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
-          __FUNCTION__, internalFormat, format, type);
-#endif
-
-   switch (internalFormat) {
-   case 4:
-   case GL_RGBA:
-   case GL_COMPRESSED_RGBA:
-      if (format == GL_BGRA) {
-         if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
-            return &_mesa_texformat_argb8888;
-         }
-         else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
-            return &_mesa_texformat_argb4444;
-         }
-         else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
-            return &_mesa_texformat_argb1555;
-         }
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case 3:
-   case GL_RGB:
-   case GL_COMPRESSED_RGB:
-      if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
-         return &_mesa_texformat_rgb565;
-      }
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_rgb565;
-
-   case GL_RGBA8:
-   case GL_RGB10_A2:
-   case GL_RGBA12:
-   case GL_RGBA16:
-      return do32bpt ? &_mesa_texformat_argb8888 : &_mesa_texformat_argb4444;
-
-   case GL_RGBA4:
-   case GL_RGBA2:
-      return &_mesa_texformat_argb4444;
-
-   case GL_RGB5_A1:
-      return &_mesa_texformat_argb1555;
-
-   case GL_RGB8:
-   case GL_RGB10:
-   case GL_RGB12:
-   case GL_RGB16:
-      return &_mesa_texformat_argb8888;
-
-   case GL_RGB5:
-   case GL_RGB4:
-   case GL_R3_G3_B2:
-      return &_mesa_texformat_rgb565;
-
-   case GL_ALPHA:
-   case GL_ALPHA4:
-   case GL_ALPHA8:
-   case GL_ALPHA12:
-   case GL_ALPHA16:
-   case GL_COMPRESSED_ALPHA:
-      return &_mesa_texformat_a8;
-
-   case 1:
-   case GL_LUMINANCE:
-   case GL_LUMINANCE4:
-   case GL_LUMINANCE8:
-   case GL_LUMINANCE12:
-   case GL_LUMINANCE16:
-   case GL_COMPRESSED_LUMINANCE:
-      return &_mesa_texformat_l8;
-
-   case 2:
-   case GL_LUMINANCE_ALPHA:
-   case GL_LUMINANCE4_ALPHA4:
-   case GL_LUMINANCE6_ALPHA2:
-   case GL_LUMINANCE8_ALPHA8:
-   case GL_LUMINANCE12_ALPHA4:
-   case GL_LUMINANCE12_ALPHA12:
-   case GL_LUMINANCE16_ALPHA16:
-   case GL_COMPRESSED_LUMINANCE_ALPHA:
-      return &_mesa_texformat_al88;
-
-   case GL_INTENSITY:
-   case GL_INTENSITY4:
-   case GL_INTENSITY8:
-   case GL_INTENSITY12:
-   case GL_INTENSITY16:
-   case GL_COMPRESSED_INTENSITY:
-      return &_mesa_texformat_i8;
 
-   case GL_YCBCR_MESA:
-      if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE)
-         return &_mesa_texformat_ycbcr;
-      else
-         return &_mesa_texformat_ycbcr_rev;
-
-   case GL_COMPRESSED_RGB_FXT1_3DFX:
-      return &_mesa_texformat_rgb_fxt1;
-   case GL_COMPRESSED_RGBA_FXT1_3DFX:
-      return &_mesa_texformat_rgba_fxt1;
-
-   case GL_RGB_S3TC:
-   case GL_RGB4_S3TC:
-   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-      return &_mesa_texformat_rgb_dxt1;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-      return &_mesa_texformat_rgba_dxt1;
-
-   case GL_RGBA_S3TC:
-   case GL_RGBA4_S3TC:
-   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-      return &_mesa_texformat_rgba_dxt3;
-
-   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-      return &_mesa_texformat_rgba_dxt5;
-
-   case GL_DEPTH_COMPONENT:
-   case GL_DEPTH_COMPONENT16:
-   case GL_DEPTH_COMPONENT24:
-   case GL_DEPTH_COMPONENT32:
-#if 0
-      return &_mesa_texformat_z16;
-#else
-      /* fall-through.
-       * 16bpp depth texture can't be paired with a stencil buffer so
-       * always used combined depth/stencil format.
-       */
-#endif
-   case GL_DEPTH_STENCIL_EXT:
-   case GL_DEPTH24_STENCIL8_EXT:
-      return &_mesa_texformat_s8_z24;
-
-#ifndef I915
-   case GL_SRGB_EXT:
-   case GL_SRGB8_EXT:
-   case GL_SRGB_ALPHA_EXT:
-   case GL_SRGB8_ALPHA8_EXT:
-   case GL_COMPRESSED_SRGB_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_EXT:
-   case GL_COMPRESSED_SLUMINANCE_EXT:
-   case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT:
-      return &_mesa_texformat_sargb8;
-   case GL_SLUMINANCE_EXT:
-   case GL_SLUMINANCE8_EXT:
-      if (IS_G4X(intel->intelScreen->deviceID))
-         return &_mesa_texformat_sl8;
-      else
-         return &_mesa_texformat_sargb8;
-   case GL_SLUMINANCE_ALPHA_EXT:
-   case GL_SLUMINANCE8_ALPHA8_EXT:
-      if (IS_G4X(intel->intelScreen->deviceID))
-         return &_mesa_texformat_sla8;
-      else
-         return &_mesa_texformat_sargb8;
-   case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT:
-   case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT:
-      return &_mesa_texformat_srgb_dxt1;
-
-   /* i915 could also do this */
-   case GL_DUDV_ATI:
-   case GL_DU8DV8_ATI:
-      return &_mesa_texformat_dudv8;
-   case GL_RGBA_SNORM:
-   case GL_RGBA8_SNORM:
-      return &_mesa_texformat_signed_rgba8888_rev;
-#endif
-
-   default:
-      fprintf(stderr, "unexpected texture format %s in %s\n",
-              _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__);
-      return NULL;
-   }
-
-   return NULL;                 /* never get here */
-}
 
 int intel_compressed_num_bytes(GLuint mesaFormat)
 {
diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c
index 7d69ea4484..1cdab49e5e 100644
--- a/src/gallium/drivers/i965/intel_tex_layout.c
+++ b/src/gallium/drivers/i965/intel_tex_layout.c
@@ -33,7 +33,6 @@
 #include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
 #include "intel_context.h"
-#include "main/macros.h"
 
 void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
 {
@@ -86,7 +85,7 @@ void i945_miptree_layout_2d( struct intel_context *intel,
     * constraints of mipmap placement push the right edge of the
     * 2nd mipmap out past the width of its parent.
     */
-   if (mt->first_level != mt->last_level) {
+   if (mt->last_level) {
        GLuint mip1_width;
 
        if (mt->compressed) {
@@ -108,7 +107,7 @@ void i945_miptree_layout_2d( struct intel_context *intel,
    mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
    mt->total_height = 0;
 
-   for ( level = mt->first_level ; level <= mt->last_level ; level++ ) {
+   for ( level = 0 ; level <= mt->last_level ; level++ ) {
       GLuint img_height;
 
       intel_miptree_set_level_info(mt, level, 1, x, y, width, 
@@ -127,7 +126,7 @@ void i945_miptree_layout_2d( struct intel_context *intel,
 
       /* Layout_below: step right after second mipmap.
        */
-      if (level == mt->first_level + 1) {
+      if (level == 1) {
 	 x += ALIGN(width, align_w);
       }
       else {
-- 
cgit v1.2.3


From 94a63dccdd79268cf37587c93e3dec0d02dad457 Mon Sep 17 00:00:00 2001
From: Joakim Sindholt <opensource@zhasha.com>
Date: Sat, 24 Oct 2009 02:38:28 +0200
Subject: r300g: fix scons build yet again

---
 src/gallium/drivers/r300/SConscript    | 3 ---
 src/gallium/drivers/r300/r300_render.c | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index b4c8ba2015..97989040d2 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -9,8 +9,6 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr
 r300 = env.ConvenienceLibrary(
     target = 'r300',
     source = [
-        'r3xx_fs.c',
-        'r5xx_fs.c',
         'r300_chipset.c',
         'r300_clear.c',
         'r300_context.c',
@@ -25,7 +23,6 @@ r300 = env.ConvenienceLibrary(
         'r300_state_derived.c',
         'r300_state_invariant.c',
         'r300_vs.c',
-        'r300_surface.c',
         'r300_texture.c',
         'r300_tgsi_to_rc.c',
     ] + r300compiler) + r300compiler
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 6e2bcc62da..6f392402bd 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -76,7 +76,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
                                  unsigned count)
 {
     struct r300_context* r300 = r300_context(pipe);
-    CS_LOCALS(r300);
     uint32_t prim = r300_translate_primitive(mode);
     struct pipe_vertex_buffer* aos = r300->vertex_buffers;
     unsigned aos_count = r300->vertex_buffer_count;
@@ -84,6 +83,8 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
     unsigned packet_size;
     unsigned i;
     bool invalid = FALSE;
+    
+    CS_LOCALS(r300);
 
 validate:
     for (i = 0; i < aos_count; i++) {
-- 
cgit v1.2.3


From d71af266dfe01953f2545708e16a8eb799113abb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 24 Oct 2009 11:53:43 +0100
Subject: i965g: first compiling file

---
 src/gallium/drivers/i965/brw_cc.c         | 96 ++++++++++++++++++++++---------
 src/gallium/drivers/i965/brw_context.h    | 30 ++++++++++
 src/gallium/drivers/i965/brw_pipe_blend.c |  4 ++
 src/gallium/drivers/i965/brw_state.h      |  3 +-
 src/gallium/drivers/i965/brw_structs.h    | 16 +++---
 5 files changed, 114 insertions(+), 35 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index af432b1f52..bf2743ebbe 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -34,15 +34,41 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
+
+struct sane_viewport {
+   float top;
+   float left;
+   float width;
+   float height;
+   float near;
+   float far;
+};
+
+static void calc_sane_viewport( const struct pipe_viewport_state *vp,
+				struct sane_viewport *svp )
+{
+   /* XXX fix me, obviously.
+    */
+   svp->top = 0;
+   svp->left = 0;
+   svp->width = 250;
+   svp->height = 250;
+   svp->near = 0;
+   svp->far = 1;
+}
+
 static void prepare_cc_vp( struct brw_context *brw )
 {
    struct brw_cc_viewport ccv;
+   struct sane_viewport svp;
 
    memset(&ccv, 0, sizeof(ccv));
 
-   /* _NEW_VIEWPORT */
-   ccv.min_depth = ctx->Viewport.Near;
-   ccv.max_depth = ctx->Viewport.Far;
+   /* PIPE_NEW_VIEWPORT */
+   calc_sane_viewport( &brw->vp, &svp );
+
+   ccv.min_depth = svp.near;
+   ccv.max_depth = svp.far;
 
    brw->sws->bo_unreference(brw->cc.vp_bo);
    brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
@@ -58,21 +84,38 @@ const struct brw_tracked_state brw_cc_vp = {
 };
 
 struct brw_cc_unit_key {
-   struct pipe_depth_stencil_alpha_state dsa;
-   struct pipe_blend_state blend; /* no color mask */
+   struct brw_cc0 cc0;
+   struct brw_cc1 cc1;
+   struct brw_cc2 cc2;
+   struct brw_cc3 cc3;
+   struct brw_cc5 cc5;
+   struct brw_cc6 cc6;
+   struct brw_cc7 cc7;
 };
 
-static void
-cc_unit_populate_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+/* A long-winded way to OR two unsigned integers together:
+ */
+static INLINE struct brw_cc3
+combine_cc3( struct brw_cc3 a, struct brw_cc3 b )
 {
-   memset(key, 0, sizeof(*key));
-   
-   key->dsa = brw->dsa;
-   key->blend = brw->blend;
+   union { struct brw_cc3 cc3; unsigned i; } ca, cb;
+   ca.cc3 = a;
+   cb.cc3 = b;
+   ca.i |= cb.i;
+   return ca.cc3;
+}
 
-   /* Clear non-respected values:
-    */
-   key->blend.colormask = 0xf;
+static void
+cc_unit_populate_key(const struct brw_context *brw,
+		     struct brw_cc_unit_key *key)
+{
+   key->cc0 = brw->dsa->cc0;
+   key->cc1 = brw->dsa->cc1;
+   key->cc2 = brw->dsa->cc2;
+   key->cc3 = combine_cc3( brw->dsa->cc3, brw->blend->cc3 );
+   key->cc5 = brw->blend->cc5;
+   key->cc6 = brw->blend->cc6;
+   key->cc7 = brw->blend->cc7;
 }
 
 /**
@@ -86,16 +129,17 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 
    memset(&cc, 0, sizeof(cc));
 
-   cc.cc0 = brw->dsa.cc0;
-   cc.cc1 = brw->dsa.cc1;
-   cc.cc2 = brw->dsa.cc2;
-   cc.cc3 = brw->dsa.cc3 | brw->blend.cc3;
+   cc.cc0 = key->cc0;
+   cc.cc1 = key->cc1;
+   cc.cc2 = key->cc2;
+   cc.cc3 = key->cc3;
 
    /* CACHE_NEW_CC_VP */
    cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
 
-   cc.cc5 = brw->blend.cc5 | brw->debug.cc5;
-
+   cc.cc5 = key->cc5;
+   cc.cc6 = key->cc6;
+   cc.cc7 = key->cc7;
 
    bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
 			 key, sizeof(*key),
@@ -104,12 +148,12 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 			 NULL, NULL);
 
    /* Emit CC viewport relocation */
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION,
-		     0,
-		     0,
-		     offsetof(struct brw_cc_unit_state, cc4),
-		     brw->cc.vp_bo);
+   brw->sws->bo_emit_reloc(bo,
+			   I915_GEM_DOMAIN_INSTRUCTION,
+			   0,
+			   0,
+			   offsetof(struct brw_cc_unit_state, cc4),
+			   brw->cc.vp_bo);
 
    return bo;
 }
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 0fcb75a440..6699d3bdb6 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -119,6 +119,33 @@
 
 struct brw_context;
 
+struct brw_depth_stencil_alpha_state {
+   struct pipe_depth_stencil_alpha_state templ; /* for draw module */
+
+   /* Precalculated hardware state:
+    */
+   struct brw_cc0 cc0;
+   struct brw_cc1 cc1;
+   struct brw_cc2 cc2;
+   struct brw_cc3 cc3;
+};
+
+
+/* Blend state: pipe-level template plus precalculated CC unit words. */
+struct brw_blend_state {
+   struct pipe_blend_state templ; /* for draw module */
+
+   /* Precalculated hardware state: */
+   struct brw_cc3 cc3;
+   struct brw_cc5 cc5;
+   struct brw_cc6 cc6;
+   struct brw_cc7 cc7;
+};
+
+
+
+
+
 #define PIPE_NEW_DEPTH_STENCIL_ALPHA    0x1
 #define PIPE_NEW_RAST                   0x2
 #define PIPE_NEW_BLEND                  0x2
@@ -440,6 +467,9 @@ struct brw_context
    const struct gl_vertex_program *vertex_program;
    const struct gl_fragment_program *fragment_program;
    struct pipe_framebuffer_state fb;
+   struct brw_depth_stencil_alpha_state *dsa;
+   struct brw_blend_state *blend;
+   struct pipe_viewport_state vp;
 
    struct {
       struct brw_state_flags dirty;
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index b351794dce..17895d2782 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -1,4 +1,5 @@
 
+
    /* _NEW_COLOR */
    if (key->logic_op != GL_COPY) {
       cc.cc2.logicop_enable = 1;
@@ -39,3 +40,6 @@
       cc.cc6.x_dither_offset = 0;
    }
 
+   if (INTEL_DEBUG & DEBUG_STATS)
+      cc.cc5.statistics_enable = 1;
+}
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index a007d542d0..b716097bfc 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -34,11 +34,12 @@
 #define BRW_STATE_H
 
 #include "brw_context.h"
+#include "util/u_memory.h"
 
 static inline void
 brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
 {
-   assert(brw->state.validated_bo_count < ARRAY_SIZE(brw->state.validated_bos));
+   assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
 
    if (bo != NULL) {
       brw->sws->bo_reference(bo);
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index 27d264c3de..11372697f9 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -663,7 +663,7 @@ struct brw_clip_unit_state
 
 struct brw_cc_unit_state
 {
-   struct
+   struct brw_cc0
    {
       GLuint pad0:3;
       GLuint bf_stencil_pass_depth_pass_op:3; 
@@ -681,7 +681,7 @@ struct brw_cc_unit_state
    } cc0;
 
    
-   struct
+   struct brw_cc1
    {
       GLuint bf_stencil_ref:8; 
       GLuint stencil_write_mask:8; 
@@ -690,7 +690,7 @@ struct brw_cc_unit_state
    } cc1;
 
    
-   struct
+   struct brw_cc2
    {
       GLuint logicop_enable:1; 
       GLuint pad0:10;
@@ -702,7 +702,7 @@ struct brw_cc_unit_state
    } cc2;
 
    
-   struct
+   struct brw_cc3
    {
       GLuint pad0:8;
       GLuint alpha_test_func:3; 
@@ -714,13 +714,13 @@ struct brw_cc_unit_state
       GLuint pad2:16;
    } cc3;
    
-   struct
+   struct brw_cc4
    {
       GLuint pad0:5; 
       GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */
    } cc4;
    
-   struct
+   struct brw_cc5
    {
       GLuint pad0:2;
       GLuint ia_dest_blend_factor:5; 
@@ -732,7 +732,7 @@ struct brw_cc_unit_state
       GLuint dither_enable:1; 
    } cc5;
 
-   struct
+   struct brw_cc6
    {
       GLuint clamp_post_alpha_blend:1; 
       GLuint clamp_pre_alpha_blend:1; 
@@ -745,7 +745,7 @@ struct brw_cc_unit_state
       GLuint blend_function:3; 
    } cc6;
 
-   struct {
+   struct brw_cc7 {
       union {
 	 GLfloat f;  
 	 GLubyte ub[4];
-- 
cgit v1.2.3


From 074606a806df755ecbb84e0a1182c66fd0b2a8dd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 24 Oct 2009 13:18:34 +0100
Subject: i965g: more files compiling

---
 src/gallium/drivers/i965/brw_batchbuffer.h      | 124 ++++++++++++
 src/gallium/drivers/i965/brw_cc.c               |  16 +-
 src/gallium/drivers/i965/brw_clip.c             |  80 +++-----
 src/gallium/drivers/i965/brw_clip.h             |   7 +-
 src/gallium/drivers/i965/brw_clip_unfilled.c    |   2 +-
 src/gallium/drivers/i965/brw_clip_util.c        |   2 +-
 src/gallium/drivers/i965/brw_context.c          |   2 +-
 src/gallium/drivers/i965/brw_context.h          |  89 ++++-----
 src/gallium/drivers/i965/brw_curbe.c            |  10 +-
 src/gallium/drivers/i965/brw_defines.h          |   4 +-
 src/gallium/drivers/i965/brw_draw.c             |  12 +-
 src/gallium/drivers/i965/brw_draw_upload.c      |   2 +-
 src/gallium/drivers/i965/brw_eu.h               |  32 +++-
 src/gallium/drivers/i965/brw_eu_emit.c          |   4 +-
 src/gallium/drivers/i965/brw_gs.c               |   2 +-
 src/gallium/drivers/i965/brw_gs_emit.c          |   2 +-
 src/gallium/drivers/i965/brw_misc_state.c       |   2 +-
 src/gallium/drivers/i965/brw_pipe_flush.c       |   2 +-
 src/gallium/drivers/i965/brw_pipe_query.c       |   4 +-
 src/gallium/drivers/i965/brw_pipe_rast.c        |  46 +++++
 src/gallium/drivers/i965/brw_pipe_rast.h        |  14 ++
 src/gallium/drivers/i965/brw_pipe_shader.c      | 159 ++++++++++++++++
 src/gallium/drivers/i965/brw_reg.h              |  79 ++++++++
 src/gallium/drivers/i965/brw_screen.h           |  78 ++++++++
 src/gallium/drivers/i965/brw_screen_surface.c   |   4 +-
 src/gallium/drivers/i965/brw_sf.c               |   2 +-
 src/gallium/drivers/i965/brw_sf.h               |   1 -
 src/gallium/drivers/i965/brw_sf_emit.c          |   2 +-
 src/gallium/drivers/i965/brw_state.h            |   2 +-
 src/gallium/drivers/i965/brw_state_batch.c      |   6 +-
 src/gallium/drivers/i965/brw_state_cache.c      |   2 +-
 src/gallium/drivers/i965/brw_state_upload.c     |   2 +-
 src/gallium/drivers/i965/brw_tex_layout.c       |   2 +-
 src/gallium/drivers/i965/brw_urb.c              |   2 +-
 src/gallium/drivers/i965/brw_util.h             |   5 +-
 src/gallium/drivers/i965/brw_vs.c               |   3 +-
 src/gallium/drivers/i965/brw_vs.h               |   1 -
 src/gallium/drivers/i965/brw_vs_emit.c          |  82 ++++----
 src/gallium/drivers/i965/brw_winsys.h           | 243 ++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_wm.h               |   1 -
 src/gallium/drivers/i965/brw_wm_debug.c         |   2 +-
 src/gallium/drivers/i965/brw_wm_emit.c          |  84 ++++----
 src/gallium/drivers/i965/brw_wm_fp.c            |  60 +++---
 src/gallium/drivers/i965/brw_wm_pass0.c         |   1 -
 src/gallium/drivers/i965/brw_wm_pass1.c         |  68 +++----
 src/gallium/drivers/i965/brw_wm_surface_state.c |   2 +-
 src/gallium/drivers/i965/intel_batchbuffer.h    | 168 ----------------
 47 files changed, 1027 insertions(+), 492 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.h
 create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_rast.h
 create mode 100644 src/gallium/drivers/i965/brw_pipe_shader.c
 create mode 100644 src/gallium/drivers/i965/brw_reg.h
 create mode 100644 src/gallium/drivers/i965/brw_screen.h
 create mode 100644 src/gallium/drivers/i965/brw_winsys.h
 delete mode 100644 src/gallium/drivers/i965/intel_batchbuffer.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
new file mode 100644
index 0000000000..76b3c1bf69
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -0,0 +1,124 @@
+#ifndef BRW_BATCHBUFFER_H
+#define BRW_BATCHBUFFER_H
+
+#include "brw_types.h"
+#include "brw_winsys.h"
+#include "brw_reg.h"
+
+#define BATCH_SZ 16384
+#define BATCH_RESERVED 16
+
+/* All ignored:
+ */
+enum cliprect_mode {
+   IGNORE_CLIPRECTS,
+   LOOP_CLIPRECTS,
+   NO_LOOP_CLIPRECTS,
+   REFERENCES_CLIPRECTS
+};
+
+void brw_batchbuffer_free(struct brw_batchbuffer *batch);
+
+void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
+			      const char *file, int line);
+
+#define brw_batchbuffer_flush(batch) \
+	_brw_batchbuffer_flush(batch, __FILE__, __LINE__)
+
+void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
+
+
+/* Unlike bmBufferData, this currently requires the buffer be mapped.
+ * Consider it a convenience function wrapping multiple
+ * intel_buffer_dword() calls.
+ */
+void brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                            const void *data, GLuint bytes,
+			    enum cliprect_mode cliprect_mode);
+
+void brw_batchbuffer_release_space(struct brw_batchbuffer *batch,
+                                     GLuint bytes);
+
+GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                                       struct brw_winsys_buffer *buffer,
+				       uint32_t read_domains,
+				       uint32_t write_domain,
+				       uint32_t offset);
+
+/* Inline functions - might actually be better off with these
+ * non-inlined.  Certainly better off switching all command packets to
+ * be passed as structs rather than dwords, but that's a little bit of
+ * work...
+ */
+static INLINE GLint
+brw_batchbuffer_space(struct brw_batchbuffer *batch)
+{
+   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+}
+
+
+static INLINE void
+brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
+{
+   assert(batch->map);
+   assert(brw_batchbuffer_space(batch) >= 4);
+   *(GLuint *) (batch->ptr) = dword;
+   batch->ptr += 4;
+}
+
+/* TRUE if sz more bytes fit in the batch; otherwise asserts and returns FALSE. */
+static INLINE boolean
+brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
+                                GLuint sz,
+				enum cliprect_mode cliprect_mode)
+{
+   assert(sz < batch->size - 8);
+   if (brw_batchbuffer_space(batch) < sz) {
+      assert(0);
+      return FALSE;
+   }
+
+   /* cliprect_mode is ignored: commands always execute exactly once. */
+   (void)cliprect_mode;
+   return TRUE;
+}
+
+/* Here are the crusty old macros, to be removed:
+ */
+#define BATCH_LOCALS
+
+#define BEGIN_BATCH(n, cliprect_mode) do {				\
+   brw_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
+   assert(intel->batch->emit.start_ptr == NULL);			\
+   intel->batch->emit.total = (n) * 4;					\
+   intel->batch->emit.start_ptr = intel->batch->ptr;			\
+} while (0)
+
+#define OUT_BATCH(d) brw_batchbuffer_emit_dword(intel->batch, d)
+
+#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
+   assert((unsigned) (delta) < buf->size);				\
+   brw_batchbuffer_emit_reloc(intel->batch, buf,			\
+				read_domains, write_domain, delta);	\
+} while (0)
+
+#define ADVANCE_BATCH() do {						\
+   unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
+   assert(intel->batch->emit.start_ptr != NULL);			\
+   if (_n != intel->batch->emit.total) {				\
+      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",	\
+	      _n, intel->batch->emit.total);				\
+      abort();								\
+   }									\
+   intel->batch->emit.start_ptr = NULL;					\
+} while(0)
+
+
+static INLINE void
+brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch)
+{
+   brw_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+   brw_batchbuffer_emit_dword(batch, MI_FLUSH);
+}
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index bf2743ebbe..c8e7851d75 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -65,7 +65,7 @@ static void prepare_cc_vp( struct brw_context *brw )
    memset(&ccv, 0, sizeof(ccv));
 
    /* PIPE_NEW_VIEWPORT */
-   calc_sane_viewport( &brw->vp, &svp );
+   calc_sane_viewport( &brw->curr.vp, &svp );
 
    ccv.min_depth = svp.near;
    ccv.max_depth = svp.far;
@@ -109,13 +109,13 @@ static void
 cc_unit_populate_key(const struct brw_context *brw,
 		     struct brw_cc_unit_key *key)
 {
-   key->cc0 = brw->dsa->cc0;
-   key->cc1 = brw->dsa->cc1;
-   key->cc2 = brw->dsa->cc2;
-   key->cc3 = combine_cc3( brw->dsa->cc3, brw->blend->cc3 );
-   key->cc5 = brw->blend->cc5;
-   key->cc6 = brw->blend->cc6;
-   key->cc7 = brw->blend->cc7;
+   key->cc0 = brw->curr.dsa->cc0;
+   key->cc1 = brw->curr.dsa->cc1;
+   key->cc2 = brw->curr.dsa->cc2;
+   key->cc3 = combine_cc3( brw->curr.dsa->cc3, brw->curr.blend->cc3 );
+   key->cc5 = brw->curr.blend->cc5;
+   key->cc6 = brw->curr.blend->cc6;
+   key->cc7 = brw->curr.blend->cc7;
 }
 
 /**
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index d82ebeb9a9..591e904705 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -33,13 +33,14 @@
 
 #include "util/u_math.h"
 
-#include "intel_batchbuffer.h"
-
+#include "brw_screen.h"
+#include "brw_batchbuffer.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_state.h"
+#include "brw_pipe_rast.h"
 #include "brw_clip.h"
 
 
@@ -77,13 +78,16 @@ static void compile_clip_prog( struct brw_context *brw,
    else
        delta = REG_SIZE;
 
-   for (i = 0; i < VERT_RESULT_MAX; i++)
-      if (c.key.attrs & (1<<i)) {
-	 c.offset[i] = delta;
-	 delta += ATTR_SIZE;
-      }
+   /* XXX: c.offset is now pretty redundant:
+    */
+   for (i = 0; i < c.key.nr_attrs; i++) {
+      c.offset[i] = delta;
+      delta += ATTR_SIZE;
+   }
 
-   c.nr_attrs = util_count_bits(c.key.attrs);
+   /* XXX: c.nr_attrs is very redundant:
+    */
+   c.nr_attrs = c.key.nr_attrs;
    
    if (BRW_IS_IGDNG(brw))
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
@@ -145,59 +149,21 @@ static void upload_clip_prog(struct brw_context *brw)
 {
    struct brw_clip_prog_key key;
 
-   memset(&key, 0, sizeof(key));
-
-   /* Populate the key:
+   /* Populate the key, starting from the almost-complete version from
+    * the rast state. 
     */
+
+   /* PIPE_NEW_RAST */
+   memcpy(&key, &brw->curr.rast->clip_key, sizeof key);
+
    /* BRW_NEW_REDUCED_PRIMITIVE */
    key.primitive = brw->reduced_primitive;
-   /* CACHE_NEW_VS_PROG */
-   key.attrs = brw->vs.prog_data->outputs_written;
-   /* PIPE_NEW_RAST */
-   key.do_flat_shading = brw->rast.base.flatshade;
-   /* PIPE_NEW_UCP */
-   key.nr_userclip = brw->nr_ucp;
 
-   if (BRW_IS_IGDNG(brw))
-       key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
-   else
-       key.clip_mode = BRW_CLIPMODE_NORMAL;
+   /* PIPE_NEW_VS */
+   key.nr_attrs = brw->curr.vs->info.file_max[TGSI_FILE_OUTPUT] + 1;
 
-   /* PIPE_NEW_RAST */
-   if (key.primitive == PIPE_PRIM_TRIANGLES) {
-      if (brw->rast->cull_mode = PIPE_WINDING_BOTH)
-	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
-      else {
-	 key.fill_ccw = CLIP_CULL;
-	 key.fill_cw = CLIP_CULL;
-
-	 if (!(brw->rast->cull_mode & PIPE_WINDING_CCW)) {
-	    key.fill_ccw = translate_fill(brw->rast.fill_ccw);
-	 }
-
-	 if (!(brw->rast->cull_mode & PIPE_WINDING_CW)) {
-	    key.fill_cw = translate_fill(brw->rast.fill_cw);
-	 }
-
-	 if (key.fill_cw != CLIP_FILL ||
-	     key.fill_ccw != CLIP_FILL) {
-	    key.do_unfilled = 1;
-	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
-	 }
-
-	 key.offset_ccw = brw->rast.offset_ccw;
-	 key.offset_cw = brw->rast.offset_cw;
-
-	 if (brw->rast.light_twoside &&
-	     key.fill_cw != CLIP_CULL) 
-	    key.copy_bfc_cw = 1;
-
-	 if (brw->rast.light_twoside &&
-	     key.fill_ccw != CLIP_CULL) 
-	    key.copy_bfc_ccw = 1;
-	 }
-      }
-   }
+   /* PIPE_NEW_CLIP */
+   key.nr_userclip = brw->curr.ucp.nr;
 
    brw->sws->bo_unreference(brw->clip.prog_bo);
    brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
@@ -212,7 +178,7 @@ static void upload_clip_prog(struct brw_context *brw)
 const struct brw_tracked_state brw_clip_prog = {
    .dirty = {
       .mesa  = (PIPE_NEW_RAST | 
-		PIPE_NEW_UCP),
+		PIPE_NEW_CLIP),
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG
    },
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index d80ec819b9..cfe51bf292 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -42,8 +42,7 @@
  * up polygon offset and flatshading at this point:
  */
 struct brw_clip_prog_key {
-   GLuint attrs:32;		
-
+   GLuint nr_attrs:5;
    GLuint primitive:4;
    GLuint nr_userclip:3;
    GLuint do_flat_shading:1;
@@ -55,7 +54,7 @@ struct brw_clip_prog_key {
    GLuint copy_bfc_cw:1;
    GLuint copy_bfc_ccw:1;
    GLuint clip_mode:3;
-   GLuint pad1:12;
+   GLuint pad1:7;
    
    GLfloat offset_factor;
    GLfloat offset_units;
@@ -117,7 +116,7 @@ struct brw_clip_compile {
    GLuint last_mrf;
 
    GLuint header_position_offset;
-   GLuint offset[VERT_ATTRIB_MAX];
+   GLuint offset[PIPE_MAX_SHADER_OUTPUTS];
    GLboolean need_ff_sync;
 };
 
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
index 4baff55806..8501599aef 100644
--- a/src/gallium/drivers/i965/brw_clip_unfilled.c
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -29,7 +29,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 #include "brw_defines.h"
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 7a6c46ce07..60bfd3538e 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -93,7 +93,7 @@ void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos )
    /* value.xyz *= value.rhw
     */
    brw_set_access_mode(p, BRW_ALIGN_16);
-   brw_MUL(p, brw_writemask(pos, WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
+   brw_MUL(p, brw_writemask(pos, BRW_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W));
    brw_set_access_mode(p, BRW_ALIGN_1);
 }
 
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 063ada5772..07a5420d6e 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -38,7 +38,7 @@
 #include "brw_state.h"
 #include "brw_vs.h"
 #include "brw_screen_tex.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 6699d3bdb6..3a2fece45c 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -36,6 +36,8 @@
 #include "brw_structs.h"
 #include "brw_winsys.h"
 #include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "tgsi/tgsi_scan.h"
 
 
 /* Glossary:
@@ -143,6 +145,27 @@ struct brw_blend_state {
 };
 
 
+struct brw_rasterizer_state;
+
+
+struct brw_vertex_shader {
+   const struct tgsi_token *tokens;
+   struct tgsi_shader_info info;
+
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
+   GLboolean use_const_buffer;
+};
+
+
+struct brw_fragment_shader {
+   const struct tgsi_token *tokens;
+   struct tgsi_shader_info info;
+
+   GLboolean isGLSL;
+
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
+   GLboolean use_const_buffer;
+};
 
 
@@ -157,6 +180,7 @@ struct brw_blend_state {
 #define PIPE_NEW_VERTEX_SHADER          0x2
 #define PIPE_NEW_FRAGMENT_CONSTS        0x2
 #define PIPE_NEW_VERTEX_CONSTS          0x2
+#define PIPE_NEW_CLIP                   0x2
 
 
 #define BRW_NEW_URB_FENCE               0x1
@@ -196,25 +220,6 @@ struct brw_state_flags {
 };
 
 
-struct brw_vertex_program {
-   const struct tgsi_token *tokens;
-   GLuint id;
-   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
-   GLboolean use_const_buffer;
-};
-
-
-/** Subclass of Mesa fragment program */
-struct brw_fragment_program {
-   const struct tgsi_token *tokens;
-
-   GLuint id;  /**< serial no. to identify frag progs, never re-used */
-   GLboolean isGLSL;  /**< any IF/LOOP/CONT/BREAK instructions */
-
-   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
-   GLboolean use_const_buffer;
-};
-
 
 /* Data about a particular attempt to compile a program.  Note that
  * there can be many of these, each in a different GL state
@@ -452,24 +457,29 @@ struct brw_query_object {
  */
 struct brw_context 
 {
-   struct pipe_context *pipe;
-   struct pipe_screen *screen;
-   
+   struct pipe_context pipe;
+
+   struct brw_screen *brw_screen;   
    struct brw_winsys_screen *sws;
 
    GLuint primitive;
+   GLuint reduced_primitive;
 
    GLboolean emit_state_always;
    GLboolean no_batch_wrap;
 
    /* Active vertex program: 
     */
-   const struct gl_vertex_program *vertex_program;
-   const struct gl_fragment_program *fragment_program;
-   struct pipe_framebuffer_state fb;
-   struct brw_depth_stencil_alpha_state *dsa;
-   struct brw_blend_state *blend;
-   struct pipe_viewport_state vp;
+   struct {
+      const struct brw_vertex_shader *vs;
+      const struct brw_fragment_shader *fs;
+      const struct brw_blend_state *blend;
+      const struct brw_rasterizer_state *rast;
+      const struct brw_depth_stencil_alpha_state *dsa;
+      struct pipe_framebuffer_state fb;
+      struct pipe_viewport_state vp;
+      struct pipe_clip_state ucp;
+   } curr;
 
    struct {
       struct brw_state_flags dirty;
@@ -719,29 +729,6 @@ brw_context( struct pipe_context *ctx )
    return (struct brw_context *)ctx;
 }
 
-static INLINE struct brw_vertex_program *
-brw_vertex_program(struct gl_vertex_program *p)
-{
-   return (struct brw_vertex_program *) p;
-}
-
-static INLINE const struct brw_vertex_program *
-brw_vertex_program_const(const struct gl_vertex_program *p)
-{
-   return (const struct brw_vertex_program *) p;
-}
-
-static INLINE struct brw_fragment_program *
-brw_fragment_program(struct gl_fragment_program *p)
-{
-   return (struct brw_fragment_program *) p;
-}
-
-static INLINE const struct brw_fragment_program *
-brw_fragment_program_const(const struct gl_fragment_program *p)
-{
-   return (const struct brw_fragment_program *) p;
-}
 
 
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 33ea9a00f7..f2524d75e2 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -30,7 +30,7 @@
   */
 
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_regions.h"
 #include "brw_context.h"
 #include "brw_defines.h"
@@ -55,8 +55,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
    GLuint nr_clip_regs = 0;
    GLuint total_regs;
 
-   /* PIPE_NEW_UCP */
-   if (brw->nr_ucp) {
+   /* PIPE_NEW_CLIP */
+   if (brw->curr.ucp.nr) {
       GLuint nr_planes = 6 + brw->nr_ucp;
       nr_clip_regs = (nr_planes * 4 + 15) / 16;
    }
@@ -106,7 +106,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
 const struct brw_tracked_state brw_curbe_offsets = {
    .dirty = {
-      .mesa = PIPE_NEW_UCP,
+      .mesa = PIPE_NEW_CLIP,
       .brw  = BRW_NEW_VERTEX_PROGRAM,
       .cache = CACHE_NEW_WM_PROG
    },
@@ -327,7 +327,7 @@ const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
       .mesa = (PIPE_NEW_FS_CONSTANTS |
 	       PIPE_NEW_VS_CONSTANTS |
-	       PIPE_NEW_UCP),
+	       PIPE_NEW_CLIP),
       .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
 	       BRW_NEW_VERTEX_PROGRAM |
 	       BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 282c5b18f4..1dc64ddc8f 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -840,8 +840,8 @@
 
 #include "intel_chipset.h"
 
-#define BRW_IS_G4X(brw)         (IS_G4X((brw)->brw_screen->deviceID))
-#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->brw_screen->deviceID))
+#define BRW_IS_G4X(brw)         (IS_G4X((brw)->brw_screen->pci_id))
+#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->brw_screen->pci_id))
 #define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
 #define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
 #define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 856999f3ef..741537309a 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -31,7 +31,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_buffer_objects.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BATCH
@@ -133,7 +133,7 @@ static void brw_emit_prim(struct brw_context *brw,
       ADVANCE_BATCH();
    }
    if (prim_packet.verts_per_instance) {
-      intel_batchbuffer_data( brw->intel.batch, &prim_packet,
+      brw_batchbuffer_data( brw->intel.batch, &prim_packet,
 			      sizeof(prim_packet), LOOP_CLIPRECTS);
    }
    if (intel->always_flush_cache) {
@@ -224,7 +224,7 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw,
       return ret;
 
    if (intel->always_flush_batch)
-      intel_batchbuffer_flush(intel->batch);
+      brw_batchbuffer_flush(intel->batch);
 
    return 0;
 }
@@ -249,12 +249,10 @@ void brw_draw_prims( struct brw_context *brw,
     */
    ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
 
-   /* Otherwise, we really are out of memory.  Pass the drawing
-    * command to the software tnl module and which will in turn call
-    * swrast to do the drawing.
+   /* Otherwise, flush and retry:
     */
    if (ret != 0) {
-      intel_batchbuffer_flush(intel->batch);
+      brw_batchbuffer_flush(intel->batch);
       ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
       assert(ret == 0);
    }
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index dce015d79f..1ab65d60c4 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -35,7 +35,7 @@
 #include "brw_state.h"
 #include "brw_fallback.h"
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_buffer_objects.h"
 #include "intel_tex.h"
 
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index 30603bdd0e..46d52a473b 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -35,7 +35,6 @@
 
 #include "brw_structs.h"
 #include "brw_defines.h"
-#include "shader/prog_instruction.h"
 
 #define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
 #define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
@@ -45,6 +44,23 @@
 #define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
 #define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)
 
+#define BRW_WRITEMASK_NONE     0x00
+#define BRW_WRITEMASK_X        0x01
+#define BRW_WRITEMASK_Y        0x02
+#define BRW_WRITEMASK_XY       0x03
+#define BRW_WRITEMASK_Z        0x04
+#define BRW_WRITEMASK_XZ       0x05
+#define BRW_WRITEMASK_YZ       0x06
+#define BRW_WRITEMASK_XYZ      0x07
+#define BRW_WRITEMASK_W        0x08
+#define BRW_WRITEMASK_XW       0x09
+#define BRW_WRITEMASK_YW       0x0A
+#define BRW_WRITEMASK_XYW      0x0B
+#define BRW_WRITEMASK_ZW       0x0C
+#define BRW_WRITEMASK_XZW      0x0D
+#define BRW_WRITEMASK_YZW      0x0E
+#define BRW_WRITEMASK_XYZW     0x0F
+
 
 #define REG_SIZE (8*4)
 
@@ -157,7 +173,7 @@ static INLINE int type_sz( GLuint type )
  * \param width  one of BRW_WIDTH_x
  * \param hstride  one of BRW_HORIZONTAL_STRIDE_x
  * \param swizzle  one of BRW_SWIZZLE_x
- * \param writemask  WRITEMASK_X/Y/Z/W bitfield
+ * \param writemask  BRW_WRITEMASK_X/Y/Z/W bitfield
  */
 static INLINE struct brw_reg brw_reg( GLuint file,
                                       GLuint nr,
@@ -215,7 +231,7 @@ static INLINE struct brw_reg brw_vec16_reg( GLuint file,
 		  BRW_WIDTH_16,
 		  BRW_HORIZONTAL_STRIDE_1,
 		  BRW_SWIZZLE_XYZW,
-		  WRITEMASK_XYZW);
+		  BRW_WRITEMASK_XYZW);
 }
 
 /** Construct float[8] register */
@@ -231,7 +247,7 @@ static INLINE struct brw_reg brw_vec8_reg( GLuint file,
 		  BRW_WIDTH_8,
 		  BRW_HORIZONTAL_STRIDE_1,
 		  BRW_SWIZZLE_XYZW,
-		  WRITEMASK_XYZW);
+		  BRW_WRITEMASK_XYZW);
 }
 
 /** Construct float[4] register */
@@ -247,7 +263,7 @@ static INLINE struct brw_reg brw_vec4_reg( GLuint file,
 		  BRW_WIDTH_4,
 		  BRW_HORIZONTAL_STRIDE_1,
 		  BRW_SWIZZLE_XYZW,
-		  WRITEMASK_XYZW);
+		  BRW_WRITEMASK_XYZW);
 }
 
 /** Construct float[2] register */
@@ -263,7 +279,7 @@ static INLINE struct brw_reg brw_vec2_reg( GLuint file,
 		  BRW_WIDTH_2,
 		  BRW_HORIZONTAL_STRIDE_1,
 		  BRW_SWIZZLE_XYXY,
-		  WRITEMASK_XY);
+		  BRW_WRITEMASK_XY);
 }
 
 /** Construct float[1] register */
@@ -279,7 +295,7 @@ static INLINE struct brw_reg brw_vec1_reg( GLuint file,
 		  BRW_WIDTH_1,
 		  BRW_HORIZONTAL_STRIDE_0,
 		  BRW_SWIZZLE_XXXX,
-		  WRITEMASK_X);
+		  BRW_WRITEMASK_X);
 }
 
 
@@ -510,7 +526,7 @@ static INLINE struct brw_reg brw_ip_reg( void )
 		  BRW_WIDTH_1,
 		  BRW_HORIZONTAL_STRIDE_0,
 		  BRW_SWIZZLE_XYZW, /* NOTE! */
-		  WRITEMASK_XYZW); /* NOTE! */
+		  BRW_WRITEMASK_XYZW); /* NOTE! */
 }
 
 static INLINE struct brw_reg brw_acc_reg( void )
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 241cdc33f8..f6b8843e01 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -1276,7 +1276,7 @@ void brw_SAMPLE(struct brw_compile *p,
     * instruction, so that is a guide for whether a workaround is
     * needed.
     */
-   if (writemask != WRITEMASK_XYZW) {
+   if (writemask != BRW_WRITEMASK_XYZW) {
       GLuint dst_offset = 0;
       GLuint i, newmask = 0, len = 0;
 
@@ -1299,7 +1299,7 @@ void brw_SAMPLE(struct brw_compile *p,
       else {
 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
 	 
-	 newmask = ~newmask & WRITEMASK_XYZW;
+	 newmask = ~newmask & BRW_WRITEMASK_XYZW;
 
 	 brw_push_insn_state(p);
 
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 58930e7964..692ce46679 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -29,7 +29,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
       
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 #include "brw_defines.h"
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_gs_emit.c b/src/gallium/drivers/i965/brw_gs_emit.c
index 9ec206d7e8..fd8e2acced 100644
--- a/src/gallium/drivers/i965/brw_gs_emit.c
+++ b/src/gallium/drivers/i965/brw_gs_emit.c
@@ -30,7 +30,7 @@
   */
  
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 #include "brw_defines.h"
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index d33bf40a01..eb39be8545 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -31,7 +31,7 @@
  
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_regions.h"
 
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index d5b7bd3b83..e85a1a9c1b 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -1,6 +1,6 @@
 
 /**
- * called from intel_batchbuffer_flush and children before sending a
+ * called from brw_batchbuffer_flush and children before sending a
  * batchbuffer off.
  */
 static void brw_finish_batch(struct intel_context *intel)
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 0b9ba0c0ed..55242ac6ad 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -42,7 +42,7 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_reg.h"
 
 /** Waits on the query object's BO and totals the results for this query */
@@ -122,7 +122,7 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q)
     */
    if (query->bo) {
       brw_emit_query_end(brw);
-      intel_batchbuffer_flush(brw->batch);
+      brw_batchbuffer_flush(brw->batch);
 
       brw->sws->bo_unreference(brw->query.bo);
       brw->query.bo = NULL;
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
new file mode 100644
index 0000000000..ff64dbd48d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -0,0 +1,46 @@
+
+static void
+calculate_clip_key_rast()
+{
+   /* Fill the clip key from rasterizer state (NOTE(review): brw/key undeclared here). */
+   if (BRW_IS_IGDNG(brw))
+       key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+   else
+       key.clip_mode = BRW_CLIPMODE_NORMAL;
+
+   key.do_flat_shading = brw->rast->templ.flatshade;
+   /* NOTE(review): brw->rast is accessed with both "->" and "." below; confirm its type. */
+   if (key.primitive == PIPE_PRIM_TRIANGLES) {
+      if (brw->rast->templ.cull_mode == PIPE_WINDING_BOTH)
+	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      else {
+	 key.fill_ccw = CLIP_CULL;
+	 key.fill_cw = CLIP_CULL;
+	 /* Both windings start out culled; each non-culled one gets its fill mode. */
+	 if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CCW)) {
+	    key.fill_ccw = translate_fill(brw->rast.fill_ccw);
+	 }
+
+	 if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CW)) {
+	    key.fill_cw = translate_fill(brw->rast.fill_cw);
+	 }
+	 /* Anything other than plain fill on either winding selects unfilled handling. */
+	 if (key.fill_cw != CLIP_FILL ||
+	     key.fill_ccw != CLIP_FILL) {
+	    key.do_unfilled = 1;
+	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+	 }
+
+	 key.offset_ccw = brw->rast.templ.offset_ccw;
+	 key.offset_cw = brw->rast.templ.offset_cw;
+	 /* With two-sided lighting, set copy_bfc_* for every winding that is not culled. */
+	 if (brw->rast.templ.light_twoside &&
+	     key.fill_cw != CLIP_CULL)
+	    key.copy_bfc_cw = 1;
+
+	 if (brw->rast.templ.light_twoside &&
+	     key.fill_ccw != CLIP_CULL)
+	    key.copy_bfc_ccw = 1;
+      }
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
new file mode 100644
index 0000000000..6ceaa1fb09
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -0,0 +1,14 @@
+#ifndef BRW_PIPE_RAST_H
+#define BRW_PIPE_RAST_H
+
+#include "brw_clip.h"
+
+struct brw_rasterizer_state {
+   struct pipe_rasterizer_state templ; /* rasterizer state template, kept for the draw module */
+
+   /* Precalculated hardware state: the clip program key.
+    */
+   struct brw_clip_prog_key clip_key;
+};
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
new file mode 100644
index 0000000000..fbb772d18c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -0,0 +1,159 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+  
+#include "brw_context.h"
+#include "brw_util.h"
+#include "brw_wm.h"
+
+static void brwBindProgram( struct brw_context *brw,
+			    GLenum target,
+			    struct gl_program *prog )
+{
+   /* Mark the program stage selected by target as dirty; other targets are
+    * ignored and prog is not used. */
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB:
+      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      break;
+   case GL_FRAGMENT_PROGRAM_ARB:
+      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      break;
+   }
+}
+
+static struct gl_program *brwNewProgram( struct brw_context *brw,
+				      GLenum target,
+				      GLuint id )
+{
+   /* Allocate a driver program for target and tag it with a fresh program id.
+    * NOTE(review): ctx is not declared in this scope; the _mesa_* calls need one. */
+   switch (target) {
+   case GL_VERTEX_PROGRAM_ARB: {
+      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+
+	 return _mesa_init_vertex_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL;
+   }
+
+   case GL_FRAGMENT_PROGRAM_ARB: {
+      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
+      if (prog) {
+	 prog->id = brw->program_id++;
+
+	 return _mesa_init_fragment_program( ctx, &prog->program,
+					     target, id );
+      }
+      else
+	 return NULL;
+   }
+
+   default:
+      return _mesa_new_program(ctx, target, id);
+   }
+}
+
+static void brwDeleteProgram( struct brw_context *brw,
+			      struct gl_program *prog )
+{
+   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { /* fragment programs also hold a const_buffer reference to drop */
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
+      brw->sws->bo_unreference(brw_fprog->const_buffer);
+   }
+   /* NOTE(review): ctx is not declared in this function; confirm where it comes from. */
+   _mesa_delete_program( ctx, prog );
+}
+
+
+static GLboolean brwIsProgramNative( struct brw_context *brw,
+				     GLenum target, 
+				     struct gl_program *prog )
+{
+   return GL_TRUE; /* unconditional: none of the arguments are inspected */
+}
+
+static void brwProgramStringNotify( struct brw_context *brw,
+				    GLenum target,
+				    struct gl_program *prog )
+{
+   /* Give a re-specified program a fresh id and flag it dirty if it is current.
+    * NOTE(review): ctx is not declared in this scope; the _mesa/_tnl calls need one. */
+   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
+      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
+      const struct brw_fragment_program *curFP =
+         brw_fragment_program_const(brw->fragment_program);
+
+      if (fprog->FogOption) {
+         _mesa_append_fog_code(ctx, fprog);
+         fprog->FogOption = GL_NONE;
+      }
+
+      if (newFP == curFP)
+	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
+      newFP->id = brw->program_id++;
+      newFP->isGLSL = brw_wm_is_glsl(fprog);
+   }
+   else if (target == GL_VERTEX_PROGRAM_ARB) {
+      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
+      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
+      const struct brw_vertex_program *curVP =
+         brw_vertex_program_const(brw->vertex_program);
+
+      if (newVP == curVP)
+	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+      if (newVP->program.IsPositionInvariant) {
+	 _mesa_insert_mvp_code(ctx, &newVP->program);
+      }
+      newVP->id = brw->program_id++;
+
+      /* Also tell tnl about it:
+       */
+      _tnl_program_string(ctx, target, prog);
+   }
+}
+
+void brwInitFragProgFuncs( struct dd_function_table *functions )
+{
+   assert(functions->ProgramStringNotify == _tnl_program_string); 
+   /* Install the brw program hooks; brwProgramStringNotify calls _tnl_program_string itself. */
+   functions->BindProgram = brwBindProgram;
+   functions->NewProgram = brwNewProgram;
+   functions->DeleteProgram = brwDeleteProgram;
+   functions->IsProgramNative = brwIsProgramNative;
+   functions->ProgramStringNotify = brwProgramStringNotify;
+}
+
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
new file mode 100644
index 0000000000..a640104d71
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -0,0 +1,79 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#define CMD_MI				(0x0 << 29)
+#define CMD_2D				(0x2 << 29)
+#define CMD_3D				(0x3 << 29)
+/* Commands below are built by OR-ing an opcode into one of the CMD_* values. */
+#define MI_NOOP				(CMD_MI | 0)
+#define MI_BATCH_BUFFER_END		(CMD_MI | (0xA << 23))
+#define MI_FLUSH			(CMD_MI | (4 << 23))
+
+#define _3DSTATE_DRAWRECT_INFO_I965	(CMD_3D | (3 << 27) | (1 << 24) | 0x2)
+
+/** @{
+ *
+ * PIPE_CONTROL operation, a combination of MI_FLUSH and a register write
+ * with additional flushing control.
+ */
+#define _3DSTATE_PIPE_CONTROL		(CMD_3D | (3 << 27) | (2 << 24) | 2)
+#define PIPE_CONTROL_NO_WRITE		(0 << 14)
+#define PIPE_CONTROL_WRITE_IMMEDIATE	(1 << 14)
+#define PIPE_CONTROL_WRITE_DEPTH_COUNT	(2 << 14)
+#define PIPE_CONTROL_WRITE_TIMESTAMP	(3 << 14)
+#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
+#define PIPE_CONTROL_WRITE_FLUSH	(1 << 12)
+#define PIPE_CONTROL_INSTRUCTION_FLUSH	(1 << 11)
+#define PIPE_CONTROL_INTERRUPT_ENABLE	(1 << 8)
+#define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
+#define PIPE_CONTROL_GLOBAL_GTT_WRITE	(1 << 2)
+
+/** @} */
+
+#define XY_SETUP_BLT_CMD		(CMD_2D | (0x01 << 22) | 6)
+#define XY_COLOR_BLT_CMD		(CMD_2D | (0x50 << 22) | 4)
+#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)
+
+/* BR00 */
+#define XY_BLT_WRITE_ALPHA	(1 << 21)
+#define XY_BLT_WRITE_RGB	(1 << 20)
+#define XY_SRC_TILED		(1 << 15)
+#define XY_DST_TILED		(1 << 11)
+
+/* BR13 */
+#define BR13_565		(0x1 << 24)
+#define BR13_8888		(0x3 << 24)
+
+#define FENCE_LINEAR 0
+#define FENCE_XMAJOR 1
+#define FENCE_YMAJOR 2
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
new file mode 100644
index 0000000000..716b55c52b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -0,0 +1,78 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef BRW_SCREEN_H
+#define BRW_SCREEN_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_screen.h"
+
+
+struct brw_winsys_screen;
+
+
+/**
+ * Subclass of pipe_screen
+ */
+struct brw_screen
+{
+   struct pipe_screen base; /* must stay first: brw_screen() casts a pipe_screen pointer to this type */
+
+   struct brw_winsys_screen *sws; /* winsys screen interface */
+
+   boolean is_i945;
+   uint pci_id; /* NOTE(review): presumably the id tested by IS_965()/IS_G4X()/IS_IGDNG() -- confirm */
+};
+
+/**
+ * Subclass of pipe_transfer
+ */
+struct brw_transfer
+{
+   struct pipe_transfer base; /* must stay first: brw_transfer() casts a pipe_transfer pointer to this type */
+
+   unsigned offset; /* NOTE(review): units and reference point are not visible here -- confirm */
+};
+
+
+/*
+ * Cast wrappers: unchecked downcasts from a base pointer to the driver subclass
+ */
+static INLINE struct brw_screen *
+brw_screen(struct pipe_screen *pscreen)
+{
+   return (struct brw_screen *) pscreen;
+}
+
+static INLINE struct brw_transfer *
+brw_transfer(struct pipe_transfer *transfer)
+{
+   return (struct brw_transfer *)transfer; /* unchecked downcast; base is the first member of brw_transfer */
+}
+
+
+#endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index d199d0b81a..544be6a089 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -1,6 +1,6 @@
    /* _NEW_BUFFERS */
-   if (IS_965(intel->intelScreen->deviceID) &&
-       !IS_G4X(intel->intelScreen->deviceID)) {
+   if (IS_965(brw->brw_screen->pci_id) &&
+       !IS_G4X(brw->brw_screen->pci_id)) {
       for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
 	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
 	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 0115f77c08..54202cbd12 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -30,7 +30,7 @@
   */
   
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 #include "brw_defines.h"
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
index 26c2e8891a..c99116b8b1 100644
--- a/src/gallium/drivers/i965/brw_sf.h
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -34,7 +34,6 @@
 #define BRW_SF_H
 
 
-#include "shader/program.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index c98d7ec13a..4acb2b7d72 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -30,7 +30,7 @@
   */
    
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 #include "brw_defines.h"
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index b716097bfc..02657eaba7 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
index 9568794625..b285837070 100644
--- a/src/gallium/drivers/i965/brw_state_batch.c
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -32,7 +32,7 @@
 
 
 #include "brw_state.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 
@@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
       return GL_TRUE;
    }
 
@@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
  emit:
    memcpy(item->header, newheader, sz);
-   intel_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
    return GL_TRUE;
 }
 
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 91d0f80297..1b5f27cc16 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -57,7 +57,7 @@
  */
 
 #include "brw_state.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 /* XXX: Fixme - have to include these to get the sizes of the prog_key
  * structs:
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index b68b6cb21a..842380e38f 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -33,7 +33,7 @@
 
 #include "brw_context.h"
 #include "brw_state.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 
 /* This is used to initialize brw->state.atoms[].  We could use this
  * list directly except for a single atom, brw_constant_buffer, which
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
index 75cdc18912..813cd31f49 100644
--- a/src/gallium/drivers/i965/brw_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_tex_layout.c
@@ -47,7 +47,7 @@ GLboolean brw_miptree_layout(struct brw_context *brw,
 
    switch (mt->target) {
    case GL_TEXTURE_CUBE_MAP:
-      if (IS_IGDNG(intel->intelScreen->deviceID)) {
+      if (IS_IGDNG(brw->brw_screen->pci_id)) {
           GLuint align_h = 2, align_w = 4;
           GLuint level;
           GLuint x = 0;
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 8c6f4355a6..18d79c5ebb 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -31,7 +31,7 @@
         
 
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965/brw_util.h b/src/gallium/drivers/i965/brw_util.h
index 37c3acbc11..b5f9a36e7b 100644
--- a/src/gallium/drivers/i965/brw_util.h
+++ b/src/gallium/drivers/i965/brw_util.h
@@ -36,9 +36,8 @@
 #include "brw_types.h"
 
 extern GLuint brw_count_bits( GLuint val );
-extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList);
-extern GLuint brw_translate_blend_factor( GLenum factor );
-extern GLuint brw_translate_blend_equation( GLenum mode );
+extern GLuint brw_translate_blend_factor( unsigned factor );
+extern GLuint brw_translate_blend_equation( unsigned mode );
 
 
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 97e523c3ee..dcd687ac34 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -34,7 +34,6 @@
 #include "brw_vs.h"
 #include "brw_util.h"
 #include "brw_state.h"
-#include "shader/prog_print.h"
 
 
@@ -113,7 +112,7 @@ static void brw_upload_vs_prog(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_vs_prog = {
    .dirty = {
-      .mesa  = PIPE_NEW_UCP | PIPE_NEW_RAST,
+      .mesa  = PIPE_NEW_CLIP | PIPE_NEW_RAST,
       .brw   = BRW_NEW_VERTEX_PROGRAM,
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 4a591365c9..54f7d7d7c4 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -36,7 +36,6 @@
 
 #include "brw_context.h"
 #include "brw_eu.h"
-#include "shader/program.h"
 
 
 struct brw_vs_prog_key {
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 6adb743017..e946944295 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -192,7 +192,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 					     BRW_WIDTH_8,
 					     BRW_HORIZONTAL_STRIDE_1,
 					     BRW_SWIZZLE_XXXX,
-					     WRITEMASK_X);
+					     BRW_WRITEMASK_X);
       reg++;
    }
 
@@ -487,7 +487,7 @@ static void emit_exp_noalias( struct brw_vs_compile *c,
    struct brw_compile *p = &c->func;
    
 
-   if (dst.dw1.bits.writemask & WRITEMASK_X) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_X) {
       struct brw_reg tmp = get_tmp(c);
       struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
 
@@ -499,23 +499,23 @@ static void emit_exp_noalias( struct brw_vs_compile *c,
       /* Adjust exponent for floating point: 
        * exp += 127 
        */
-      brw_ADD(p, brw_writemask(tmp_d, WRITEMASK_X), tmp_d, brw_imm_d(127));
+      brw_ADD(p, brw_writemask(tmp_d, BRW_WRITEMASK_X), tmp_d, brw_imm_d(127));
 
       /* Install exponent and sign.  
        * Excess drops off the edge: 
        */
-      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), WRITEMASK_X), 
+      brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), BRW_WRITEMASK_X), 
 	      tmp_d, brw_imm_d(23));
 
       release_tmp(c, tmp);
    }
 
-   if (dst.dw1.bits.writemask & WRITEMASK_Y) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y) {
       /* result[1] = arg0.x - floor(arg0.x) */
-      brw_FRC(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0, 0));
+      brw_FRC(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0, 0));
    }
    
-   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
       /* As with the LOG instruction, we might be better off just
        * doing a taylor expansion here, seeing as we have to do all
        * the prep work.
@@ -525,14 +525,14 @@ static void emit_exp_noalias( struct brw_vs_compile *c,
        */
       emit_math1(c, 
 		 BRW_MATH_FUNCTION_EXP, 
-		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_writemask(dst, BRW_WRITEMASK_Z),
 		 brw_swizzle1(arg0, 0), 
 		 BRW_MATH_PRECISION_FULL);
    }  
 
-   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
       /* result[3] = 1.0; */
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), brw_imm_f(1));
    }
 }
 
@@ -562,36 +562,36 @@ static void emit_log_noalias( struct brw_vs_compile *c,
     * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
     * result[1].i = (x.i & ((1<<23)-1)        + (127<<23)
     */
-   if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_XZ) {
       brw_AND(p, 
-	      brw_writemask(tmp_ud, WRITEMASK_X),
+	      brw_writemask(tmp_ud, BRW_WRITEMASK_X),
 	      brw_swizzle1(arg0_ud, 0),
 	      brw_imm_ud((1U<<31)-1));
 
       brw_SHR(p, 
-	      brw_writemask(tmp_ud, WRITEMASK_X), 
+	      brw_writemask(tmp_ud, BRW_WRITEMASK_X), 
 	      tmp_ud,
 	      brw_imm_ud(23));
 
       brw_ADD(p, 
-	      brw_writemask(tmp, WRITEMASK_X), 
+	      brw_writemask(tmp, BRW_WRITEMASK_X), 
 	      retype(tmp_ud, BRW_REGISTER_TYPE_D),	/* does it matter? */
 	      brw_imm_d(-127));
    }
 
-   if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_YZ) {
       brw_AND(p, 
-	      brw_writemask(tmp_ud, WRITEMASK_Y),
+	      brw_writemask(tmp_ud, BRW_WRITEMASK_Y),
 	      brw_swizzle1(arg0_ud, 0),
 	      brw_imm_ud((1<<23)-1));
 
       brw_OR(p, 
-	     brw_writemask(tmp_ud, WRITEMASK_Y), 
+	     brw_writemask(tmp_ud, BRW_WRITEMASK_Y), 
 	     tmp_ud,
 	     brw_imm_ud(127<<23));
    }
    
-   if (dst.dw1.bits.writemask & WRITEMASK_Z) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z) {
       /* result[2] = result[0] + LOG2(result[1]); */
 
       /* Why bother?  The above is just a hint how to do this with a
@@ -606,19 +606,19 @@ static void emit_log_noalias( struct brw_vs_compile *c,
        */
       emit_math1(c, 
 		 BRW_MATH_FUNCTION_LOG, 
-		 brw_writemask(tmp, WRITEMASK_Z), 
+		 brw_writemask(tmp, BRW_WRITEMASK_Z), 
 		 brw_swizzle1(tmp, 1), 
 		 BRW_MATH_PRECISION_FULL);
       
       brw_ADD(p, 
-	      brw_writemask(tmp, WRITEMASK_Z), 
+	      brw_writemask(tmp, BRW_WRITEMASK_Z), 
 	      brw_swizzle1(tmp, 2), 
 	      brw_swizzle1(tmp, 0));
    }  
 
-   if (dst.dw1.bits.writemask & WRITEMASK_W) {
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_W) {
       /* result[3] = 1.0; */
-      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+      brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_W), brw_imm_f(1));
    }
 
    if (need_tmp) {
@@ -639,14 +639,14 @@ static void emit_dst_noalias( struct brw_vs_compile *c,
 
    /* There must be a better way to do this: 
     */
-   if (dst.dw1.bits.writemask & WRITEMASK_X)
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_X), brw_imm_f(1.0));
-   if (dst.dw1.bits.writemask & WRITEMASK_Y)
-      brw_MUL(p, brw_writemask(dst, WRITEMASK_Y), arg0, arg1);
-   if (dst.dw1.bits.writemask & WRITEMASK_Z)
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_Z), arg0);
-   if (dst.dw1.bits.writemask & WRITEMASK_W)
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), arg1);
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_X)
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_X), brw_imm_f(1.0));
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_Y)
+      brw_MUL(p, brw_writemask(dst, BRW_WRITEMASK_Y), arg0, arg1);
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_Z)
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Z), arg0);
+   if (dst.dw1.bits.writemask & BRW_WRITEMASK_W)
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_W), arg1);
 }
 
 
@@ -672,8 +672,8 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
    if (need_tmp) 
       tmp = get_tmp(c);
    
-   brw_MOV(p, brw_writemask(dst, WRITEMASK_YZ), brw_imm_f(0)); 
-   brw_MOV(p, brw_writemask(dst, WRITEMASK_XW), brw_imm_f(1)); 
+   brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_YZ), brw_imm_f(0)); 
+   brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_XW), brw_imm_f(1)); 
 
    /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
     * to get all channels active inside the IF.  In the clipping code
@@ -683,15 +683,15 @@ static void emit_lit_noalias( struct brw_vs_compile *c,
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
    if_insn = brw_IF(p, BRW_EXECUTE_8);
    {
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_Y), brw_swizzle1(arg0,0));
+      brw_MOV(p, brw_writemask(dst, BRW_WRITEMASK_Y), brw_swizzle1(arg0,0));
 
       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
-      brw_MOV(p, brw_writemask(tmp, WRITEMASK_Z),  brw_swizzle1(arg0,1));
+      brw_MOV(p, brw_writemask(tmp, BRW_WRITEMASK_Z),  brw_swizzle1(arg0,1));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
       emit_math2(c, 
 		 BRW_MATH_FUNCTION_POW, 
-		 brw_writemask(dst, WRITEMASK_Z),
+		 brw_writemask(dst, BRW_WRITEMASK_Z),
 		 brw_swizzle1(tmp, 2),
 		 brw_swizzle1(arg0, 3),
 		 BRW_MATH_PRECISION_PARTIAL);      
@@ -1045,7 +1045,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    /* ndc = 1.0 / pos.w */
    emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
    /* ndc.xyz = pos * ndc */
-   brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   brw_MUL(p, brw_writemask(ndc, BRW_WRITEMASK_XYZ), pos, ndc);
 
    /* Update the header for point size, user clipping flags, and -ve rhw
     * workaround.
@@ -1062,14 +1062,14 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
       if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
 	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
-	 brw_MUL(p, brw_writemask(header1, WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
-	 brw_AND(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
+	 brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
+	 brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
       }
 
       for (i = 0; i < c->key.nr_userclip; i++) {
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
 	 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
-	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
+	 brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<i));
 	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
       }
 
@@ -1089,7 +1089,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 brw_swizzle1(ndc, 3),
 		 brw_imm_f(0));
    
-	 brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+	 brw_OR(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(1<<6));
 	 brw_MOV(p, ndc, brw_imm_f(0));
 	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
       }
@@ -1139,7 +1139,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 eot, 		/* writes complete */
 		 0, 		/* urb destination offset */
 		 BRW_URB_SWIZZLE_INTERLEAVE);
-
+
    if (c->first_overflow_output > 0) {
       /* Not all of the vertex outputs/results fit into the MRF.
        * Move the overflowed attributes from the GRF to the MRF and
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
new file mode 100644
index 0000000000..2142db5a4d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -0,0 +1,243 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 Jakob Bornecrantz
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef BRW_WINSYS_H
+#define BRW_WINSYS_H
+
+#include "pipe/p_compiler.h"
+
+struct brw_winsys;
+struct pipe_fence_handle;
+
+/* This currently just wraps dri_bo:
+ */
+struct brw_winsys_buffer {
+   struct brw_winsys_screen *sws;
+   void *bo;
+   unsigned offset;
+};
+
+enum brw_buffer_usage {
+   I915_GEM_DOMAIN_RENDER,
+   I915_GEM_DOMAIN_SAMPLER,
+   I915_GEM_DOMAIN_VERTEX,
+   I915_GEM_DOMAIN_INSTRUCTION,
+
+
+   /* XXX: migrate from domains to explicit usage cases, eg below:
+    */
+
+   /* use on textures */
+   BRW_USAGE_RENDER    = 0x01,
+   BRW_USAGE_SAMPLER   = 0x02,
+   BRW_USAGE_2D_TARGET = 0x04,
+   BRW_USAGE_2D_SOURCE = 0x08,
+   /* use on vertex */
+   BRW_USAGE_VERTEX    = 0x10,
+};
+
+enum brw_buffer_type
+{
+   BRW_BUFFER_TYPE_TEXTURE,
+   BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */
+   BRW_BUFFER_TYPE_VERTEX,
+};
+
+
+/* AKA winsys context:
+ */
+struct brw_batchbuffer {
+
+   struct brw_winsys *iws;
+   struct brw_winsys_buffer *buf;
+
+   /**
+    * Values exported to speed up writing to the batchbuffer,
+    * instead of having to go through an accessor function for
+    * each dword written.
+    */
+   /*@{*/
+   uint8_t *map;
+   uint8_t *ptr;
+   size_t size;
+
+   size_t relocs;
+   size_t max_relocs;
+   /*@}*/
+};
+
+struct brw_winsys_screen {
+
+   /**
+    * Batchbuffer functions.
+    */
+   /*@{*/
+   /**
+    * Create a new batchbuffer.
+    */
+   struct brw_batchbuffer *(*batchbuffer_create)(struct brw_winsys_screen *iws);
+
+   /**
+    * Emit a relocation to a buffer.
+    * Target position in batchbuffer is the same as ptr.
+    */
+   int (*batchbuffer_reloc)(struct brw_batchbuffer *batch,
+			    unsigned offset,
+                            struct brw_winsys_buffer *reloc,
+			    unsigned pre_add,
+                            enum brw_buffer_usage usage);
+
+   /**
+    * Flush a batchbuffer.
+    */
+   void (*batchbuffer_flush)(struct brw_batchbuffer *batch,
+                             struct pipe_fence_handle **fence);
+
+   /**
+    * Destroy a batchbuffer.
+    */
+   void (*batchbuffer_destroy)(struct brw_batchbuffer *batch);
+   /*@}*/
+
+
+   /**
+    * Buffer functions.
+    */
+   /*@{*/
+   /**
+    * Create a buffer.
+    */
+   struct brw_winsys_buffer *(*buffer_create)(struct brw_winsys *iws,
+					      unsigned size, 
+					      unsigned alignment,
+					      enum brw_buffer_type type);
+
+
+   /* Reference and unreference buffers:
+    */
+   void (*bo_reference)( struct brw_winsys_buffer *buffer );
+   void (*bo_unreference)( struct brw_winsys_buffer *buffer );
+   void (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
+			  unsigned domain,
+			  unsigned a,
+			  unsigned b,
+			  unsigned offset,
+			  struct brw_winsys_buffer *b2);
+
+   /**
+    * Map a buffer.
+    */
+   void *(*buffer_map)(struct brw_winsys *iws,
+                       struct brw_winsys_buffer *buffer,
+                       boolean write);
+
+   /**
+    * Unmap a buffer.
+    */
+   void (*buffer_unmap)(struct brw_winsys *iws,
+                        struct brw_winsys_buffer *buffer);
+
+   /**
+    * Write to a buffer.
+    *
+    * Arguments follow pipe_buffer_write.
+    */
+   int (*buffer_write)(struct brw_winsys *iws,
+                       struct brw_winsys_buffer *dst,
+                       size_t offset,
+                       size_t size,
+                       const void *data);
+
+   void (*buffer_destroy)(struct brw_winsys *iws,
+                          struct brw_winsys_buffer *buffer);
+   /*@}*/
+
+
+   /**
+    * Fence functions.
+    */
+   /*@{*/
+   /**
+    * Reference fence and set ptr to fence.
+    */
+   void (*fence_reference)(struct brw_winsys *iws,
+                           struct pipe_fence_handle **ptr,
+                           struct pipe_fence_handle *fence);
+
+   /**
+    * Check if a fence has finished.
+    */
+   int (*fence_signalled)(struct brw_winsys *iws,
+                          struct pipe_fence_handle *fence);
+
+   /**
+    * Wait on a fence to finish.
+    */
+   int (*fence_finish)(struct brw_winsys *iws,
+                       struct pipe_fence_handle *fence);
+   /*@}*/
+
+
+   /**
+    * Destroy the winsys.
+    */
+   void (*destroy)(struct brw_winsys *iws);
+};
+
+
+/**
+ * Create i915 pipe_screen.
+ */
+struct pipe_screen *i915_create_screen(struct brw_winsys *iws, unsigned pci_id);
+
+/**
+ * Create an i915 pipe_context.
+ */
+struct pipe_context *i915_create_context(struct pipe_screen *screen);
+
+/**
+ * Get the brw_winsys buffer backing the texture.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture;
+boolean i915_get_texture_buffer_brw(struct pipe_texture *texture,
+				    struct brw_winsys_buffer **buffer,
+				    unsigned *stride);
+
+/**
+ * Wrap a brw_winsys buffer with a texture blanket.
+ *
+ * TODO UGLY
+ */
+struct pipe_texture * i915_texture_blanket_brw(struct pipe_screen *screen,
+                                                 struct pipe_texture *tmplt,
+                                                 unsigned pitch,
+                                                 struct brw_winsys_buffer *buffer);
+
+
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 756a680150..18775830f9 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -34,7 +34,6 @@
 #define BRW_WM_H
 
 
-#include "shader/prog_instruction.h"
 #include "brw_context.h"
 #include "brw_eu.h"
 
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
index 220821087c..c6659646f2 100644
--- a/src/gallium/drivers/i965/brw_wm_debug.c
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -98,7 +98,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
    }
    _mesa_printf("]");
 
-   if (inst->writemask != WRITEMASK_XYZW)
+   if (inst->writemask != BRW_WRITEMASK_XYZW)
       _mesa_printf(".%s%s%s%s", 
 		   GET_BIT(inst->writemask, 0) ? "x" : "",
 		   GET_BIT(inst->writemask, 1) ? "y" : "",
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index fec33f74eb..7df9b79d7a 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -72,14 +72,14 @@ static void emit_pixel_xy(struct brw_compile *p,
    /* Calculate pixel centers by adding 1 or 0 to each of the
     * micro-tile coordinates passed in r1.
     */
-   if (mask & WRITEMASK_X) {
+   if (mask & BRW_WRITEMASK_X) {
       brw_ADD(p,
 	      vec16(retype(dst[0], BRW_REGISTER_TYPE_UW)),
 	      stride(suboffset(r1_uw, 4), 2, 4, 0),
 	      brw_imm_v(0x10101010));
    }
 
-   if (mask & WRITEMASK_Y) {
+   if (mask & BRW_WRITEMASK_Y) {
       brw_ADD(p,
 	      vec16(retype(dst[1], BRW_REGISTER_TYPE_UW)),
 	      stride(suboffset(r1_uw,5), 2, 4, 0),
@@ -101,14 +101,14 @@ static void emit_delta_xy(struct brw_compile *p,
    /* Calc delta X,Y by subtracting origin in r1 from the pixel
     * centers.
     */
-   if (mask & WRITEMASK_X) {
+   if (mask & BRW_WRITEMASK_X) {
       brw_ADD(p,
 	      dst[0],
 	      retype(arg0[0], BRW_REGISTER_TYPE_UW),
 	      negate(r1));
    }
 
-   if (mask & WRITEMASK_Y) {
+   if (mask & BRW_WRITEMASK_Y) {
       brw_ADD(p,
 	      dst[1],
 	      retype(arg0[1], BRW_REGISTER_TYPE_UW),
@@ -124,7 +124,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
 {
    struct brw_compile *p = &c->func;
 
-   if (mask & WRITEMASK_X) {
+   if (mask & BRW_WRITEMASK_X) {
       /* X' = X */
       brw_MOV(p,
 	      dst[0],
@@ -133,7 +133,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
 
    /* XXX: is this needed any more, or is this a NOOP?
     */
-   if (mask & WRITEMASK_Y) {
+   if (mask & BRW_WRITEMASK_Y) {
       /* Y' = height - 1 - Y */
       brw_ADD(p,
 	      dst[1],
@@ -152,7 +152,7 @@ static void emit_pixel_w( struct brw_compile *p,
    /* Don't need this if all you are doing is interpolating color, for
     * instance.
     */
-   if (mask & WRITEMASK_W) {      
+   if (mask & BRW_WRITEMASK_W) {      
       struct brw_reg interp3 = brw_vec1_grf(arg0[0].nr+1, 4);
 
       /* Calc 1/w - just linterp wpos[3] optimized by putting the
@@ -255,7 +255,7 @@ static void emit_frontfacing( struct brw_compile *p,
    struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
    GLuint i;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return;
 
    for (i = 0; i < 4; i++) {
@@ -321,26 +321,26 @@ void emit_ddxy(struct brw_compile *p,
 			   BRW_VERTICAL_STRIDE_2,
 			   BRW_WIDTH_2,
 			   BRW_HORIZONTAL_STRIDE_0,
-			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+			   BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 			   BRW_REGISTER_TYPE_F,
 			   BRW_VERTICAL_STRIDE_2,
 			   BRW_WIDTH_2,
 			   BRW_HORIZONTAL_STRIDE_0,
-			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+			   BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 	 } else {
 	    src0 = brw_reg(arg0[i].file, arg0[i].nr, 0,
 			   BRW_REGISTER_TYPE_F,
 			   BRW_VERTICAL_STRIDE_4,
 			   BRW_WIDTH_4,
 			   BRW_HORIZONTAL_STRIDE_0,
-			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+			   BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 	    src1 = brw_reg(arg0[i].file, arg0[i].nr, 2,
 			   BRW_REGISTER_TYPE_F,
 			   BRW_VERTICAL_STRIDE_4,
 			   BRW_WIDTH_4,
 			   BRW_HORIZONTAL_STRIDE_0,
-			   BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+			   BRW_SWIZZLE_XYZW, BRW_WRITEMASK_XYZW);
 	 }
 	 brw_ADD(p, dst[i], src0, negate(src1));
       }
@@ -611,12 +611,12 @@ static void emit_dp3( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -633,12 +633,12 @@ static void emit_dp4( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -656,12 +656,12 @@ static void emit_dph( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -681,7 +681,7 @@ static void emit_xpd( struct brw_compile *p,
 {
    GLuint i;
 
-   assert(!(mask & WRITEMASK_W) == WRITEMASK_X);
+   assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X);
    
    for (i = 0 ; i < 3; i++) {
       if (mask & (1<<i)) {
@@ -704,12 +704,12 @@ static void emit_math1( struct brw_compile *p,
 			GLuint mask,
 			const struct brw_reg *arg0 )
 {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MOV(p, brw_message_reg(2), arg0[0]);
 
@@ -732,12 +732,12 @@ static void emit_math2( struct brw_compile *p,
 			const struct brw_reg *arg0,
 			const struct brw_reg *arg1)
 {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
-   if (!(mask & WRITEMASK_XYZW))
+   if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_push_insn_state(p);
 
@@ -795,17 +795,17 @@ static void emit_tex( struct brw_wm_compile *c,
     */
    switch (inst->tex_idx) {
    case TEXTURE_1D_INDEX:
-      emit = WRITEMASK_X;
+      emit = BRW_WRITEMASK_X;
       nr = 1;
       break;
    case TEXTURE_2D_INDEX:
    case TEXTURE_RECT_INDEX:
-      emit = WRITEMASK_XY;
+      emit = BRW_WRITEMASK_XY;
       nr = 2;
       break;
    case TEXTURE_3D_INDEX:
    case TEXTURE_CUBE_INDEX:
-      emit = WRITEMASK_XYZ;
+      emit = BRW_WRITEMASK_XYZ;
       nr = 3;
       break;
    default:
@@ -815,7 +815,7 @@ static void emit_tex( struct brw_wm_compile *c,
 
    if (inst->tex_shadow) {
       nr = 4;
-      emit |= WRITEMASK_W;
+      emit |= BRW_WRITEMASK_W;
    }
 
    msgLength = 1;
@@ -922,18 +922,18 @@ static void emit_lit( struct brw_compile *p,
 		      GLuint mask,
 		      const struct brw_reg *arg0 )
 {
-   assert((mask & WRITEMASK_XW) == 0);
+   assert((mask & BRW_WRITEMASK_XW) == 0);
 
-   if (mask & WRITEMASK_Y) {
+   if (mask & BRW_WRITEMASK_Y) {
       brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
       brw_MOV(p, dst[1], arg0[0]);
       brw_set_saturate(p, 0);
    }
 
-   if (mask & WRITEMASK_Z) {
+   if (mask & BRW_WRITEMASK_Z) {
       emit_math2(p, BRW_MATH_FUNCTION_POW,
 		 &dst[2],
-		 WRITEMASK_X | (mask & SATURATE),
+		 BRW_WRITEMASK_X | (mask & SATURATE),
 		 &arg0[1],
 		 &arg0[3]);
    }
@@ -944,10 +944,10 @@ static void emit_lit( struct brw_compile *p,
     */
    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_LE, arg0[0], brw_imm_f(0));
    {
-      if (mask & WRITEMASK_Y) 
+      if (mask & BRW_WRITEMASK_Y) 
 	 brw_MOV(p, dst[1], brw_imm_f(0));
 
-      if (mask & WRITEMASK_Z) 
+      if (mask & BRW_WRITEMASK_Z) 
 	 brw_MOV(p, dst[2], brw_imm_f(0)); 
    }
    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
@@ -1414,10 +1414,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 /* There is an scs math function, but it would need some
 	  * fixup for 16-element execution.
 	  */
-	 if (dst_flags & WRITEMASK_X)
-	    emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
-	 if (dst_flags & WRITEMASK_Y)
-	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|WRITEMASK_X, args[0]);
+	 if (dst_flags & BRW_WRITEMASK_X)
+	    emit_math1(p, BRW_MATH_FUNCTION_COS, dst, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
+	 if (dst_flags & BRW_WRITEMASK_Y)
+	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
 	 break;
 
       case OPCODE_POW:
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 5f47d86f71..be240031c7 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -115,7 +115,7 @@ static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
    struct prog_dst_register reg;
    reg.File = file;
    reg.Index = idx;
-   reg.WriteMask = WRITEMASK_XYZW;
+   reg.WriteMask = BRW_WRITEMASK_XYZW;
    reg.RelAddr = 0;
    reg.CondMask = COND_TR;
    reg.CondSwizzle = 0;
@@ -249,7 +249,7 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
        */
       emit_op(c,
 	      WM_PIXELXY,
-	      dst_mask(pixel_xy, WRITEMASK_XY),
+	      dst_mask(pixel_xy, BRW_WRITEMASK_XY),
 	      0,
 	      payload_r0_depth,
 	      src_undef(),
@@ -272,7 +272,7 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
        */
       emit_op(c,
 	      WM_DELTAXY,
-	      dst_mask(delta_xy, WRITEMASK_XY),
+	      dst_mask(delta_xy, BRW_WRITEMASK_XY),
 	      0,
 	      pixel_xy, 
 	      payload_r0_depth,
@@ -295,7 +295,7 @@ static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
        */
       emit_op(c,
 	      WM_PIXELW,
-	      dst_mask(pixel_w, WRITEMASK_W),
+	      dst_mask(pixel_w, BRW_WRITEMASK_W),
 	      0,
 	      interp_wpos,
 	      deltas, 
@@ -327,13 +327,13 @@ static void emit_interp( struct brw_wm_compile *c,
        */
       emit_op(c,
 	      WM_WPOSXY,
-	      dst_mask(dst, WRITEMASK_XY),
+	      dst_mask(dst, BRW_WRITEMASK_XY),
 	      0,
 	      get_pixel_xy(c),
 	      src_undef(),
 	      src_undef());
       
-      dst = dst_mask(dst, WRITEMASK_ZW);
+      dst = dst_mask(dst, BRW_WRITEMASK_ZW);
 
       /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
        */
@@ -370,7 +370,7 @@ static void emit_interp( struct brw_wm_compile *c,
       /* Interpolate the fog coordinate */
       emit_op(c,
 	      WM_PINTERP,
-	      dst_mask(dst, WRITEMASK_X),
+	      dst_mask(dst, BRW_WRITEMASK_X),
 	      0,
 	      interp,
 	      deltas,
@@ -378,7 +378,7 @@ static void emit_interp( struct brw_wm_compile *c,
 
       emit_op(c,
 	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, WRITEMASK_YZW),
+	      dst_mask(dst, BRW_WRITEMASK_YZW),
 	      0,
 	      src_swizzle(interp,
 			  SWIZZLE_ZERO,
@@ -393,7 +393,7 @@ static void emit_interp( struct brw_wm_compile *c,
       /* XXX review/test this case */
       emit_op(c,
               WM_FRONTFACING,
-              dst_mask(dst, WRITEMASK_X),
+              dst_mask(dst, BRW_WRITEMASK_X),
               0,
               src_undef(),
               src_undef(),
@@ -404,7 +404,7 @@ static void emit_interp( struct brw_wm_compile *c,
       /* XXX review/test this case */
       emit_op(c,
 	      WM_PINTERP,
-	      dst_mask(dst, WRITEMASK_XY),
+	      dst_mask(dst, BRW_WRITEMASK_XY),
 	      0,
 	      interp,
 	      deltas,
@@ -412,7 +412,7 @@ static void emit_interp( struct brw_wm_compile *c,
 
       emit_op(c,
 	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, WRITEMASK_ZW),
+	      dst_mask(dst, BRW_WRITEMASK_ZW),
 	      0,
 	      src_swizzle(interp,
 			  SWIZZLE_ZERO,
@@ -518,19 +518,19 @@ static void precalc_dst( struct brw_wm_compile *c,
    struct prog_src_register src1 = inst->SrcReg[1];
    struct prog_dst_register dst = inst->DstReg;
    
-   if (dst.WriteMask & WRITEMASK_Y) {      
+   if (dst.WriteMask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
       emit_op(c,
 	      TGSI_OPCODE_MUL,
-	      dst_mask(dst, WRITEMASK_Y),
+	      dst_mask(dst, BRW_WRITEMASK_Y),
 	      inst->SaturateMode,
 	      src0,
 	      src1,
 	      src_undef());
    }
 
-   if (dst.WriteMask & WRITEMASK_XZ) {
+   if (dst.WriteMask & BRW_WRITEMASK_XZ) {
       struct prog_instruction *swz;
       GLuint z = GET_SWZ(src0.Swizzle, Z);
 
@@ -538,7 +538,7 @@ static void precalc_dst( struct brw_wm_compile *c,
        */
       swz = emit_op(c,
 		    TGSI_OPCODE_MOV,
-		    dst_mask(dst, WRITEMASK_XZ),
+		    dst_mask(dst, BRW_WRITEMASK_XZ),
 		    inst->SaturateMode,
 		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
 		    src_undef(),
@@ -546,12 +546,12 @@ static void precalc_dst( struct brw_wm_compile *c,
       /* Avoid letting negation flag of src0 affect our 1 constant. */
       swz->SrcReg[0].Negate &= ~NEGATE_X;
    }
-   if (dst.WriteMask & WRITEMASK_W) {
+   if (dst.WriteMask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
       emit_op(c,
 	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, WRITEMASK_W),
+	      dst_mask(dst, BRW_WRITEMASK_W),
 	      inst->SaturateMode,
 	      src1,
 	      src_undef(),
@@ -566,14 +566,14 @@ static void precalc_lit( struct brw_wm_compile *c,
    struct prog_src_register src0 = inst->SrcReg[0];
    struct prog_dst_register dst = inst->DstReg;
    
-   if (dst.WriteMask & WRITEMASK_XW) {
+   if (dst.WriteMask & BRW_WRITEMASK_XW) {
       struct prog_instruction *swz;
 
       /* dst.xw = swz src0.1111
        */
       swz = emit_op(c,
 		    TGSI_OPCODE_MOV,
-		    dst_mask(dst, WRITEMASK_XW),
+		    dst_mask(dst, BRW_WRITEMASK_XW),
 		    0,
 		    src_swizzle1(src0, SWIZZLE_ONE),
 		    src_undef(),
@@ -582,10 +582,10 @@ static void precalc_lit( struct brw_wm_compile *c,
       swz->SrcReg[0].Negate = NEGATE_NONE;
    }
 
-   if (dst.WriteMask & WRITEMASK_YZ) {
+   if (dst.WriteMask & BRW_WRITEMASK_YZ) {
       emit_op(c,
 	      TGSI_OPCODE_LIT,
-	      dst_mask(dst, WRITEMASK_YZ),
+	      dst_mask(dst, BRW_WRITEMASK_YZ),
 	      inst->SaturateMode,
 	      src0,
 	      src_undef(),
@@ -649,7 +649,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 
        /* tmp0 = 1 / tmp1 */
        emit_op(c, TGSI_OPCODE_RCP,
-               dst_mask(tmp0, WRITEMASK_X),
+               dst_mask(tmp0, BRW_WRITEMASK_X),
                0,
                tmp1src,
                src_undef(),
@@ -740,7 +740,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        */
       emit_op(c,
 	      TGSI_OPCODE_ADD,
-	      dst_mask(tmp, WRITEMASK_XYZ),
+	      dst_mask(tmp, BRW_WRITEMASK_XYZ),
 	      0,
 	      tmpsrc,
 	      C0,
@@ -751,7 +751,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 
       emit_op(c,
 	      TGSI_OPCODE_MUL,
-	      dst_mask(tmp, WRITEMASK_Y),
+	      dst_mask(tmp, BRW_WRITEMASK_Y),
 	      0,
 	      tmpsrc,
 	      src_swizzle1(C0, W),
@@ -766,7 +766,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 
       emit_op(c,
 	      TGSI_OPCODE_MAD,
-	      dst_mask(dst, WRITEMASK_XYZ),
+	      dst_mask(dst, BRW_WRITEMASK_XYZ),
 	      0,
 	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
 	      C1,
@@ -776,7 +776,7 @@ static void precalc_tex( struct brw_wm_compile *c,
        */
       emit_op(c,
 	      TGSI_OPCODE_MAD,
-	      dst_mask(dst, WRITEMASK_Y),
+	      dst_mask(dst, BRW_WRITEMASK_Y),
 	      0,
 	      src_swizzle1(tmpsrc, Z),
 	      src_swizzle1(C1, W),
@@ -863,7 +863,7 @@ static void precalc_txp( struct brw_wm_compile *c,
        */
       emit_op(c,
 	      TGSI_OPCODE_RCP,
-	      dst_mask(tmp, WRITEMASK_W),
+	      dst_mask(tmp, BRW_WRITEMASK_W),
 	      0,
 	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
 	      src_undef(),
@@ -873,7 +873,7 @@ static void precalc_txp( struct brw_wm_compile *c,
        */
       emit_op(c,
 	      TGSI_OPCODE_MUL,
-	      dst_mask(tmp, WRITEMASK_XYZ),
+	      dst_mask(tmp, BRW_WRITEMASK_XYZ),
 	      0,
 	      src0,
 	      src_swizzle1(src_reg_from_dst(tmp), W),
@@ -1053,7 +1053,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
 	  */
-	 out->DstReg.WriteMask &= WRITEMASK_XY;
+	 out->DstReg.WriteMask &= BRW_WRITEMASK_XY;
 	 break;
 	 
       case TGSI_OPCODE_DST:
@@ -1082,7 +1082,7 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	 out = emit_insn(c, inst);
 	 /* This should probably be done in the parser. 
 	  */
-	 out->DstReg.WriteMask &= WRITEMASK_XYZ;
+	 out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ;
 	 break;
 
       case TGSI_OPCODE_KIL: 
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 0c411b57f5..de5f5fe821 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -32,7 +32,6 @@
 
 #include "brw_context.h"
 #include "brw_wm.h"
-#include "shader/prog_parameter.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index d940ec09a9..f2ae3a958f 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -91,15 +91,15 @@ static GLuint get_texcoord_mask( GLuint tex_idx )
 {
    switch (tex_idx) {
    case TEXTURE_1D_INDEX:
-      return WRITEMASK_X;
+      return BRW_WRITEMASK_X;
    case TEXTURE_2D_INDEX:
-      return WRITEMASK_XY;
+      return BRW_WRITEMASK_XY;
    case TEXTURE_3D_INDEX:
-      return WRITEMASK_XYZ;
+      return BRW_WRITEMASK_XYZ;
    case TEXTURE_CUBE_INDEX:
-      return WRITEMASK_XYZ;
+      return BRW_WRITEMASK_XYZ;
    case TEXTURE_RECT_INDEX:
-      return WRITEMASK_XY;
+      return BRW_WRITEMASK_XY;
    default: return 0;
    }
 }
@@ -121,16 +121,16 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       GLuint read0, read1, read2;
 
       if (inst->opcode == TGSI_OPCODE_KIL) {
-	 track_arg(c, inst, 0, WRITEMASK_XYZW); /* All args contribute to final */
+	 track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); /* All args contribute to final */
 	 continue;
       }
 
       if (inst->opcode == WM_FB_WRITE) {
-	 track_arg(c, inst, 0, WRITEMASK_XYZW); 
-	 track_arg(c, inst, 1, WRITEMASK_XYZW); 
+	 track_arg(c, inst, 0, BRW_WRITEMASK_XYZW); 
+	 track_arg(c, inst, 1, BRW_WRITEMASK_XYZW); 
 	 if (c->key.source_depth_to_render_target &&
 	     c->key.computes_depth)
-	    track_arg(c, inst, 2, WRITEMASK_Z); 
+	    track_arg(c, inst, 2, BRW_WRITEMASK_Z); 
 	 else
 	    track_arg(c, inst, 2, 0); 
 	 continue;
@@ -191,9 +191,9 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 break;
 
       case TGSI_OPCODE_XPD: 
-	 if (writemask & WRITEMASK_X) read0 |= WRITEMASK_YZ;	 
-	 if (writemask & WRITEMASK_Y) read0 |= WRITEMASK_XZ;	 
-	 if (writemask & WRITEMASK_Z) read0 |= WRITEMASK_XY;
+	 if (writemask & BRW_WRITEMASK_X) read0 |= BRW_WRITEMASK_YZ;	 
+	 if (writemask & BRW_WRITEMASK_Y) read0 |= BRW_WRITEMASK_XZ;	 
+	 if (writemask & BRW_WRITEMASK_Z) read0 |= BRW_WRITEMASK_XY;
 	 read1 = read0;
 	 break;
 
@@ -206,12 +206,12 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       case TGSI_OPCODE_SCS:
       case WM_CINTERP:
       case WM_PIXELXY:
-	 read0 = WRITEMASK_X;
+	 read0 = BRW_WRITEMASK_X;
 	 break;
 
       case TGSI_OPCODE_POW:
-	 read0 = WRITEMASK_X;
-	 read1 = WRITEMASK_X;
+	 read0 = BRW_WRITEMASK_X;
+	 read1 = BRW_WRITEMASK_X;
 	 break;
 
       case TGSI_OPCODE_TEX:
@@ -219,57 +219,57 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 	 read0 = get_texcoord_mask(inst->tex_idx);
 
          if (inst->tex_shadow)
-	    read0 |= WRITEMASK_Z;
+	    read0 |= BRW_WRITEMASK_Z;
 	 break;
 
       case TGSI_OPCODE_TXB:
 	 /* Shadow ignored for txb.
 	  */
-	 read0 = get_texcoord_mask(inst->tex_idx) | WRITEMASK_W;
+	 read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W;
 	 break;
 
       case WM_WPOSXY:
-	 read0 = writemask & WRITEMASK_XY;
+	 read0 = writemask & BRW_WRITEMASK_XY;
 	 break;
 
       case WM_DELTAXY:
-	 read0 = writemask & WRITEMASK_XY;
-	 read1 = WRITEMASK_X;
+	 read0 = writemask & BRW_WRITEMASK_XY;
+	 read1 = BRW_WRITEMASK_X;
 	 break;
 
       case WM_PIXELW:
-	 read0 = WRITEMASK_X;
-	 read1 = WRITEMASK_XY;
+	 read0 = BRW_WRITEMASK_X;
+	 read1 = BRW_WRITEMASK_XY;
 	 break;
 
       case WM_LINTERP:
-	 read0 = WRITEMASK_X;
-	 read1 = WRITEMASK_XY;
+	 read0 = BRW_WRITEMASK_X;
+	 read1 = BRW_WRITEMASK_XY;
 	 break;
 
       case WM_PINTERP:
-	 read0 = WRITEMASK_X; /* interpolant */
-	 read1 = WRITEMASK_XY; /* deltas */
-	 read2 = WRITEMASK_W; /* pixel w */
+	 read0 = BRW_WRITEMASK_X; /* interpolant */
+	 read1 = BRW_WRITEMASK_XY; /* deltas */
+	 read2 = BRW_WRITEMASK_W; /* pixel w */
 	 break;
 
       case TGSI_OPCODE_DP3:	
-	 read0 = WRITEMASK_XYZ;
-	 read1 = WRITEMASK_XYZ;
+	 read0 = BRW_WRITEMASK_XYZ;
+	 read1 = BRW_WRITEMASK_XYZ;
 	 break;
 
       case TGSI_OPCODE_DPH:
-	 read0 = WRITEMASK_XYZ;
-	 read1 = WRITEMASK_XYZW;
+	 read0 = BRW_WRITEMASK_XYZ;
+	 read1 = BRW_WRITEMASK_XYZW;
 	 break;
 
       case TGSI_OPCODE_DP4:
-	 read0 = WRITEMASK_XYZW;
-	 read1 = WRITEMASK_XYZW;
+	 read0 = BRW_WRITEMASK_XYZW;
+	 read1 = BRW_WRITEMASK_XYZW;
 	 break;
 
       case TGSI_OPCODE_LIT: 
-	 read0 = WRITEMASK_XYW;
+	 read0 = BRW_WRITEMASK_XYW;
 	 break;
 
       case TGSI_OPCODE_DST:
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index 86dcb74b5b..5045c9b4a6 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -31,7 +31,7 @@
                    
 
 #include "intel_mipmap_tree.h"
-#include "intel_batchbuffer.h"
+#include "brw_batchbuffer.h"
 #include "intel_tex.h"
 #include "intel_fbo.h"
 
diff --git a/src/gallium/drivers/i965/intel_batchbuffer.h b/src/gallium/drivers/i965/intel_batchbuffer.h
deleted file mode 100644
index be04656aec..0000000000
--- a/src/gallium/drivers/i965/intel_batchbuffer.h
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef INTEL_BATCHBUFFER_H
-#define INTEL_BATCHBUFFER_H
-
-#include "intel_bufmgr.h"
-#include "intel_reg.h"
-
-#define BATCH_SZ 16384
-#define BATCH_RESERVED 16
-
-enum cliprect_mode {
-   /**
-    * Batchbuffer contents may be looped over per cliprect, but do not
-    * require it.
-    */
-   IGNORE_CLIPRECTS,
-   /**
-    * Batchbuffer contents require looping over per cliprect at batch submit
-    * time.
-    *
-    * This will be upgraded to NO_LOOP_CLIPRECTS when there's a single
-    * constant cliprect, as in DRI2 or FBO rendering.
-    */
-   LOOP_CLIPRECTS,
-   /**
-    * Batchbuffer contents contain drawing that should not be executed multiple
-    * times.
-    */
-   NO_LOOP_CLIPRECTS,
-   /**
-    * Batchbuffer contents contain drawing that already handles cliprects, such
-    * as 2D drawing to front/back/depth that doesn't respect DRAWING_RECTANGLE.
-    *
-    * Equivalent behavior to NO_LOOP_CLIPRECTS, but may not persist in batch
-    * outside of LOCK/UNLOCK.  This is upgraded to just NO_LOOP_CLIPRECTS when
-    * there's a constant cliprect, as in DRI2 or FBO rendering.
-    */
-   REFERENCES_CLIPRECTS
-};
-
-struct intel_batchbuffer
-{
-   struct intel_context *intel;
-
-   struct brw_winsys_buffer *buf;
-
-   GLubyte *buffer;
-
-   GLubyte *map;
-   GLubyte *ptr;
-
-   GLuint size;
-
-   /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */
-   struct {
-      GLuint total;
-      GLubyte *start_ptr;
-   } emit;
-
-   GLuint dirty_state;
-};
-
-struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
-                                                  *intel);
-
-void intel_batchbuffer_free(struct intel_batchbuffer *batch);
-
-
-void _intel_batchbuffer_flush(struct intel_batchbuffer *batch,
-			      const char *file, int line);
-
-#define intel_batchbuffer_flush(batch) \
-	_intel_batchbuffer_flush(batch, __FILE__, __LINE__)
-
-void intel_batchbuffer_reset(struct intel_batchbuffer *batch);
-
-
-/* Unlike bmBufferData, this currently requires the buffer be mapped.
- * Consider it a convenience function wrapping multple
- * intel_buffer_dword() calls.
- */
-void intel_batchbuffer_data(struct intel_batchbuffer *batch,
-                            const void *data, GLuint bytes,
-			    enum cliprect_mode cliprect_mode);
-
-void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
-                                     GLuint bytes);
-
-GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
-                                       struct brw_winsys_buffer *buffer,
-				       uint32_t read_domains,
-				       uint32_t write_domain,
-				       uint32_t offset);
-
-/* Inline functions - might actually be better off with these
- * non-inlined.  Certainly better off switching all command packets to
- * be passed as structs rather than dwords, but that's a little bit of
- * work...
- */
-static INLINE GLint
-intel_batchbuffer_space(struct intel_batchbuffer *batch)
-{
-   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
-}
-
-
-static INLINE void
-intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
-{
-   assert(batch->map);
-   assert(intel_batchbuffer_space(batch) >= 4);
-   *(GLuint *) (batch->ptr) = dword;
-   batch->ptr += 4;
-}
-
-static INLINE void
-intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
-                                GLuint sz,
-				enum cliprect_mode cliprect_mode)
-{
-   assert(sz < batch->size - 8);
-   if (intel_batchbuffer_space(batch) < sz)
-      intel_batchbuffer_flush(batch);
-
-   /* All commands should be executed once regardless of cliprect
-    * mode.
-    */
-   (void)cliprect_mode;
-}
-
-/* Here are the crusty old macros, to be removed:
- */
-#define BATCH_LOCALS
-
-#define BEGIN_BATCH(n, cliprect_mode) do {				\
-   intel_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
-   assert(intel->batch->emit.start_ptr == NULL);			\
-   intel->batch->emit.total = (n) * 4;					\
-   intel->batch->emit.start_ptr = intel->batch->ptr;			\
-} while (0)
-
-#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d)
-
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
-   assert((unsigned) (delta) < buf->size);				\
-   intel_batchbuffer_emit_reloc(intel->batch, buf,			\
-				read_domains, write_domain, delta);	\
-} while (0)
-
-#define ADVANCE_BATCH() do {						\
-   unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
-   assert(intel->batch->emit.start_ptr != NULL);			\
-   if (_n != intel->batch->emit.total) {				\
-      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",	\
-	      _n, intel->batch->emit.total);				\
-      abort();								\
-   }									\
-   intel->batch->emit.start_ptr = NULL;					\
-} while(0)
-
-
-static INLINE void
-intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
-{
-   intel_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
-   intel_batchbuffer_emit_dword(batch, MI_FLUSH);
-}
-
-#endif
-- 
cgit v1.2.3


From 4f7931bb3554cb1839adc2044e3abe6d4af8b0b5 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 24 Oct 2009 17:07:01 +0100
Subject: i965g: more work on compiling

---
 src/gallium/drivers/i965/Makefile               |   2 -
 src/gallium/drivers/i965/brw_batchbuffer.h      |  41 ++-
 src/gallium/drivers/i965/brw_cc.c               |   8 +-
 src/gallium/drivers/i965/brw_clip.c             |   7 +-
 src/gallium/drivers/i965/brw_clip.h             |  19 +-
 src/gallium/drivers/i965/brw_clip_line.c        |  21 +-
 src/gallium/drivers/i965/brw_clip_point.c       |   1 -
 src/gallium/drivers/i965/brw_clip_state.c       |  22 +-
 src/gallium/drivers/i965/brw_clip_tri.c         |   5 +-
 src/gallium/drivers/i965/brw_clip_unfilled.c    |  28 +-
 src/gallium/drivers/i965/brw_clip_util.c        |  31 +-
 src/gallium/drivers/i965/brw_context.c          |  87 +++---
 src/gallium/drivers/i965/brw_context.h          |  47 ++-
 src/gallium/drivers/i965/brw_curbe.c            |  63 ++--
 src/gallium/drivers/i965/brw_debug.h            |  42 +++
 src/gallium/drivers/i965/brw_defines.h          |   7 -
 src/gallium/drivers/i965/brw_draw.c             | 134 ++++-----
 src/gallium/drivers/i965/brw_draw.h             |  15 +-
 src/gallium/drivers/i965/brw_draw_upload.c      |   2 -
 src/gallium/drivers/i965/brw_eu.h               |   2 +
 src/gallium/drivers/i965/brw_misc_state.c       |   2 -
 src/gallium/drivers/i965/brw_pipe_debug.c       |   2 -
 src/gallium/drivers/i965/brw_pipe_query.c       |   4 +-
 src/gallium/drivers/i965/brw_pipe_vertex.c      |  26 ++
 src/gallium/drivers/i965/brw_reg.h              |  36 +++
 src/gallium/drivers/i965/brw_screen.c           | 365 ++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_screen.h           |   6 +-
 src/gallium/drivers/i965/brw_sf.c               |   2 +-
 src/gallium/drivers/i965/brw_sf_state.c         |   3 +-
 src/gallium/drivers/i965/brw_state_cache.c      |   4 +-
 src/gallium/drivers/i965/brw_state_debug.c      | 145 ++++++++++
 src/gallium/drivers/i965/brw_state_upload.c     | 113 +-------
 src/gallium/drivers/i965/brw_tex.c              |   2 -
 src/gallium/drivers/i965/brw_tex_layout.c       |   4 +-
 src/gallium/drivers/i965/brw_types.h            |   3 +
 src/gallium/drivers/i965/brw_vs_surface_state.c |   3 +-
 src/gallium/drivers/i965/brw_winsys.h           |  54 ++--
 src/gallium/drivers/i965/brw_wm.c               |   2 +-
 src/gallium/drivers/i965/brw_wm_state.c         |   8 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c |   9 +-
 src/gallium/drivers/i965/intel_chipset.h        | 116 --------
 src/gallium/drivers/i965/intel_tex_format.c     |  28 --
 src/gallium/drivers/i965/intel_tex_layout.c     |   2 -
 43 files changed, 920 insertions(+), 603 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_debug.h
 delete mode 100644 src/gallium/drivers/i965/brw_pipe_debug.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_vertex.c
 create mode 100644 src/gallium/drivers/i965/brw_screen.c
 create mode 100644 src/gallium/drivers/i965/brw_state_debug.c
 delete mode 100644 src/gallium/drivers/i965/intel_chipset.h
 delete mode 100644 src/gallium/drivers/i965/intel_tex_format.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 480d2efbc5..40c8364824 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -26,7 +26,6 @@ C_SOURCES = \
 	brw_gs_state.c \
 	brw_misc_state.c \
 	brw_pipe_blend.c \
-	brw_pipe_debug.c \
 	brw_pipe_depth.c \
 	brw_pipe_fb.c \
 	brw_pipe_flush.c \
@@ -63,7 +62,6 @@ C_SOURCES = \
 	brw_wm_surface_state.c \
 	brw_bo.c \
 	intel_batchbuffer.c \
-	intel_tex_format.c \
 	intel_tex_layout.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 76b3c1bf69..b8492882e1 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -4,6 +4,7 @@
 #include "brw_types.h"
 #include "brw_winsys.h"
 #include "brw_reg.h"
+#include "util/u_debug.h"
 
 #define BATCH_SZ 16384
 #define BATCH_RESERVED 16
@@ -68,56 +69,50 @@ brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
 
 static INLINE boolean
 brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
-                                GLuint sz,
-				enum cliprect_mode cliprect_mode)
+                                GLuint sz)
 {
    assert(sz < batch->size - 8);
    if (brw_batchbuffer_space(batch) < sz) {
       assert(0);
       return FALSE;
    }
-
-   /* All commands should be executed once regardless of cliprect
-    * mode.
-    */
-   (void)cliprect_mode;
+#ifdef DEBUG
+   batch->emit.end_ptr = batch->ptr + sz;
+#endif
+   return TRUE;
 }
 
 /* Here are the crusty old macros, to be removed:
  */
-#define BATCH_LOCALS
-
 #define BEGIN_BATCH(n, cliprect_mode) do {				\
-   brw_batchbuffer_require_space(intel->batch, (n)*4, cliprect_mode); \
-   assert(intel->batch->emit.start_ptr == NULL);			\
-   intel->batch->emit.total = (n) * 4;					\
-   intel->batch->emit.start_ptr = intel->batch->ptr;			\
+   brw_batchbuffer_require_space(brw->batch, (n)*4); \
 } while (0)
 
-#define OUT_BATCH(d) brw_batchbuffer_emit_dword(intel->batch, d)
+#define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d)
 
 #define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
    assert((unsigned) (delta) < buf->size);				\
-   brw_batchbuffer_emit_reloc(intel->batch, buf,			\
+   brw_batchbuffer_emit_reloc(brw->batch, buf,			\
 				read_domains, write_domain, delta);	\
 } while (0)
 
+#ifdef DEBUG
 #define ADVANCE_BATCH() do {						\
-   unsigned int _n = intel->batch->ptr - intel->batch->emit.start_ptr;	\
-   assert(intel->batch->emit.start_ptr != NULL);			\
-   if (_n != intel->batch->emit.total) {				\
-      fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n",	\
-	      _n, intel->batch->emit.total);				\
+   unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr;	\
+   if (_n != 0) {							\
+      debug_printf("%s: %d too many bytes emitted to batch\n", __FUNCTION__, _n); \
       abort();								\
    }									\
-   intel->batch->emit.start_ptr = NULL;					\
+   brw->batch->emit.end_ptr = NULL;					\
 } while(0)
-
+#else
+#define ADVANCE_BATCH()
+#endif
 
 static INLINE void
 brw_batchbuffer_emit_mi_flush(struct brw_batchbuffer *batch)
 {
-   brw_batchbuffer_require_space(batch, 4, IGNORE_CLIPRECTS);
+   brw_batchbuffer_require_space(batch, 4);
    brw_batchbuffer_emit_dword(batch, MI_FLUSH);
 }
 
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index c8e7851d75..76759304eb 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -109,10 +109,10 @@ static void
 cc_unit_populate_key(const struct brw_context *brw,
 		     struct brw_cc_unit_key *key)
 {
-   key->cc0 = brw->curr.dsa->cc0;
-   key->cc1 = brw->curr.dsa->cc1;
-   key->cc2 = brw->curr.dsa->cc2;
-   key->cc3 = combine_cc3( brw->curr.dsa->cc3, brw->curr.blend->cc3 );
+   key->cc0 = brw->curr.zstencil->cc0;
+   key->cc1 = brw->curr.zstencil->cc1;
+   key->cc2 = brw->curr.zstencil->cc2;
+   key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
    key->cc5 = brw->curr.blend->cc5;
    key->cc6 = brw->curr.blend->cc6;
    key->cc7 = brw->curr.blend->cc7;
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 591e904705..622d9dba96 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -65,15 +65,16 @@ static void compile_clip_prog( struct brw_context *brw,
 
    c.func.single_program_flow = 1;
 
+   c.chipset = brw->chipset;
    c.key = *key;
-   c.need_ff_sync = BRW_IS_IGDNG(brw);
+   c.need_ff_sync = c.chipset.is_igdng;
 
    /* Need to locate the two positions present in vertex + header.
     * These are currently hardcoded:
     */
    c.header_position_offset = ATTR_SIZE;
 
-   if (BRW_IS_IGDNG(brw))
+   if (c.chipset.is_igdng)
        delta = 3 * REG_SIZE;
    else
        delta = REG_SIZE;
@@ -160,7 +161,7 @@ static void upload_clip_prog(struct brw_context *brw)
    key.primitive = brw->reduced_primitive;
 
    /* PIPE_NEW_VS */
-   key.nr_attrs = brw->curr.vs->info.file_max[TGSI_FILE_OUTPUT] + 1;
+   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1;
 
    /* PIPE_NEW_CLIP */
    key.nr_userclip = brw->curr.ucp.nr;
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index cfe51bf292..772c34be88 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -32,8 +32,8 @@
 #ifndef BRW_CLIP_H
 #define BRW_CLIP_H
 
-
-#include "brw_context.h"
+#include "pipe/p_state.h"
+#include "brw_reg.h"
 #include "brw_eu.h"
 
 #define MAX_VERTS (3+6+6)	
@@ -60,6 +60,12 @@ struct brw_clip_prog_key {
    GLfloat offset_units;
 };
 
+struct brw_clip_prog_data {
+   GLuint curb_read_length;	/* user planes? */
+   GLuint clip_mode;
+   GLuint urb_read_length;
+   GLuint total_grf;
+};
 
 #define CLIP_LINE   0
 #define CLIP_POINT  1
@@ -112,12 +118,21 @@ struct brw_clip_compile {
    GLuint last_tmp;
 
    GLboolean need_direction;
+   struct brw_chipset chipset;
 
    GLuint last_mrf;
 
    GLuint header_position_offset;
    GLuint offset[PIPE_MAX_SHADER_OUTPUTS];
    GLboolean need_ff_sync;
+
+   GLuint nr_color_attrs;
+   GLuint offset_color0;
+   GLuint offset_color1;
+   GLuint offset_bfc0;
+   GLuint offset_bfc1;
+   
+   GLuint offset_edge;
 };
 
 #define ATTR_SIZE  (4*4)
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index 6b4da25644..a4790bda95 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -29,14 +29,16 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
+#include "util/u_debug.h"
+
 #include "brw_defines.h"
-#include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_clip.h"
 
 
+
 static void brw_clip_line_alloc_regs( struct brw_clip_compile *c )
 {
    GLuint i = 0,j;
@@ -130,6 +132,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    struct brw_instruction *is_neg2 = NULL;
    struct brw_instruction *not_culled;
    struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
+   const int hpos = 0;		/* XXX: position not always first element */
 
    brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
@@ -145,7 +148,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    brw_clip_init_clipmask(c);
 
    /* -ve rhw workaround */
-   if (BRW_IS_965(p->brw)) {
+   if (c->chipset.is_965) {
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
               brw_imm_ud(1<<20));
@@ -170,19 +173,19 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
 	 /* dp = DP4(vtx->position, plane) 
 	  */
-	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation);
 
 	 /* if (IS_NEGATIVE(dp1)) 
 	  */
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
-	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation);
 	 is_negative = brw_IF(p, BRW_EXECUTE_1);
 	 {
              /*
               * Both can be negative on GM965/G965 due to RHW workaround
               * if so, this object should be rejected.
               */
-             if (BRW_IS_965(p->brw)) {
+             if (c->chipset.is_965) {
                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, c->reg.dp0, brw_imm_f(0.0));
                  is_neg2 = brw_IF(p, BRW_EXECUTE_1);
                  {
@@ -207,7 +210,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
              /* If both are positive, do nothing */
              /* Only on GM965/G965 */
-             if (BRW_IS_965(p->brw)) {
+             if (c->chipset.is_965) {
                  brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0));
                  is_neg2 = brw_IF(p, BRW_EXECUTE_1);
              }
@@ -222,7 +225,7 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
                  brw_set_predicate_control(p, BRW_PREDICATE_NONE);
              }
 
-             if (BRW_IS_965(p->brw)) {
+             if (c->chipset.is_965) {
                  brw_ENDIF(p, is_neg2);
              }
          }
@@ -245,8 +248,8 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0));
    not_culled = brw_IF(p, BRW_EXECUTE_1);
    {
-      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, GL_FALSE);
-      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, GL_FALSE);
+      brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE);
+      brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE);
 
       brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START);
       brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); 
diff --git a/src/gallium/drivers/i965/brw_clip_point.c b/src/gallium/drivers/i965/brw_clip_point.c
index b2cf7b2011..e0a5330556 100644
--- a/src/gallium/drivers/i965/brw_clip_point.c
+++ b/src/gallium/drivers/i965/brw_clip_point.c
@@ -30,7 +30,6 @@
   */
 
 #include "brw_defines.h"
-#include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_clip.h"
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 0ea7ce5734..25b8c6372f 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -29,9 +29,13 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
+#include "util/u_math.h"
+
 #include "brw_context.h"
+#include "brw_clip.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_debug.h"
 
 struct brw_clip_unit_key {
    unsigned int total_grf;
@@ -77,7 +81,7 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    memset(&clip, 0, sizeof(clip));
 
-   clip.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
    clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
 
@@ -112,10 +116,10 @@ clip_unit_create_from_key(struct brw_context *brw,
       clip.thread4.max_threads = 1 - 1;
    }
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       clip.thread4.max_threads = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
+   if (BRW_DEBUG & DEBUG_STATS)
       clip.thread4.stats_enable = 1;
 
    clip.clip5.userclip_enable_flags = 0x7f;
@@ -145,12 +149,12 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION,
-		     0,
-		     clip.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_clip_unit_state, thread0),
-		     brw->clip.prog_bo);
+   brw->sws->bo_emit_reloc(bo,
+			   I915_GEM_DOMAIN_INSTRUCTION,
+			   0,
+			   clip.thread0.grf_reg_count << 1,
+			   offsetof(struct brw_clip_unit_state, thread0),
+			   brw->clip.prog_bo);
 
    return bo;
 }
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index d8feca6a87..5486f4fa89 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -30,7 +30,6 @@
   */
 
 #include "brw_defines.h"
-#include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_clip.h"
@@ -71,7 +70,7 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
       for (j = 0; j < 3; j++) {
 	 GLuint delta = c->nr_attrs*16 + 32;
 
-         if (BRW_IS_IGDNG(c->func.brw))
+         if (c->chipset.is_igdng)
              delta = c->nr_attrs * 16 + 32 * 3;
 
 	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
@@ -565,7 +564,7 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
 
    /* if -ve rhw workaround bit is set, 
       do cliptest */
-   if (BRW_IS_965(p->brw)) {
+   if (c->chipset.is_965) {
       brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
       brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), 
               brw_imm_ud(1<<20));
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
index 8501599aef..1cb86dd25b 100644
--- a/src/gallium/drivers/i965/brw_clip_unfilled.c
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -29,10 +29,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-#include "brw_batchbuffer.h"
-
 #include "brw_defines.h"
-#include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_clip.h"
@@ -126,8 +123,7 @@ static void copy_bfc( struct brw_clip_compile *c )
 
    /* Do we have any colors to copy? 
     */
-   if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) &&
-       !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]))
+   if (c->nr_color_attrs == 0)
       return;
 
    /* In some wierd degnerate cases we can end up testing the
@@ -150,15 +146,15 @@ static void copy_bfc( struct brw_clip_compile *c )
       GLuint i;
 
       for (i = 0; i < 3; i++) {
-	 if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0])
+	 if (c->offset_color0 && c->offset_bfc0)
 	    brw_MOV(p, 
-		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]),
-		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0]));
+		    byte_offset(c->reg.vertex[i], c->offset_color0),
+		    byte_offset(c->reg.vertex[i], c->offset_bfc0));
 
-	 if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])
+	 if (c->offset_color1 && c->offset_bfc1)
 	    brw_MOV(p, 
-		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]),
-		    byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1]));
+		    byte_offset(c->reg.vertex[i], c->offset_color1),
+		    byte_offset(c->reg.vertex[i], c->offset_bfc1));
       }
    }
    brw_ENDIF(p, ccw);
@@ -218,12 +214,12 @@ static void merge_edgeflags( struct brw_clip_compile *c )
    {   
       brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
       brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
-      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edge), brw_imm_f(0));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
       brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
       brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
-      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0));
+      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edge), brw_imm_f(0));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
    brw_ENDIF(p, is_poly);
@@ -294,7 +290,7 @@ static void emit_lines(struct brw_clip_compile *c,
       /* draw edge if edgeflag != 0 */
       brw_CMP(p, 
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
-	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      deref_1f(v0, c->offset_edge),
 	      brw_imm_f(0));
       draw_edge = brw_IF(p, BRW_EXECUTE_1);
       {
@@ -333,7 +329,7 @@ static void emit_points(struct brw_clip_compile *c,
        */
       brw_CMP(p, 
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
-	      deref_1f(v0, c->offset[VERT_RESULT_EDGE]),
+	      deref_1f(v0, c->offset_edge),
 	      brw_imm_f(0));
       draw_point = brw_IF(p, BRW_EXECUTE_1);
       {
@@ -450,7 +446,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
    brw_clip_tri_init_vertices(c);
    brw_clip_init_ff_sync(c);
 
-   assert(c->offset[VERT_RESULT_EDGE]);
+   assert(c->offset_edge);
 
    if (c->key.fill_ccw == CLIP_CULL &&
        c->key.fill_cw == CLIP_CULL) {
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 60bfd3538e..f8f98c8037 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -31,7 +31,6 @@
 
 
 #include "brw_defines.h"
-#include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_clip.h"
@@ -144,10 +143,10 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
    for (i = 0; i < c->nr_attrs; i++) {
       GLuint delta = i*16 + 32;
 
-      if (BRW_IS_IGDNG(p->brw))
+      if (c->chipset.is_igdng)
           delta = i * 16 + 32 * 3;
 
-      if (delta == c->offset[VERT_RESULT_EDGE]) {
+      if (delta == c->offset_edge) {
 	 if (force_edgeflag) 
 	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
 	 else
@@ -178,7 +177,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
    if (i & 1) {
       GLuint delta = i*16 + 32;
 
-      if (BRW_IS_IGDNG(p->brw))
+      if (c->chipset.is_igdng)
           delta = i * 16 + 32 * 3;
 
       brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0));
@@ -304,25 +303,25 @@ void brw_clip_copy_colors( struct brw_clip_compile *c,
 {
    struct brw_compile *p = &c->func;
 
-   if (c->offset[VERT_RESULT_COL0])
+   if (c->offset_color0)
       brw_MOV(p, 
-	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]),
-	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0]));
+	      byte_offset(c->reg.vertex[to], c->offset_color0),
+	      byte_offset(c->reg.vertex[from], c->offset_color0));
 
-   if (c->offset[VERT_RESULT_COL1])
+   if (c->offset_color1)
       brw_MOV(p, 
-	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]),
-	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1]));
+	      byte_offset(c->reg.vertex[to], c->offset_color1),
+	      byte_offset(c->reg.vertex[from], c->offset_color1));
 
-   if (c->offset[VERT_RESULT_BFC0])
+   if (c->offset_bfc0)
       brw_MOV(p, 
-	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]),
-	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0]));
+	      byte_offset(c->reg.vertex[to], c->offset_bfc0),
+	      byte_offset(c->reg.vertex[from], c->offset_bfc0));
 
-   if (c->offset[VERT_RESULT_BFC1])
+   if (c->offset_bfc1)
       brw_MOV(p, 
-	      byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]),
-	      byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1]));
+	      byte_offset(c->reg.vertex[to], c->offset_bfc1),
+	      byte_offset(c->reg.vertex[from], c->offset_bfc1));
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 07a5420d6e..e9605bafe6 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -31,64 +31,31 @@
 
 
 #include "pipe/p_context.h"
+#include "util/u_simple_list.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_draw.h"
 #include "brw_state.h"
-#include "brw_vs.h"
-#include "brw_screen_tex.h"
 #include "brw_batchbuffer.h"
+#include "brw_winsys.h"
 
 
-
-
-struct pipe_context *brw_create_context( struct pipe_screen *screen,
-					 void *priv )
-{
-   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
-
-   if (!brw) {
-      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
-      return GL_FALSE;
-   }
-
-   /* We want the GLSL compiler to emit code that uses condition codes */
-   ctx->Shader.EmitCondCodes = GL_TRUE;
-   ctx->Shader.EmitNVTempInitialization = GL_TRUE;
-
-
-   brw_init_query( brw );
-   brw_init_state( brw );
-   brw_draw_init( brw );
-
-   brw->state.dirty.mesa = ~0;
-   brw->state.dirty.brw = ~0;
-
-   brw->emit_state_always = 0;
-
-   make_empty_list(&brw->query.active_head);
-
-
-   return GL_TRUE;
-}
-
-/**
- * called from intelDestroyContext()
- */
-static void brw_destroy_context( struct brw_context *brw )
+static void brw_destroy_context( struct pipe_context *pipe )
 {
+   struct brw_context *brw = brw_context(pipe);
    int i;
 
    brw_destroy_state(brw);
-   brw_draw_destroy( brw );
 
-   _mesa_free(brw->wm.compile_data);
+   brw_draw_cleanup( brw );
+
+   FREE(brw->wm.compile_data);
 
-   for (i = 0; i < brw->state.nr_color_regions; i++)
-      intel_region_release(&brw->state.color_regions[i]);
-   brw->state.nr_color_regions = 0;
-   intel_region_release(&brw->state.depth_region);
+   for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
+      pipe_surface_reference(&brw->curr.fb.cbufs[i], NULL);
+   brw->curr.fb.nr_cbufs = 0;
+   pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
 
    brw->sws->bo_unreference(brw->curbe.curbe_bo);
    brw->sws->bo_unreference(brw->vs.prog_bo);
@@ -114,3 +81,35 @@ static void brw_destroy_context( struct brw_context *brw )
    brw->sws->bo_unreference(brw->cc.state_bo);
    brw->sws->bo_unreference(brw->cc.vp_bo);
 }
+
+/**
+ * Create an i965 gallium context: allocate a brw_context, install the
+ * destroy callback, run the query/state/draw init routines and set the
+ * mesa and brw dirty flags to all-ones.  Returns the embedded pipe_context,
+ * or NULL if allocation fails.  NOTE(review): 'screen' is not used here yet.
+ */
+struct pipe_context *brw_create_context(struct pipe_screen *screen)
+{
+   struct brw_context *brw = (struct brw_context *) CALLOC_STRUCT(brw_context);
+
+   if (!brw) {
+      debug_printf("%s: failed to alloc context\n", __FUNCTION__);
+      return NULL;
+   }
+
+   /* We want the GLSL compiler to emit code that uses condition codes */
+   //ctx->Shader.EmitCondCodes = GL_TRUE;
+   //ctx->Shader.EmitNVTempInitialization = GL_TRUE;
+
+   brw->base.destroy = brw_destroy_context;
+   brw_init_query( brw );
+   brw_init_state( brw );
+   brw_draw_init( brw );
+
+   brw->state.dirty.mesa = ~0;
+   brw->state.dirty.brw = ~0;
+   brw->emit_state_always = 0;
+   make_empty_list(&brw->query.active_head);
+   return &brw->base;
+}
+
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 3a2fece45c..dd782fdba9 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -35,6 +35,7 @@
 
 #include "brw_structs.h"
 #include "brw_winsys.h"
+#include "brw_reg.h"
 #include "pipe/p_state.h"
 #include "pipe/p_context.h"
 #include "tgsi/tgsi_scan.h"
@@ -178,8 +179,8 @@ struct brw_fragment_shader {
 #define PIPE_NEW_VERTEX_ELEMENT         0x2
 #define PIPE_NEW_FRAGMENT_SHADER        0x2
 #define PIPE_NEW_VERTEX_SHADER          0x2
-#define PIPE_NEW_FRAGMENT_CONSTS        0x2
-#define PIPE_NEW_VERTEX_CONSTS          0x2
+#define PIPE_NEW_FRAGMENT_CONSTANTS     0x2
+#define PIPE_NEW_VERTEX_CONSTANTS       0x2
 #define PIPE_NEW_CLIP                   0x2
 
 
@@ -256,12 +257,8 @@ struct brw_sf_prog_data {
    GLuint urb_entry_size;
 };
 
-struct brw_clip_prog_data {
-   GLuint curb_read_length;	/* user planes? */
-   GLuint clip_mode;
-   GLuint urb_read_length;
-   GLuint total_grf;
-};
+
+struct brw_clip_prog_data;
 
 struct brw_gs_prog_data {
    GLuint urb_read_length;
@@ -298,15 +295,15 @@ struct brw_vs_ouput_sizes {
  * This contains pointers to the drawing surfaces and current texture
  * objects and shader constant buffers (+2).
  */
-#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
+#define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1)
 
 /**
  * Helpers to convert drawing buffers, textures and constant buffers
  * to surface binding table indexes, for WM.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) 
-#define SURF_INDEX_TEXTURE(t)        (MAX_DRAW_BUFFERS + 1 + (t))
+#define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS) 
+#define SURF_INDEX_TEXTURE(t)        (PIPE_MAX_COLOR_BUFS + 1 + (t))
 
 /**
  * Size of surface binding table for the VS.
@@ -457,28 +454,32 @@ struct brw_query_object {
  */
 struct brw_context 
 {
-   struct pipe_context pipe;
+   struct pipe_context base;
+   struct brw_chipset chipset;
 
    struct brw_screen *brw_screen;   
    struct brw_winsys_screen *sws;
 
+   struct brw_batchbuffer *batch;
+
    GLuint primitive;
    GLuint reduced_primitive;
 
    GLboolean emit_state_always;
-   GLboolean no_batch_wrap;
 
    /* Active vertex program: 
     */
    struct {
-      const struct brw_vertex_shader *vs;
-      const struct brw_fragment_shader *fs;
+      const struct brw_vertex_shader *vertex_shader;
+      const struct brw_fragment_shader *fragment_shader;
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
-      const struct brw_depth_stencil_alpha_state *dsa;
+      const struct brw_depth_stencil_alpha_state *zstencil;
       struct pipe_framebuffer_state fb;
       struct pipe_viewport_state vp;
       struct pipe_clip_state ucp;
+      struct pipe_buffer *vertex_constants;
+      struct pipe_buffer *fragment_constants;
    } curr;
 
    struct {
@@ -673,15 +674,6 @@ struct brw_context
 };
 
 
-#define BRW_PACKCOLOR8888(r,g,b,a)  ((r<<24) | (g<<16) | (b<<8) | a)
-
-
-
-/*======================================================================
- * brw_vtbl.c
- */
-void brwInitVtbl( struct brw_context *brw );
-
 
 /*======================================================================
  * brw_queryobj.c
@@ -730,9 +722,10 @@ brw_context( struct pipe_context *ctx )
 }
 
 
+#define BRW_IS_965(brw)    ((brw)->chipset.is_965)
+#define BRW_IS_IGDNG(brw)  ((brw)->chipset.is_igdng)
+#define BRW_IS_G4X(brw)    ((brw)->chipset.is_g4x)
 
 
-#define DO_SETUP_BITS ((1<<(FRAG_ATTRIB_MAX)) - 1)
-
 #endif
 
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index f2524d75e2..edc39ff223 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -29,13 +29,16 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
+#include "util/u_memory.h"
+#include "util/u_math.h"
 
 #include "brw_batchbuffer.h"
-#include "intel_regions.h"
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_state.h"
 #include "brw_util.h"
+#include "brw_debug.h"
+#include "brw_screen.h"
 
 
 /**
@@ -57,7 +60,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
    /* PIPE_NEW_CLIP */
    if (brw->curr.ucp.nr) {
-      GLuint nr_planes = 6 + brw->nr_ucp;
+      GLuint nr_planes = 6 + brw->curr.ucp.nr;
       nr_clip_regs = (nr_planes * 4 + 15) / 16;
    }
 
@@ -156,10 +159,6 @@ static GLfloat fixed_plane[6][4] = {
  */
 static void prepare_constant_buffer(struct brw_context *brw)
 {
-   const struct brw_vertex_program *vp =
-      brw_vertex_program_const(brw->vertex_program);
-   const struct brw_fragment_program *fp =
-      brw_fragment_program_const(brw->fragment_program);
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
    GLfloat *buf;
@@ -174,7 +173,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
       return;
    }
 
-   buf = (GLfloat *) _mesa_calloc(bufsz);
+   buf = (GLfloat *) CALLOC(bufsz, 1);
 
    /* fragment shader constants */
    if (brw->curbe.wm_size) {
@@ -208,12 +207,12 @@ static void prepare_constant_buffer(struct brw_context *brw)
 
       /* Clip planes:
        */
-      assert(brw->nr_ucp <= 6);
-      for (j = 0; j < brw->nr_ucp; j++) {
-	 buf[offset + i * 4 + 0] = brw->ucp[j][0];
-	 buf[offset + i * 4 + 1] = brw->ucp[j][1];
-	 buf[offset + i * 4 + 2] = brw->ucp[j][2];
-	 buf[offset + i * 4 + 3] = brw->ucp[j][3];
+      assert(brw->curr.ucp.nr <= 6);
+      for (j = 0; j < brw->curr.ucp.nr; j++) {
+	 buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
+	 buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
+	 buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
+	 buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
 	 i++;
       }
    }
@@ -221,23 +220,21 @@ static void prepare_constant_buffer(struct brw_context *brw)
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
-      GLuint nr = brw->vs.prog_data->nr_params / 4;
+      GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT];
+      struct pipe_screen *screen = &brw->brw_screen->base;
 
-      /* map vs constant buffer */
+      const GLfloat *value = screen->buffer_map( screen,
+						 brw->curr.vertex_constants,
+						 PIPE_BUFFER_USAGE_CPU_READ);
 
-      /* XXX just use a memcpy here */
-      for (i = 0; i < nr; i++) {
-         const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i];
-	 buf[offset + i * 4 + 0] = value[0];
-	 buf[offset + i * 4 + 1] = value[1];
-	 buf[offset + i * 4 + 2] = value[2];
-	 buf[offset + i * 4 + 3] = value[3];
-      }
+      /* XXX: what if user's constant buffer is too small?
+       */
+      memcpy(&buf[offset], value, nr * 4 * sizeof(float));
 
-      /* unmap vs constant buffer */
+      screen->buffer_unmap( screen, brw->curr.vertex_constants );
    }
 
-   if (0) {
+   if (BRW_DEBUG & DEBUG_CURBE) {
       for (i = 0; i < sz*16; i+=4) 
 	 debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
 		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
@@ -275,18 +272,22 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 /* Allocate a single page for CURBE entries for this batchbuffer.
 	  * They're generally around 64b.
 	  */
-	 brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE",
-					    4096, 1 << 6);
+	 brw->curbe.curbe_bo = brw->sws->bo_alloc(brw->sws, 
+						  BRW_BUFFER_TYPE_CURBE,
+						  4096, 1 << 6);
 	 brw->curbe.curbe_next_offset = 0;
       }
 
       brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
       brw->curbe.curbe_next_offset += bufsz;
-      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);
+      brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
 
       /* Copy data to the buffer:
        */
-      dri_bo_subdata(brw->curbe.curbe_bo, brw->curbe.curbe_offset, bufsz, buf);
+      brw->sws->bo_subdata(brw->curbe.curbe_bo,
+			   brw->curbe.curbe_offset,
+			   bufsz,
+			   buf);
    }
 
    brw_add_validated_bo(brw, brw->curbe.curbe_bo);
@@ -325,8 +326,8 @@ static void emit_constant_buffer(struct brw_context *brw)
 
 const struct brw_tracked_state brw_constant_buffer = {
    .dirty = {
-      .mesa = (PIPE_NEW_FS_CONSTANTS |
-	       PIPE_NEW_VS_CONSTANTS |
+      .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
+	       PIPE_NEW_VERTEX_CONSTANTS |
 	       PIPE_NEW_CLIP),
       .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
 	       BRW_NEW_VERTEX_PROGRAM |
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
new file mode 100644
index 0000000000..aee62f7a5b
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -0,0 +1,42 @@
+#ifndef BRW_DEBUG_H
+#define BRW_DEBUG_H
+
+/* ================================================================
+ * Debugging:
+ */
+
+#define DEBUG_TEXTURE	        0x1
+#define DEBUG_STATE	        0x2
+#define DEBUG_IOCTL	        0x4
+#define DEBUG_BLIT	        0x8
+#define DEBUG_CURBE             0x10
+#define DEBUG_FALLBACKS	        0x20
+#define DEBUG_VERBOSE	        0x40
+#define DEBUG_BATCH             0x80
+#define DEBUG_PIXEL             0x100
+#define DEBUG_BUFMGR            0x200
+#define DEBUG_unused1           0x400
+#define DEBUG_unused2           0x800
+#define DEBUG_unused3           0x1000
+#define DEBUG_SYNC	        0x2000
+#define DEBUG_PRIMS	        0x4000
+#define DEBUG_VERTS	        0x8000
+#define DEBUG_unused4           0x10000
+#define DEBUG_DMA               0x20000
+#define DEBUG_SANITY            0x40000
+#define DEBUG_SLEEP             0x80000
+#define DEBUG_STATS             0x100000
+#define DEBUG_unused5           0x200000
+#define DEBUG_SINGLE_THREAD     0x400000
+#define DEBUG_WM                0x800000
+#define DEBUG_URB               0x1000000
+#define DEBUG_VS                0x2000000
+
+#ifdef DEBUG
+extern int BRW_DEBUG;
+#else
+#define BRW_DEBUG 0
+#endif
+
+
+#endif
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 1dc64ddc8f..544d36306c 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -838,13 +838,6 @@
 #define R02_PRIM_END    0x1
 #define R02_PRIM_START  0x2
 
-#include "intel_chipset.h"
-
-#define BRW_IS_G4X(brw)         (IS_G4X((brw)->brw_screen->pci_id))
-#define BRW_IS_IGDNG(brw)         (IS_IGDNG((brw)->brw_screen->pci_id))
-#define BRW_IS_965(brw)         (!(BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)))
-#define CMD_PIPELINE_SELECT(brw)        ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_PIPELINE_SELECT_GM45 : CMD_PIPELINE_SELECT_965)
-#define CMD_VF_STATISTICS(brw)          ((BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? CMD_VF_STATISTICS_GM45 : CMD_VF_STATISTICS_965)
 #define URB_SIZES(brw)                  (BRW_IS_IGDNG(brw) ? 1024 : \
                                          (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
 
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 741537309a..7af490bc5a 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -30,9 +30,9 @@
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
+#include "brw_debug.h"
 
 #include "brw_batchbuffer.h"
-#include "intel_buffer_objects.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BATCH
 
@@ -56,26 +56,18 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
  * programs be immune to the active primitive (ie. cope with all
  * possibilities).  That may not be realistic however.
  */
-static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
+static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
 {
 
-   if (INTEL_DEBUG & DEBUG_PRIMS)
-      _mesa_printf("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim));
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("PRIM: %s\n", u_prim_name(prim));
    
-   /* Slight optimization to avoid the GS program when not needed:
-    */
-   if (prim == GL_QUAD_STRIP &&
-       ctx->Light.ShadeModel != GL_FLAT &&
-       ctx->Polygon.FrontMode == GL_FILL &&
-       ctx->Polygon.BackMode == GL_FILL)
-      prim = GL_TRIANGLE_STRIP;
-
    if (prim != brw->primitive) {
       brw->primitive = prim;
       brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
 
-      if (reduced_prim[prim] != brw->intel.reduced_primitive) {
-	 brw->intel.reduced_primitive = reduced_prim[prim];
+      if (reduced_prim[prim] != brw->reduced_primitive) {
+	 brw->reduced_primitive = reduced_prim[prim];
 	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
       }
    }
@@ -84,43 +76,33 @@ static GLuint brw_set_prim(struct brw_context *brw, GLenum prim)
 }
 
 
-static GLuint trim(GLenum prim, GLuint length)
-{
-   if (prim == GL_QUAD_STRIP)
-      return length > 3 ? (length - length % 2) : 0;
-   else if (prim == GL_QUADS)
-      return length - length % 4;
-   else 
-      return length;
-}
-
 
-static void brw_emit_prim(struct brw_context *brw,
-			  const struct _mesa_prim *prim,
-			  uint32_t hw_prim)
+static enum pipe_error brw_emit_prim(struct brw_context *brw,
+				     unsigned prim,
+				     unsigned start,
+				     unsigned count,
+				     boolean indexed,
+				     uint32_t hw_prim)
 {
    struct brw_3d_primitive prim_packet;
 
    if (INTEL_DEBUG & DEBUG_PRIMS)
-      _mesa_printf("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), 
-		   prim->start, prim->count);
+      debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count);
 
    prim_packet.header.opcode = CMD_3D_PRIM;
    prim_packet.header.length = sizeof(prim_packet)/4 - 2;
    prim_packet.header.pad = 0;
    prim_packet.header.topology = hw_prim;
-   prim_packet.header.indexed = prim->indexed;
+   prim_packet.header.indexed = indexed;
 
-   prim_packet.verts_per_instance = trim(prim->mode, prim->count);
-   prim_packet.start_vert_location = prim->start;
-   if (prim->indexed)
+   prim_packet.verts_per_instance = count;
+   prim_packet.start_vert_location = start;
+   if (indexed)
       prim_packet.start_vert_location += brw->ib.start_vertex_offset;
    prim_packet.instance_count = 1;
    prim_packet.start_instance_location = 0;
    prim_packet.base_vert_location = prim->basevertex;
 
-   /* Can't wrap here, since we rely on the validated state. */
-   brw->no_batch_wrap = GL_TRUE;
 
    /* If we're set to always flush, do it before and after the primitive emit.
     * We want to catch both missed flushes that hurt instruction/state cache
@@ -128,13 +110,15 @@ static void brw_emit_prim(struct brw_context *brw,
     * the besides the draw code.
     */
    if (intel->always_flush_cache) {
-      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS)
       OUT_BATCH(intel->vtbl.flush_cmd());
       ADVANCE_BATCH();
    }
    if (prim_packet.verts_per_instance) {
-      brw_batchbuffer_data( brw->intel.batch, &prim_packet,
-			      sizeof(prim_packet), LOOP_CLIPRECTS);
+      ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet,
+				  sizeof(prim_packet), LOOP_CLIPRECTS);
+      if (ret)
+	 return ret;
    }
    if (intel->always_flush_cache) {
       BEGIN_BATCH(1, IGNORE_CLIPRECTS);
@@ -142,34 +126,9 @@ static void brw_emit_prim(struct brw_context *brw,
       ADVANCE_BATCH();
    }
 
-   brw->no_batch_wrap = GL_FALSE;
+   return 0;
 }
 
-static void brw_merge_inputs( struct brw_context *brw,
-		       const struct gl_client_array *arrays[])
-{
-   struct brw_vertex_info old = brw->vb.info;
-   GLuint i;
-
-   for (i = 0; i < VERT_ATTRIB_MAX; i++)
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
-
-   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
-   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
-
-   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      brw->vb.inputs[i].glarray = arrays[i];
-      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
-
-      if (arrays[i]->StrideB != 0)
-	 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
-	    ((i%16) * 2);
-   }
-
-   /* Raise statechanges if input sizes have changed. */
-   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
-      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
-}
 
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
@@ -229,14 +188,14 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw,
    return 0;
 }
 
-void brw_draw_prims( struct brw_context *brw,
-		     const struct gl_client_array *arrays[],
-		     const struct _mesa_prim *prim,
-		     GLuint nr_prims,
-		     const struct _mesa_index_buffer *ib,
-		     GLboolean index_bounds_valid,
-		     GLuint min_index,
-		     GLuint max_index )
+
+static boolean
+brw_draw_range_elements(struct pipe_context *pipe,
+			struct pipe_buffer *index_buffer,
+			unsigned index_size,
+			unsigned min_index,
+			unsigned max_index,
+			unsigned mode, unsigned start, unsigned count)
 {
    enum pipe_error ret;
 
@@ -256,15 +215,40 @@ void brw_draw_prims( struct brw_context *brw,
       ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
       assert(ret == 0);
    }
+
+   return TRUE;
 }
 
-void brw_draw_init( struct brw_context *brw )
+static boolean
+brw_draw_elements(struct pipe_context *pipe,
+		  struct pipe_buffer *index_buffer,
+		  unsigned index_size,
+		  unsigned mode, 
+		  unsigned start, unsigned count)
 {
-   struct vbo_context *vbo = vbo_context(ctx);
+   return brw_draw_range_elements( pipe, index_buffer,
+				   index_size,
+				   0, 0xffffffff,
+				   mode, 
+				   start, count );
+}
 
+static boolean   /* non-indexed draw: forwards to brw_draw_elements */
+brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
+                     unsigned start, unsigned count)
+{
+   return brw_draw_elements(pipe, NULL, 0, mode, start, count);   /* no index buffer, index size 0 */
+}
+
+
+
+void brw_draw_init( struct brw_context *brw )
+{
    /* Register our drawing function: 
     */
-   vbo->draw_prims = brw_draw_prims;
+   brw->base.draw_arrays = brw_draw_arrays;
+   brw->base.draw_elements = brw_draw_elements;
+   brw->base.draw_range_elements = brw_draw_range_elements;
 }
 
 void brw_draw_destroy( struct brw_context *brw )
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
index dc7ca8731d..13f0443a81 100644
--- a/src/gallium/drivers/i965/brw_draw.h
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -33,21 +33,8 @@
 struct brw_context;
 
 
-void brw_draw_prims( struct brw_context *brw,
-		     const struct gl_client_array *arrays[],
-		     const struct _mesa_prim *prims,
-		     GLuint nr_prims,
-		     const struct _mesa_index_buffer *ib,
-		     GLboolean index_bounds_valid,
-		     GLuint min_index,
-		     GLuint max_index );
-
 void brw_draw_init( struct brw_context *brw );
-void brw_draw_destroy( struct brw_context *brw );
+void brw_draw_cleanup( struct brw_context *brw );
 
-/* brw_draw_current.c
- */
-void brw_init_current_values(struct brw_context *brw,
-			     struct gl_client_array *arrays);
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 1ab65d60c4..7b0860d04c 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -36,8 +36,6 @@
 #include "brw_fallback.h"
 
 #include "brw_batchbuffer.h"
-#include "intel_buffer_objects.h"
-#include "intel_tex.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index 46d52a473b..ac5a623cac 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -33,6 +33,8 @@
 #ifndef BRW_EU_H
 #define BRW_EU_H
 
+#include "util/u_debug.h"
+
 #include "brw_structs.h"
 #include "brw_defines.h"
 
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index eb39be8545..0f2612c181 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -32,8 +32,6 @@
 
 
 #include "brw_batchbuffer.h"
-#include "intel_regions.h"
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965/brw_pipe_debug.c b/src/gallium/drivers/i965/brw_pipe_debug.c
deleted file mode 100644
index 34d6d4028a..0000000000
--- a/src/gallium/drivers/i965/brw_pipe_debug.c
+++ /dev/null
@@ -1,2 +0,0 @@
-   if (INTEL_DEBUG & DEBUG_STATS)
-      cc.cc5.statistics_enable = 1;
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 55242ac6ad..a2da1373bf 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -43,7 +43,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_batchbuffer.h"
-#include "intel_reg.h"
+#include "brw_reg.h"
 
 /** Waits on the query object's BO and totals the results for this query */
 static void
@@ -165,7 +165,7 @@ brw_prepare_query_begin(struct brw_context *brw)
       brw->sws->bo_unreference(brw->query.bo);
       brw->query.bo = NULL;
 
-      brw->query.bo = dri_bo_alloc(brw->bufmgr, "query", 4096, 1);
+      brw->query.bo = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1);
       brw->query.index = 0;
    }
 
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
new file mode 100644
index 0000000000..b0928adbe4
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -0,0 +1,26 @@
+/* Rebuild brw->vb.inputs/info from 'arrays'; flag BRW_NEW_INPUT_DIMENSIONS when the packed sizes change. */
+static void brw_merge_inputs( struct brw_context *brw,
+		       const struct gl_client_array *arrays[])
+{
+   struct brw_vertex_info old = brw->vb.info;
+   GLuint i;
+   /* Drop the buffer references held by the previous inputs. */
+   for (i = 0; i < VERT_ATTRIB_MAX; i++)
+      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
+
+   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
+   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
+
+   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+      brw->vb.inputs[i].glarray = arrays[i];
+      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
+      /* Pack (Size - 1) as a 2-bit field, 16 attributes per word; skipped when StrideB is 0. */
+      if (arrays[i]->StrideB != 0)
+	 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
+	    ((i%16) * 2);
+   }
+
+   /* Raise statechanges if input sizes have changed. */
+   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
+      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
+}
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
index a640104d71..f428ec9269 100644
--- a/src/gallium/drivers/i965/brw_reg.h
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -76,4 +76,40 @@
 #define FENCE_YMAJOR 2
 
 
+
+/* PCI IDs
+ */
+#define PCI_CHIP_I965_G			0x29A2
+#define PCI_CHIP_I965_Q			0x2992
+#define PCI_CHIP_I965_G_1		0x2982
+#define PCI_CHIP_I946_GZ		0x2972
+#define PCI_CHIP_I965_GM                0x2A02
+#define PCI_CHIP_I965_GME               0x2A12
+
+#define PCI_CHIP_GM45_GM                0x2A42
+
+#define PCI_CHIP_IGD_E_G                0x2E02
+#define PCI_CHIP_Q45_G                  0x2E12
+#define PCI_CHIP_G45_G                  0x2E22
+#define PCI_CHIP_G41_G                  0x2E32
+#define PCI_CHIP_B43_G                  0x2E42
+
+#define PCI_CHIP_ILD_G                  0x0042
+#define PCI_CHIP_ILM_G                  0x0046
+
+struct brw_chipset {          /* chipset identity, derived from the PCI id */
+   unsigned pci_id:16;
+   unsigned is_965:1;         /* unsigned: a signed 1-bit field cannot hold 1 */
+   unsigned is_igdng:1;
+   unsigned is_g4x:1;
+   unsigned pad:13;
+};
+
+
+/* XXX: hacks
+ */
+#define VERT_RESULT_HPOS 0	/* not always true */
+#define VERT_RESULT_PSIZ 10000	/* disabled */
+
+
 #endif
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
new file mode 100644
index 0000000000..671467989d
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -0,0 +1,365 @@
+/**************************************************************************
+ * 
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
+
+#include "brw_reg.h"
+#include "brw_context.h"
+#include "brw_screen.h"
+#include "brw_buffer.h"
+#include "brw_texture.h"
+#include "brw_winsys.h"
+
+#ifdef DEBUG
+/* Names recognised in the BRW_DEBUG / INTEL_DEBUG environment variables.
+ * Only flags that brw_debug.h defines are listed: the "reg", "fbo", "lock",
+ * "dri" and "tile" entries referred to DEBUG_REGION, DEBUG_FBO, DEBUG_LOCK,
+ * DEBUG_DRI and DEBUG_TILE, none of which exist there.
+ */
+static const struct debug_named_value debug_names[] = {
+   { "tex",   DEBUG_TEXTURE},
+   { "state", DEBUG_STATE},
+   { "ioctl", DEBUG_IOCTL},
+   { "blit",  DEBUG_BLIT},
+   { "curbe", DEBUG_CURBE},
+   { "fall",  DEBUG_FALLBACKS},
+   { "verb",  DEBUG_VERBOSE},
+   { "bat",   DEBUG_BATCH},
+   { "pix",   DEBUG_PIXEL},
+   { "buf",   DEBUG_BUFMGR},
+   { "sync",  DEBUG_SYNC},
+   { "prim",  DEBUG_PRIMS },
+   { "vert",  DEBUG_VERTS },
+   { "dma",   DEBUG_DMA },
+   { "san",   DEBUG_SANITY },
+   { "sleep", DEBUG_SLEEP },
+   { "stats", DEBUG_STATS },
+   { "sing",  DEBUG_SINGLE_THREAD },
+   { "thre",  DEBUG_SINGLE_THREAD },
+   { "wm",    DEBUG_WM },
+   { "urb",   DEBUG_URB },
+   { "vs",    DEBUG_VS },
+   { NULL,    0 }
+};
+
+int BRW_DEBUG = 0;
+#endif
+
+
+/*
+ * Probe functions
+ */
+
+/** Report the vendor string for this driver; the screen argument is not consulted. */
+static const char *
+brw_get_vendor(struct pipe_screen *screen)
+{
+   return "VMware, Inc.";
+}
+/** Return "i965 (chipset: NAME)", formatted into a static buffer shared by all callers. */
+static const char *
+brw_get_name(struct pipe_screen *screen)
+{
+   static char buffer[128];
+   const char *chipset = "unknown";   /* used for ids missing from the switch */
+   /* The PCI id is kept in the screen's brw_chipset, not in the screen itself. */
+   switch (brw_screen(screen)->chipset.pci_id) {
+   case PCI_CHIP_I965_G:
+      chipset = "I965_G";
+      break;
+   case PCI_CHIP_I965_Q:
+      chipset = "I965_Q";
+      break;
+   case PCI_CHIP_I965_G_1:
+      chipset = "I965_G_1";
+      break;
+   case PCI_CHIP_I946_GZ:
+      chipset = "I946_GZ";
+      break;
+   case PCI_CHIP_I965_GM:
+      chipset = "I965_GM";
+      break;
+   case PCI_CHIP_I965_GME:
+      chipset = "I965_GME";
+      break;
+   case PCI_CHIP_GM45_GM:
+      chipset = "GM45_GM";
+      break;
+   case PCI_CHIP_IGD_E_G:
+      chipset = "IGD_E_G";
+      break;
+   case PCI_CHIP_Q45_G:
+      chipset = "Q45_G";
+      break;
+   case PCI_CHIP_G45_G:
+      chipset = "G45_G";
+      break;
+   case PCI_CHIP_G41_G:
+      chipset = "G41_G";
+      break;
+   case PCI_CHIP_B43_G:
+      chipset = "B43_G";
+      break;
+   case PCI_CHIP_ILD_G:
+      chipset = "ILD_G";
+      break;
+   case PCI_CHIP_ILM_G:
+      chipset = "ILM_G";
+      break;
+   }
+
+   util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset);
+   return buffer;
+}
+/** Answer integer capability queries; caps not listed here report 0. */
+static int
+brw_get_param(struct pipe_screen *screen, int param)
+{
+   int value = 0;
+   switch (param) {
+   /* Caps answered with 1. */
+   case PIPE_CAP_NPOT_TEXTURES:
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      value = 1;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      value = 8;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      value = 11; /* max 1024x1024 */
+      break;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      value = 8;  /* max 128x128x128 */
+      break;
+   /* Answered with 0, like any cap not listed. */
+   case PIPE_CAP_GLSL:
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+   case PIPE_CAP_POINT_SPRITE:
+   case PIPE_CAP_OCCLUSION_QUERY:
+   default:
+      break;
+   }
+   return value;
+}
+/** Answer floating-point capability queries; caps not listed here report 0. */
+static float
+brw_get_paramf(struct pipe_screen *screen, int param)
+{
+   float limit = 0.0f;
+   switch (param) {
+   case PIPE_CAP_MAX_LINE_WIDTH:
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      limit = 7.5f;
+      break;
+   case PIPE_CAP_MAX_POINT_WIDTH:
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      limit = 255.0f;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      limit = 4.0f;
+      break;
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      limit = 16.0f;
+      break;
+   default:
+      break;
+   }
+   return limit;
+}
+/** Check 'format' against the render-target list or the texture list. */
+static boolean
+brw_is_format_supported(struct pipe_screen *screen,
+                         enum pipe_format format,
+                         enum pipe_texture_target target,
+                         unsigned tex_usage,
+                         unsigned geom_flags)
+{
+   static const enum pipe_format tex_supported[] = {
+      PIPE_FORMAT_R8G8B8A8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_L8_UNORM,
+      PIPE_FORMAT_A8_UNORM,
+      PIPE_FORMAT_I8_UNORM,
+      PIPE_FORMAT_A8L8_UNORM,
+      PIPE_FORMAT_YCBCR,
+      PIPE_FORMAT_YCBCR_REV,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_NONE  /* list terminator */
+   };
+   static const enum pipe_format surface_supported[] = {
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_NONE  /* list terminator */
+   };
+   const enum pipe_format *fmt;
+
+   /* Render-target usage selects the surface list, anything else the texture list. */
+   fmt = (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) ? surface_supported
+                                                        : tex_supported;
+
+   /* Walk the chosen list up to its PIPE_FORMAT_NONE terminator. */
+   while (*fmt != PIPE_FORMAT_NONE) {
+      if (*fmt == format)
+         return TRUE;
+      fmt++;
+   }
+
+   return FALSE;
+}
+
+
+/*
+ * Fence functions
+ */
+
+/** Forward a fence reference update to the winsys fence_reference hook. */
+static void
+brw_fence_reference(struct pipe_screen *screen,
+                     struct pipe_fence_handle **ptr,
+                     struct pipe_fence_handle *fence)
+{
+   struct brw_screen *is = brw_screen(screen);
+   /* NOTE(review): struct brw_screen in brw_screen.h declares 'sws', not 'iws' -- confirm. */
+   is->iws->fence_reference(is->iws, ptr, fence);
+}
+/** Return the winsys fence_signalled result for 'fence'; 'flags' is not passed on. */
+static int
+brw_fence_signalled(struct pipe_screen *screen,
+                     struct pipe_fence_handle *fence,
+                     unsigned flags)
+{
+   struct brw_screen *is = brw_screen(screen);
+   /* NOTE(review): struct brw_screen in brw_screen.h declares 'sws', not 'iws' -- confirm. */
+   return is->iws->fence_signalled(is->iws, fence);
+}
+/** Return the winsys fence_finish result for 'fence'; 'flags' is not passed on. */
+static int
+brw_fence_finish(struct pipe_screen *screen,
+                  struct pipe_fence_handle *fence,
+                  unsigned flags)
+{
+   struct brw_screen *is = brw_screen(screen);
+   /* NOTE(review): struct brw_screen in brw_screen.h declares 'sws', not 'iws' -- confirm. */
+   return is->iws->fence_finish(is->iws, fence);
+}
+
+
+/*
+ * Generic functions
+ */
+
+/** Destroy the attached winsys, if any, then free the screen itself. */
+static void
+brw_destroy_screen(struct pipe_screen *screen)
+{
+   struct brw_screen *is = brw_screen(screen);
+   /* NOTE(review): struct brw_screen in brw_screen.h declares 'sws', not 'iws' -- confirm. */
+   if (is->iws)
+      is->iws->destroy(is->iws);
+
+   FREE(is);
+}
+
+/**
+ * Create a brw_screen for pci_id; returns NULL for unknown ids or if allocation fails.
+ */
+struct pipe_screen *
+brw_create_screen(struct intel_winsys *iws, uint pci_id)
+{
+   struct brw_screen *is;
+   struct brw_chipset chipset;
+   /* Debug flags are the union of the BRW_DEBUG and INTEL_DEBUG options. */
+#ifdef DEBUG
+   BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
+   BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
+#endif
+
+   memset(&chipset, 0, sizeof chipset);
+
+   chipset.pci_id = pci_id;
+   /* Set exactly one generation flag; unknown ids are rejected before allocating. */
+   switch (pci_id) {
+   case PCI_CHIP_I965_G:
+   case PCI_CHIP_I965_Q:
+   case PCI_CHIP_I965_G_1:
+   case PCI_CHIP_I946_GZ:
+   case PCI_CHIP_I965_GM:
+   case PCI_CHIP_I965_GME:
+      chipset.is_965 = TRUE;
+      break;
+
+   case PCI_CHIP_GM45_GM:
+   case PCI_CHIP_IGD_E_G:
+   case PCI_CHIP_Q45_G:
+   case PCI_CHIP_G45_G:
+   case PCI_CHIP_G41_G:
+   case PCI_CHIP_B43_G:
+      chipset.is_g4x = TRUE;
+      break;
+
+   case PCI_CHIP_ILD_G:
+   case PCI_CHIP_ILM_G:
+      chipset.is_igdng = TRUE;
+      break;
+
+   default:
+      debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", 
+                   __FUNCTION__, pci_id);
+      return NULL;
+   }
+
+
+   is = CALLOC_STRUCT(brw_screen);
+   if (!is)
+      return NULL;
+   /* NOTE(review): struct brw_screen in brw_screen.h declares 'sws', not 'iws' -- confirm. */
+   is->chipset = chipset;
+   is->iws = iws;
+   is->base.winsys = NULL;
+   is->base.destroy = brw_destroy_screen;
+   is->base.get_name = brw_get_name;
+   is->base.get_vendor = brw_get_vendor;
+   is->base.get_param = brw_get_param;
+   is->base.get_paramf = brw_get_paramf;
+   is->base.is_format_supported = brw_is_format_supported;
+   is->base.fence_reference = brw_fence_reference;
+   is->base.fence_signalled = brw_fence_signalled;
+   is->base.fence_finish = brw_fence_finish;
+
+   brw_screen_init_texture_functions(is);
+   brw_screen_init_buffer_functions(is);
+
+   return &is->base;
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 716b55c52b..79d595d0ad 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -31,6 +31,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_screen.h"
 
+#include "brw_reg.h"
 
 struct brw_winsys_screen;
 
@@ -41,11 +42,8 @@ struct brw_winsys_screen;
 struct brw_screen
 {
    struct pipe_screen base;
-
+   struct brw_chipset chipset;
    struct brw_winsys_screen *sws;
-
-   boolean is_i945;
-   uint pci_id;
 };
 
 /**
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 54202cbd12..53e8f09e37 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -56,7 +56,7 @@ static void compile_sf_prog( struct brw_context *brw,
    c.key = *key;
    c.nr_attrs = util_count_bits(c.key.attrs);
    c.nr_attr_regs = (c.nr_attrs+1)/2;
-   c.nr_setup_attrs = util_count_bits(c.key.attrs & DO_SETUP_BITS);
+   c.nr_setup_attrs = c.key.nr_attrs;
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
 
    c.prog_data.urb_read_length = c.nr_attr_regs;
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 5e1229d22f..0e406f12e1 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -68,8 +68,7 @@ static void upload_sf_vp(struct brw_context *brw)
     */
 
    /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect.  Clipping to the boundaries of static shared buffers
-    * for front/back/depth is covered by looping over cliprects in brw_draw.c.
+    * scissor rect.
     *
     * Note that the hardware's coordinates are inclusive, while Mesa's min is
     * inclusive but max is exclusive.
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 1b5f27cc16..97f88b3ab3 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -220,8 +220,8 @@ brw_upload_cache( struct brw_cache *cache,
    int i;
 
    /* Create the buffer object to contain the data */
-   bo = dri_bo_alloc(cache->brw->intel.bufmgr,
-		     cache->name[cache_id], data_size, 1 << 6);
+   bo = brw->sws->bo_alloc(cache->sws,
+			   cache->buffer_type[cache_id], data_size, 1 << 6);
 
 
    /* Set up the memory containing the key, aux_data, and reloc_bufs */
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
new file mode 100644
index 0000000000..812b761d40
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -0,0 +1,145 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+      
+
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+
+struct dirty_bit_map {
+   uint32_t bit;     /* flag value; 0 marks the end of a table */
+   char *name;       /* stringified flag identifier (see DEFINE_BIT) */
+   uint32_t count;   /* incremented by brw_update_dirty_count() */
+};
+/* Build one table entry: the flag, its name as a string, and a zero count. */
+#define DEFINE_BIT(name) {name, #name, 0}
+
+static struct dirty_bit_map mesa_bits[] = {   /* _NEW_* dirty flags */
+   DEFINE_BIT(_NEW_MODELVIEW),
+   DEFINE_BIT(_NEW_PROJECTION),
+   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
+   DEFINE_BIT(_NEW_COLOR_MATRIX),
+   DEFINE_BIT(_NEW_ACCUM),
+   DEFINE_BIT(_NEW_COLOR),
+   DEFINE_BIT(_NEW_DEPTH),
+   DEFINE_BIT(_NEW_EVAL),
+   DEFINE_BIT(_NEW_FOG),
+   DEFINE_BIT(_NEW_HINT),
+   DEFINE_BIT(_NEW_LIGHT),
+   DEFINE_BIT(_NEW_LINE),
+   DEFINE_BIT(_NEW_PIXEL),
+   DEFINE_BIT(_NEW_POINT),
+   DEFINE_BIT(_NEW_POLYGON),
+   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
+   DEFINE_BIT(_NEW_SCISSOR),
+   DEFINE_BIT(_NEW_STENCIL),
+   DEFINE_BIT(_NEW_TEXTURE),
+   DEFINE_BIT(_NEW_TRANSFORM),
+   DEFINE_BIT(_NEW_VIEWPORT),
+   DEFINE_BIT(_NEW_PACKUNPACK),
+   DEFINE_BIT(_NEW_ARRAY),
+   DEFINE_BIT(_NEW_RENDERMODE),
+   DEFINE_BIT(_NEW_BUFFERS),
+   DEFINE_BIT(_NEW_MULTISAMPLE),
+   DEFINE_BIT(_NEW_TRACK_MATRIX),
+   DEFINE_BIT(_NEW_PROGRAM),
+   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   {0, 0, 0}   /* sentinel: bit == 0 stops the table walkers below */
+};
+
+static struct dirty_bit_map brw_bits[] = {   /* BRW_NEW_* dirty flags */
+   DEFINE_BIT(BRW_NEW_URB_FENCE),
+   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
+   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
+   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
+   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_PRIMITIVE),
+   DEFINE_BIT(BRW_NEW_CONTEXT),
+   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
+   DEFINE_BIT(BRW_NEW_PSP),
+   DEFINE_BIT(BRW_NEW_FENCE),
+   DEFINE_BIT(BRW_NEW_INDICES),
+   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
+   DEFINE_BIT(BRW_NEW_VERTICES),
+   DEFINE_BIT(BRW_NEW_BATCH),
+   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
+   {0, 0, 0}   /* sentinel: bit == 0 stops the table walkers below */
+};
+
+static struct dirty_bit_map cache_bits[] = {   /* CACHE_NEW_* dirty flags */
+   DEFINE_BIT(CACHE_NEW_CC_VP),
+   DEFINE_BIT(CACHE_NEW_CC_UNIT),
+   DEFINE_BIT(CACHE_NEW_WM_PROG),
+   DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
+   DEFINE_BIT(CACHE_NEW_SAMPLER),
+   DEFINE_BIT(CACHE_NEW_WM_UNIT),
+   DEFINE_BIT(CACHE_NEW_SF_PROG),
+   DEFINE_BIT(CACHE_NEW_SF_VP),
+   DEFINE_BIT(CACHE_NEW_SF_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_UNIT),
+   DEFINE_BIT(CACHE_NEW_VS_PROG),
+   DEFINE_BIT(CACHE_NEW_GS_UNIT),
+   DEFINE_BIT(CACHE_NEW_GS_PROG),
+   DEFINE_BIT(CACHE_NEW_CLIP_VP),
+   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
+   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
+   DEFINE_BIT(CACHE_NEW_SURFACE),
+   DEFINE_BIT(CACHE_NEW_SURF_BIND),
+   {0, 0, 0}   /* sentinel: bit == 0 stops the table walkers below */
+};
+
+
+static void
+brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+   /* Count one hit for each table entry whose flag is set in 'bits'. */
+   for (i = 0; i < 32; i++) {   /* never scans past 32 entries */
+      if (bit_map[i].bit == 0)
+	 return;   /* reached the {0, 0, 0} sentinel */
+
+      if (bit_map[i].bit & bits)
+	 bit_map[i].count++;
+   }
+}
+
+static void
+brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
+{
+   int i;
+   /* Dump every entry up to the {0, 0, 0} sentinel; 'bits' is unused. */
+   for (i = 0; i < 32; i++) {
+      if (bit_map[i].bit == 0)
+	 return;
+      /* count is uint32_t, so print it unsigned (%u rather than %d). */
+      fprintf(stderr, "0x%08x: %12u (%s)\n",
+	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
+   }
+}
+
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index 842380e38f..8659e35289 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -45,7 +45,7 @@ const struct brw_tracked_state *atoms[] =
 {
    &brw_check_fallback,
 
-   &brw_wm_input_sizes,
+//   &brw_wm_input_sizes,
    &brw_vs_prog,
    &brw_gs_prog, 
    &brw_clip_prog, 
@@ -155,117 +155,6 @@ brw_clear_validated_bos(struct brw_context *brw)
    brw->state.validated_bo_count = 0;
 }
 
-struct dirty_bit_map {
-   uint32_t bit;
-   char *name;
-   uint32_t count;
-};
-
-#define DEFINE_BIT(name) {name, #name, 0}
-
-static struct dirty_bit_map mesa_bits[] = {
-   DEFINE_BIT(_NEW_MODELVIEW),
-   DEFINE_BIT(_NEW_PROJECTION),
-   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
-   DEFINE_BIT(_NEW_COLOR_MATRIX),
-   DEFINE_BIT(_NEW_ACCUM),
-   DEFINE_BIT(_NEW_COLOR),
-   DEFINE_BIT(_NEW_DEPTH),
-   DEFINE_BIT(_NEW_EVAL),
-   DEFINE_BIT(_NEW_FOG),
-   DEFINE_BIT(_NEW_HINT),
-   DEFINE_BIT(_NEW_LIGHT),
-   DEFINE_BIT(_NEW_LINE),
-   DEFINE_BIT(_NEW_PIXEL),
-   DEFINE_BIT(_NEW_POINT),
-   DEFINE_BIT(_NEW_POLYGON),
-   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
-   DEFINE_BIT(_NEW_SCISSOR),
-   DEFINE_BIT(_NEW_STENCIL),
-   DEFINE_BIT(_NEW_TEXTURE),
-   DEFINE_BIT(_NEW_TRANSFORM),
-   DEFINE_BIT(_NEW_VIEWPORT),
-   DEFINE_BIT(_NEW_PACKUNPACK),
-   DEFINE_BIT(_NEW_ARRAY),
-   DEFINE_BIT(_NEW_RENDERMODE),
-   DEFINE_BIT(_NEW_BUFFERS),
-   DEFINE_BIT(_NEW_MULTISAMPLE),
-   DEFINE_BIT(_NEW_TRACK_MATRIX),
-   DEFINE_BIT(_NEW_PROGRAM),
-   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
-   {0, 0, 0}
-};
-
-static struct dirty_bit_map brw_bits[] = {
-   DEFINE_BIT(BRW_NEW_URB_FENCE),
-   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
-   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
-   DEFINE_BIT(BRW_NEW_INPUT_DIMENSIONS),
-   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
-   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
-   DEFINE_BIT(BRW_NEW_PRIMITIVE),
-   DEFINE_BIT(BRW_NEW_CONTEXT),
-   DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
-   DEFINE_BIT(BRW_NEW_PSP),
-   DEFINE_BIT(BRW_NEW_FENCE),
-   DEFINE_BIT(BRW_NEW_INDICES),
-   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
-   DEFINE_BIT(BRW_NEW_VERTICES),
-   DEFINE_BIT(BRW_NEW_BATCH),
-   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
-   {0, 0, 0}
-};
-
-static struct dirty_bit_map cache_bits[] = {
-   DEFINE_BIT(CACHE_NEW_CC_VP),
-   DEFINE_BIT(CACHE_NEW_CC_UNIT),
-   DEFINE_BIT(CACHE_NEW_WM_PROG),
-   DEFINE_BIT(CACHE_NEW_SAMPLER_DEFAULT_COLOR),
-   DEFINE_BIT(CACHE_NEW_SAMPLER),
-   DEFINE_BIT(CACHE_NEW_WM_UNIT),
-   DEFINE_BIT(CACHE_NEW_SF_PROG),
-   DEFINE_BIT(CACHE_NEW_SF_VP),
-   DEFINE_BIT(CACHE_NEW_SF_UNIT),
-   DEFINE_BIT(CACHE_NEW_VS_UNIT),
-   DEFINE_BIT(CACHE_NEW_VS_PROG),
-   DEFINE_BIT(CACHE_NEW_GS_UNIT),
-   DEFINE_BIT(CACHE_NEW_GS_PROG),
-   DEFINE_BIT(CACHE_NEW_CLIP_VP),
-   DEFINE_BIT(CACHE_NEW_CLIP_UNIT),
-   DEFINE_BIT(CACHE_NEW_CLIP_PROG),
-   DEFINE_BIT(CACHE_NEW_SURFACE),
-   DEFINE_BIT(CACHE_NEW_SURF_BIND),
-   {0, 0, 0}
-};
-
-
-static void
-brw_update_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
-{
-   int i;
-
-   for (i = 0; i < 32; i++) {
-      if (bit_map[i].bit == 0)
-	 return;
-
-      if (bit_map[i].bit & bits)
-	 bit_map[i].count++;
-   }
-}
-
-static void
-brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
-{
-   int i;
-
-   for (i = 0; i < 32; i++) {
-      if (bit_map[i].bit == 0)
-	 return;
-
-      fprintf(stderr, "0x%08x: %12d (%s)\n",
-	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
-   }
-}
 
 /***********************************************************************
  * Emit all state:
diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c
index c33c19ee51..6f7adb6393 100644
--- a/src/gallium/drivers/i965/brw_tex.c
+++ b/src/gallium/drivers/i965/brw_tex.c
@@ -30,8 +30,6 @@
   */
         
 
-#include "intel_regions.h"
-#include "intel_tex.h"
 #include "brw_context.h"
 
 /**
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
index 813cd31f49..50c30878c6 100644
--- a/src/gallium/drivers/i965/brw_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_tex_layout.c
@@ -32,9 +32,7 @@
 /* Code to layout images in a mipmap tree for i965.
  */
 
-#include "intel_mipmap_tree.h"
-#include "intel_tex_layout.h"
-#include "intel_chipset.h"
+#include "brw_tex_layout.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
diff --git a/src/gallium/drivers/i965/brw_types.h b/src/gallium/drivers/i965/brw_types.h
index 87dae13d94..89e08a5c80 100644
--- a/src/gallium/drivers/i965/brw_types.h
+++ b/src/gallium/drivers/i965/brw_types.h
@@ -15,4 +15,7 @@ typedef float GLfloat;
 
 typedef uint8_t GLboolean;
 
+#define GL_FALSE FALSE
+#define GL_TRUE TRUE
+
 #endif
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index 6446e8e761..319e29bfcb 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -52,7 +52,8 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
    if (!vp->use_const_buffer)
       return NULL;
 
-   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
+   const_buffer = brw->sws->bo_alloc(brw->sws, 
+				     BRW_BUFFER_TYPE_SHADER_CONSTANTS,
 				     size, 64);
 
    /* _NEW_PROGRAM_CONSTANTS */
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 2142db5a4d..82cd8007ac 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -37,6 +37,7 @@ struct brw_winsys_buffer {
    struct brw_winsys_screen *sws;
    void *bo;
    unsigned offset;
+   unsigned size;
 };
 
 enum brw_buffer_usage {
@@ -63,6 +64,11 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_TEXTURE,
    BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */
    BRW_BUFFER_TYPE_VERTEX,
+   BRW_BUFFER_TYPE_CURBE,
+   BRW_BUFFER_TYPE_QUERY,
+   BRW_BUFFER_TYPE_SHADER_CONSTANTS,
+   BRW_BUFFER_TYPE_WM_SCRATCH,
+   BRW_BUFFER_TYPE_BATCH,
 };
 
 
@@ -82,6 +88,10 @@ struct brw_batchbuffer {
    uint8_t *map;
    uint8_t *ptr;
    size_t size;
+   struct {
+      uint8_t *end_ptr;
+   } emit;
+
 
    size_t relocs;
    size_t max_relocs;
@@ -125,15 +135,15 @@ struct brw_winsys_screen {
    /**
     * Buffer functions.
     */
+
    /*@{*/
    /**
     * Create a buffer.
     */
-   struct brw_winsys_buffer *(*buffer_create)(struct brw_winsys *iws,
-					      unsigned size, 
-					      unsigned alignment,
-					      enum brw_buffer_type type);
-
+   struct brw_winsys_buffer *(*bo_alloc)( struct brw_winsys_screen *sws,
+					  enum brw_buffer_type type,
+					  unsigned size,
+					  unsigned alignment );
 
    /* Reference and unreference buffers:
     */
@@ -146,6 +156,11 @@ struct brw_winsys_screen {
 			  unsigned offset,
 			  struct brw_winsys_buffer *b2);
 
+   void (*bo_subdata)(struct brw_winsys_buffer *dst,
+		      size_t offset,
+		      size_t size,
+		      const void *data);
+
    /**
     * Map a buffer.
     */
@@ -159,17 +174,6 @@ struct brw_winsys_screen {
    void (*buffer_unmap)(struct brw_winsys *iws,
                         struct brw_winsys_buffer *buffer);
 
-   /**
-    * Write to a buffer.
-    *
-    * Arguments follows pipe_buffer_write.
-    */
-   int (*buffer_write)(struct brw_winsys *iws,
-                       struct brw_winsys_buffer *dst,
-                       size_t offset,
-                       size_t size,
-                       const void *data);
-
    void (*buffer_destroy)(struct brw_winsys *iws,
                           struct brw_winsys_buffer *buffer);
    /*@}*/
@@ -208,14 +212,14 @@ struct brw_winsys_screen {
 
 
 /**
- * Create i915 pipe_screen.
+ * Create brw pipe_screen.
  */
-struct pipe_screen *i915_create_screen(struct brw_winsys *iws, unsigned pci_id);
+struct pipe_screen *brw_create_screen(struct brw_winsys *iws, unsigned pci_id);
 
 /**
- * Create a i915 pipe_context.
+ * Create a brw pipe_context.
  */
-struct pipe_context *i915_create_context(struct pipe_screen *screen);
+struct pipe_context *brw_create_context(struct pipe_screen *screen);
 
 /**
  * Get the brw_winsys buffer backing the texture.
@@ -223,7 +227,7 @@ struct pipe_context *i915_create_context(struct pipe_screen *screen);
  * TODO UGLY
  */
 struct pipe_texture;
-boolean i915_get_texture_buffer_brw(struct pipe_texture *texture,
+boolean brw_get_texture_buffer_brw(struct pipe_texture *texture,
 				    struct brw_winsys_buffer **buffer,
 				    unsigned *stride);
 
@@ -232,10 +236,10 @@ boolean i915_get_texture_buffer_brw(struct pipe_texture *texture,
  *
  * TODO UGLY
  */
-struct pipe_texture * i915_texture_blanket_brw(struct pipe_screen *screen,
-                                                 struct pipe_texture *tmplt,
-                                                 unsigned pitch,
-                                                 struct brw_winsys_buffer *buffer);
+struct pipe_texture * brw_texture_blanket(struct pipe_screen *screen,
+					  struct pipe_texture *tmplt,
+					  unsigned pitch,
+					  struct brw_winsys_buffer *buffer);
 
 
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 32b8900bac..284cf42f8b 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -310,7 +310,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
    }
 
    /* CACHE_NEW_VS_PROG */
-   key->vp_outputs_written = brw->vs.prog_data->outputs_written & DO_SETUP_BITS;
+   key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* bitmask */
 
    /* The unique fragment program ID */
    key->program_string_id = fp->id;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 958c00d3e0..16a2324049 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -272,10 +272,10 @@ static void upload_wm_unit( struct brw_context *brw )
 	 brw->wm.scratch_bo = NULL;
       }
       if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
-                                           "wm scratch",
-                                           total,
-                                           4096);
+	 brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws,
+						 BRW_BUFFER_TYPE_WM_SCRATCH,
+						 total,
+						 4096);
       }
    }
 
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index 5045c9b4a6..e1ed6438dc 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -30,11 +30,7 @@
   */
                    
 
-#include "intel_mipmap_tree.h"
 #include "brw_batchbuffer.h"
-#include "intel_tex.h"
-#include "intel_fbo.h"
-
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@@ -365,7 +361,8 @@ brw_wm_update_constant_buffer(struct brw_context *brw)
    if (!fp->use_const_buffer)
       return NULL;
 
-   const_buffer = drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer",
+   const_buffer = drm_intel_bo_alloc(intel->bufmgr, 
+				     BRW_BUFFER_TYPE_SHADER_CONSTANTS,
 				     size, 64);
 
    /* _NEW_PROGRAM_CONSTANTS */
@@ -686,7 +683,7 @@ static void prepare_wm_surfaces(struct brw_context *brw )
    }
 
    old_nr_surfaces = brw->wm.nr_surfaces;
-   brw->wm.nr_surfaces = MAX_DRAW_BUFFERS;
+   brw->wm.nr_surfaces = PIPE_MAX_COLOR_BUFS;
 
    if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
        brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
diff --git a/src/gallium/drivers/i965/intel_chipset.h b/src/gallium/drivers/i965/intel_chipset.h
deleted file mode 100644
index 3c38f1676c..0000000000
--- a/src/gallium/drivers/i965/intel_chipset.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-#define PCI_CHIP_I810			0x7121
-#define PCI_CHIP_I810_DC100		0x7123
-#define PCI_CHIP_I810_E			0x7125
-#define PCI_CHIP_I815			0x1132
-
-#define PCI_CHIP_I830_M			0x3577
-#define PCI_CHIP_845_G			0x2562
-#define PCI_CHIP_I855_GM		0x3582
-#define PCI_CHIP_I865_G			0x2572
-
-#define PCI_CHIP_I915_G			0x2582
-#define PCI_CHIP_E7221_G		0x258A
-#define PCI_CHIP_I915_GM		0x2592
-#define PCI_CHIP_I945_G			0x2772
-#define PCI_CHIP_I945_GM		0x27A2
-#define PCI_CHIP_I945_GME		0x27AE
-
-#define PCI_CHIP_Q35_G			0x29B2
-#define PCI_CHIP_G33_G			0x29C2
-#define PCI_CHIP_Q33_G			0x29D2
-
-#define PCI_CHIP_IGD_GM			0xA011
-#define PCI_CHIP_IGD_G			0xA001
-
-#define IS_IGDGM(devid)	(devid == PCI_CHIP_IGD_GM)
-#define IS_IGDG(devid)	(devid == PCI_CHIP_IGD_G)
-#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid))
-
-#define PCI_CHIP_I965_G			0x29A2
-#define PCI_CHIP_I965_Q			0x2992
-#define PCI_CHIP_I965_G_1		0x2982
-#define PCI_CHIP_I946_GZ		0x2972
-#define PCI_CHIP_I965_GM                0x2A02
-#define PCI_CHIP_I965_GME               0x2A12
-
-#define PCI_CHIP_GM45_GM                0x2A42
-
-#define PCI_CHIP_IGD_E_G                0x2E02
-#define PCI_CHIP_Q45_G                  0x2E12
-#define PCI_CHIP_G45_G                  0x2E22
-#define PCI_CHIP_G41_G                  0x2E32
-
-#define PCI_CHIP_ILD_G                  0x0042
-#define PCI_CHIP_ILM_G                  0x0046
-
-#define IS_MOBILE(devid)	(devid == PCI_CHIP_I855_GM || \
-				 devid == PCI_CHIP_I915_GM || \
-				 devid == PCI_CHIP_I945_GM || \
-				 devid == PCI_CHIP_I945_GME || \
-				 devid == PCI_CHIP_I965_GM || \
-				 devid == PCI_CHIP_I965_GME || \
-				 devid == PCI_CHIP_GM45_GM || \
-				 IS_IGD(devid) || \
-				 devid == PCI_CHIP_ILM_G)
-
-#define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \
-                                 devid == PCI_CHIP_Q45_G || \
-                                 devid == PCI_CHIP_G45_G || \
-                                 devid == PCI_CHIP_G41_G)
-#define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)
-#define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
-
-#define IS_ILD(devid)           (devid == PCI_CHIP_ILD_G)
-#define IS_ILM(devid)           (devid == PCI_CHIP_ILM_G)
-#define IS_IGDNG(devid)           (IS_ILD(devid) || IS_ILM(devid))
-
-#define IS_915(devid)		(devid == PCI_CHIP_I915_G || \
-				 devid == PCI_CHIP_E7221_G || \
-				 devid == PCI_CHIP_I915_GM)
-
-#define IS_945(devid)		(devid == PCI_CHIP_I945_G || \
-				 devid == PCI_CHIP_I945_GM || \
-				 devid == PCI_CHIP_I945_GME || \
-				 devid == PCI_CHIP_G33_G || \
-				 devid == PCI_CHIP_Q33_G || \
-				 devid == PCI_CHIP_Q35_G || IS_IGD(devid))
-
-#define IS_965(devid)		(devid == PCI_CHIP_I965_G || \
-				 devid == PCI_CHIP_I965_Q || \
-				 devid == PCI_CHIP_I965_G_1 || \
-				 devid == PCI_CHIP_I965_GM || \
-				 devid == PCI_CHIP_I965_GME || \
-				 devid == PCI_CHIP_I946_GZ || \
-				 IS_G4X(devid) || \
-				 IS_IGDNG(devid))
-
-#define IS_9XX(devid)		(IS_915(devid) || \
-				 IS_945(devid) || \
-				 IS_965(devid))
diff --git a/src/gallium/drivers/i965/intel_tex_format.c b/src/gallium/drivers/i965/intel_tex_format.c
deleted file mode 100644
index c62ecdadf0..0000000000
--- a/src/gallium/drivers/i965/intel_tex_format.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include "intel_context.h"
-#include "intel_tex.h"
-#include "intel_chipset.h"
-
-
-
-
-int intel_compressed_num_bytes(GLuint mesaFormat)
-{
-   int bytes = 0;
-   switch(mesaFormat) {
-     
-   case MESA_FORMAT_RGB_FXT1:
-   case MESA_FORMAT_RGBA_FXT1:
-   case MESA_FORMAT_RGB_DXT1:
-   case MESA_FORMAT_RGBA_DXT1:
-     bytes = 2;
-     break;
-     
-   case MESA_FORMAT_RGBA_DXT3:
-   case MESA_FORMAT_RGBA_DXT5:
-     bytes = 4;
-   default:
-     break;
-   }
-   
-   return bytes;
-}
diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c
index 1cdab49e5e..7e0ca553f2 100644
--- a/src/gallium/drivers/i965/intel_tex_layout.c
+++ b/src/gallium/drivers/i965/intel_tex_layout.c
@@ -30,9 +30,7 @@
   *   Michel Dänzer <michel@tungstengraphics.com>
   */
 
-#include "intel_mipmap_tree.h"
 #include "intel_tex_layout.h"
-#include "intel_context.h"
 
 void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
 {
-- 
cgit v1.2.3


From 4dd2f6640b70e2313f8771f7588aa49a861153aa Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 25 Oct 2009 00:02:16 +0100
Subject: i965g: more work on compiling, particularly the brw_draw files

---
 src/gallium/auxiliary/util/u_debug.c       |  27 +++
 src/gallium/auxiliary/util/u_prim.h        |   2 +
 src/gallium/auxiliary/util/u_upload_mgr.h  |   2 +
 src/gallium/drivers/i965/Makefile          |   2 +-
 src/gallium/drivers/i965/brw_batchbuffer.c | 198 +++++++++++++++
 src/gallium/drivers/i965/brw_batchbuffer.h |  14 +-
 src/gallium/drivers/i965/brw_cc.c          |   8 +-
 src/gallium/drivers/i965/brw_clip.c        |   4 +-
 src/gallium/drivers/i965/brw_clip_state.c  |   4 +-
 src/gallium/drivers/i965/brw_context.c     |   2 +-
 src/gallium/drivers/i965/brw_context.h     |  68 ++++--
 src/gallium/drivers/i965/brw_curbe.c       |  13 +-
 src/gallium/drivers/i965/brw_draw.c        | 165 +++++++------
 src/gallium/drivers/i965/brw_draw.h        |   3 +-
 src/gallium/drivers/i965/brw_draw_upload.c | 372 +++++++++++++++++------------
 src/gallium/drivers/i965/brw_eu.c          |   5 +-
 src/gallium/drivers/i965/brw_eu_debug.c    |  13 +-
 src/gallium/drivers/i965/brw_misc_state.c  |  18 +-
 src/gallium/drivers/i965/brw_pipe_flush.c  |   3 +
 src/gallium/drivers/i965/brw_pipe_shader.c |  19 ++
 src/gallium/drivers/i965/brw_pipe_vertex.c |  25 +-
 src/gallium/drivers/i965/brw_screen.h      |  22 ++
 src/gallium/drivers/i965/brw_sf.c          |   2 +-
 src/gallium/drivers/i965/brw_sf_state.c    |  39 +--
 src/gallium/drivers/i965/brw_state.h       |   6 +-
 src/gallium/drivers/i965/brw_state_batch.c |   4 +-
 src/gallium/drivers/i965/brw_swtnl.c       |   6 +-
 src/gallium/drivers/i965/brw_winsys.h      |   7 +
 src/gallium/drivers/i965/brw_wm.c          |   2 +-
 src/gallium/drivers/i965/brw_wm.h          |   8 +-
 src/gallium/drivers/i965/brw_wm_glsl.c     |  28 ---
 src/gallium/drivers/i965/brw_wm_pass0.c    |  32 +--
 src/mesa/state_tracker/st_draw.c           |   3 +-
 33 files changed, 722 insertions(+), 404 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_batchbuffer.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 96d400c839..321ac59a7d 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -69,6 +69,7 @@
 #include "util/u_stream.h" 
 #include "util/u_math.h" 
 #include "util/u_tile.h" 
+#include "util/u_prim.h" 
 
 
 #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY
@@ -600,6 +601,32 @@ const char *pf_name( enum pipe_format format )
 }
 
 
+
+static const struct debug_named_value pipe_prim_names[] = {
+#ifdef DEBUG   /* the named entries are only compiled into DEBUG builds */
+   DEBUG_NAMED_VALUE(PIPE_PRIM_POINTS),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_LINES),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_LOOP),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_LINE_STRIP),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLES),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_STRIP),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_TRIANGLE_FAN),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_QUADS),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_QUAD_STRIP),
+   DEBUG_NAMED_VALUE(PIPE_PRIM_POLYGON),
+#endif
+   DEBUG_NAMED_VALUE_END   /* terminator; the sole entry when DEBUG is off */
+};
+
+/** Return the string debug_dump_enum() yields for prim in pipe_prim_names. */
+const char *u_prim_name( unsigned prim )
+{
+   return debug_dump_enum(pipe_prim_names, prim);
+}
+
+
+
+
 #ifdef DEBUG
 void debug_dump_image(const char *prefix,
                       unsigned format, unsigned cpp,
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index a9b533eea7..7434329962 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -135,4 +135,6 @@ static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
    }
 }
 
+const char *u_prim_name( unsigned pipe_prim );
+
 #endif
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index 745b5834af..d414a1f2f6 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,6 +32,8 @@
 #ifndef U_UPLOAD_MGR_H
 #define U_UPLOAD_MGR_H
 
+#include "pipe/p_error.h"
+
 struct pipe_screen;
 struct pipe_buffer;
 struct u_upload_mgr;
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 40c8364824..40e8aa8786 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -61,7 +61,7 @@ C_SOURCES = \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
 	brw_bo.c \
-	intel_batchbuffer.c \
+	brw_batchbuffer.c \
 	intel_tex_layout.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
new file mode 100644
index 0000000000..8bcac76ede
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -0,0 +1,198 @@
+/**************************************************************************
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "brw_batchbuffer.h"
+#include "brw_decode.h"
+#include "brw_reg.h"
+#include "brw_winsys.h"
+
+/* Swap in a freshly allocated buffer object for batch and rewind its write pointer. */
+void
+brw_batchbuffer_reset(struct brw_batchbuffer *batch)
+{
+   struct intel_context *intel = batch->intel;
+
+   if (batch->buf != NULL) {
+      batch->sws->bo_unreference(batch->buf);   /* drop the previous buffer object */
+      batch->buf = NULL;
+   }
+   /* With intel->ttm set, commands are staged in a malloc'ed buffer that is reused across resets. */
+   if (!batch->buffer && intel->ttm == GL_TRUE)
+      batch->buffer = malloc (intel->maxBatchSize);
+
+   batch->buf = batch->sws->bo_alloc(batch->sws,
+				     BRW_BUFFER_TYPE_BATCH,
+				     intel->maxBatchSize, 4096);
+   if (batch->buffer)
+      batch->map = batch->buffer;   /* write into the staging buffer */
+   else {
+      batch->sws->bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->buf->virtual;   /* write straight into the mapped buffer object */
+   }
+   batch->size = intel->maxBatchSize;
+   batch->ptr = batch->map;
+   batch->dirty_state = ~0;
+   batch->cliprect_mode = IGNORE_CLIPRECTS;
+}
+/* Create a batchbuffer bound to sws and give it its first buffer object; NULL if allocation fails. */
+struct brw_batchbuffer *
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
+{
+   struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
+   if (batch == NULL) return NULL;   /* nothing to initialise when CALLOC_STRUCT fails */
+   batch->sws = sws;
+   brw_batchbuffer_reset(batch);
+
+   return batch;
+}
+/* Destroy batch: unmap and unreference its buffer object, then release the staging buffer and the struct. */
+void
+brw_batchbuffer_free(struct brw_batchbuffer *batch)
+{
+   if (batch->map && batch->map != batch->buffer) {   /* map may alias the staging buffer, which is not a bo mapping */
+      batch->sws->bo_unmap(batch->buf);
+      batch->map = NULL;
+   }
+   free(batch->buffer);   /* malloc'ed in brw_batchbuffer_reset; may be NULL */
+   batch->sws->bo_unreference(batch->buf);
+   batch->buf = NULL;
+   FREE(batch);
+}
+
+/* Close off and submit the commands written so far, then reset batch (file/line: debug output only). */
+void
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
+			 int line)
+{
+   struct intel_context *intel = batch->intel;
+   GLuint used = batch->ptr - batch->map;
+
+   if (used == 0)
+      return;
+
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
+   }
+   /* NOTE(review): the block below repeats the check above verbatim - looks like an accidental duplicate; confirm. */
+   if (intel->first_post_swapbuffers_batch == NULL) {
+      intel->first_post_swapbuffers_batch = intel->batch->buf;
+      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
+   }
+
+
+   if (INTEL_DEBUG & DEBUG_BATCH)
+      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+	      used);
+
+   /* Emit a flush if the bufmgr doesn't do it for us. */
+   if (intel->always_flush_cache || !intel->ttm) {
+      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
+   }
+
+   /* Round batchbuffer usage to 2 DWORDs. */
+   /* The end marker written below adds one more DWORD, so pad with a noop when bit 2 of used is clear. */
+   if ((used & 4) == 0) {
+      *(GLuint *) (batch->ptr) = 0; /* noop */
+      batch->ptr += 4;
+      used = batch->ptr - batch->map;
+   }
+
+   /* Mark the end of the buffer. */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* end marker */
+   batch->ptr += 4;
+   used = batch->ptr - batch->map;
+
+   batch->sws->bo_unmap(batch->buf);
+
+   batch->map = NULL;
+   batch->ptr = NULL;
+      
+   batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
+   /* NOTE(review): the debug paths below still call dri_bo_map/dri_bo_unmap and use 'brw', which is not declared here. */
+   if (INTEL_DEBUG & DEBUG_BATCH) {
+      dri_bo_map(batch->buf, GL_FALSE);
+      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
+		   brw->brw_screen->pci_id);
+      dri_bo_unmap(batch->buf);
+   }
+
+   if (INTEL_DEBUG & DEBUG_SYNC) {
+      fprintf(stderr, "waiting for idle\n");
+      dri_bo_map(batch->buf, GL_TRUE);
+      dri_bo_unmap(batch->buf);
+   }
+
+   /* Reset the buffer:
+    */
+   brw_batchbuffer_reset(batch);
+}
+
+
+/*  This is the only way buffers get added to the validate list.
+ *  Records a relocation for buffer at the current write offset, then emits buffer->offset + delta. */
+GLboolean
+brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+                             struct brw_winsys_buffer *buffer,
+                             uint32_t read_domains, uint32_t write_domain,
+			     uint32_t delta)
+{
+   int ret;
+   /* NOTE(review): ret is assigned below but never tested; GL_TRUE is returned unconditionally. */
+   if (batch->ptr - batch->map > batch->buf->size)
+      _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+		    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+
+   ret = batch->sws->bo_emit_reloc(batch->buf,
+				   read_domains,
+				   write_domain,
+				   delta, 
+				   batch->ptr - batch->map,
+				   buffer);
+
+   /*
+    * Using the old buffer offset, write in what the right data would be, in case
+    * the buffer doesn't move and we can short-circuit the relocation processing
+    * in the kernel
+    */
+   brw_batchbuffer_emit_dword (batch, buffer->offset + delta);
+   /* NOTE(review): brw_batchbuffer.h in this patch declares the return type as int, not GLboolean - confirm. */
+   return GL_TRUE;
+}
+/* Append bytes of data at the batch write pointer and advance it; bytes must be a multiple of 4 (asserted). */
+void   /* NOTE(review): brw_batchbuffer.h in this patch declares int, and brw_emit_prim tests the result - confirm */
+brw_batchbuffer_data(struct brw_batchbuffer *batch,
+                       const void *data, GLuint bytes,
+		       enum cliprect_mode cliprect_mode)   /* cliprect_mode is not referenced in the body */
+{
+   assert((bytes & 3) == 0);
+   brw_batchbuffer_require_space(batch, bytes);   /* presumably guarantees room for bytes - verify against the header */
+   __memcpy(batch->ptr, data, bytes);
+   batch->ptr += bytes;
+}
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index b8492882e1..25bb9cefca 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -33,18 +33,16 @@ void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
  * Consider it a convenience function wrapping multple
  * intel_buffer_dword() calls.
  */
-void brw_batchbuffer_data(struct brw_batchbuffer *batch,
+int brw_batchbuffer_data(struct brw_batchbuffer *batch,
                             const void *data, GLuint bytes,
 			    enum cliprect_mode cliprect_mode);
 
-void brw_batchbuffer_release_space(struct brw_batchbuffer *batch,
-                                     GLuint bytes);
 
-GLboolean brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
-                                       struct brw_winsys_buffer *buffer,
-				       uint32_t read_domains,
-				       uint32_t write_domain,
-				       uint32_t offset);
+int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
+			       struct brw_winsys_buffer *buffer,
+			       uint32_t read_domains,
+			       uint32_t write_domain,
+			       uint32_t offset);
 
 /* Inline functions - might actually be better off with these
  * non-inlined.  Certainly better off switching all command packets to
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 76759304eb..ca10bc73f6 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -57,7 +57,7 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp,
    svp->far = 1;
 }
 
-static void prepare_cc_vp( struct brw_context *brw )
+static int prepare_cc_vp( struct brw_context *brw )
 {
    struct brw_cc_viewport ccv;
    struct sane_viewport svp;
@@ -72,6 +72,8 @@ static void prepare_cc_vp( struct brw_context *brw )
 
    brw->sws->bo_unreference(brw->cc.vp_bo);
    brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -158,7 +160,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    return bo;
 }
 
-static void prepare_cc_unit( struct brw_context *brw )
+static int prepare_cc_unit( struct brw_context *brw )
 {
    struct brw_cc_unit_key key;
 
@@ -172,6 +174,8 @@ static void prepare_cc_unit( struct brw_context *brw )
 
    if (brw->cc.state_bo == NULL)
       brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+   
+   return 0;
 }
 
 const struct brw_tracked_state brw_cc_unit = {
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 622d9dba96..1a52fa771b 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -146,7 +146,7 @@ static void compile_clip_prog( struct brw_context *brw,
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_clip_prog(struct brw_context *brw)
+static int upload_clip_prog(struct brw_context *brw)
 {
    struct brw_clip_prog_key key;
 
@@ -173,6 +173,8 @@ static void upload_clip_prog(struct brw_context *brw)
 					&brw->clip.prog_data);
    if (brw->clip.prog_bo == NULL)
       compile_clip_prog( brw, &key );
+
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 25b8c6372f..bf4e6f5103 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -159,7 +159,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    return bo;
 }
 
-static void upload_clip_unit( struct brw_context *brw )
+static int upload_clip_unit( struct brw_context *brw )
 {
    struct brw_clip_unit_key key;
 
@@ -173,6 +173,8 @@ static void upload_clip_unit( struct brw_context *brw )
    if (brw->clip.state_bo == NULL) {
       brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
    }
+   
+   return 0;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index e9605bafe6..e10b7d8bf5 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -105,7 +105,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    brw->state.dirty.mesa = ~0;
    brw->state.dirty.brw = ~0;
 
-   brw->emit_state_always = 0;
+   brw->flags.always_emit_state = 0;
 
    make_empty_list(&brw->query.active_head);
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index dd782fdba9..7ead641811 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -182,6 +182,8 @@ struct brw_fragment_shader {
 #define PIPE_NEW_FRAGMENT_CONSTANTS     0x2
 #define PIPE_NEW_VERTEX_CONSTANTS       0x2
 #define PIPE_NEW_CLIP                   0x2
+#define PIPE_NEW_INDEX_BUFFER           0x2
+#define PIPE_NEW_INDEX_RANGE            0x2
 
 
 #define BRW_NEW_URB_FENCE               0x1
@@ -387,8 +389,8 @@ struct brw_cache {
  */
 struct brw_tracked_state {
    struct brw_state_flags dirty;
-   void (*prepare)( struct brw_context *brw );
-   void (*emit)( struct brw_context *brw );
+   int (*prepare)( struct brw_context *brw );
+   int (*emit)( struct brw_context *brw );
 };
 
 /* Flags for brw->state.cache.
@@ -465,9 +467,7 @@ struct brw_context
    GLuint primitive;
    GLuint reduced_primitive;
 
-   GLboolean emit_state_always;
-
-   /* Active vertex program: 
+   /* Active state from the state tracker: 
     */
    struct {
       const struct brw_vertex_shader *vertex_shader;
@@ -475,11 +475,31 @@ struct brw_context
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_alpha_state *zstencil;
+
+      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_elements;
+      unsigned num_vertex_buffers;
+
       struct pipe_framebuffer_state fb;
       struct pipe_viewport_state vp;
       struct pipe_clip_state ucp;
       struct pipe_buffer *vertex_constants;
       struct pipe_buffer *fragment_constants;
+
+      /**
+       * Index buffer for this draw_prims call.
+       *
+       * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
+       */
+      struct pipe_buffer *index_buffer;
+      unsigned index_size;
+
+      /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
+       */
+      unsigned min_index;
+      unsigned max_index;
+
    } curr;
 
    struct {
@@ -504,30 +524,26 @@ struct brw_context
    struct brw_cached_batch_item *cached_batch_items;
 
    struct {
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_element;
-      unsigned num_vertex_buffer;
-
       struct u_upload_mgr *upload_vertex;
       struct u_upload_mgr *upload_index;
       
-
-      /* Summary of size and varying of active arrays, so we can check
-       * for changes to this state:
+      /* Information on uploaded vertex buffers:
        */
-      struct brw_vertex_info info;
-      unsigned int min_index, max_index;
+      struct {
+	 unsigned stride;	/* in bytes between successive vertices */
+	 unsigned offset;	/* in bytes, of first vertex in bo */
+	 unsigned vertex_count;	/* count of valid vertices which may be accessed */
+	 struct brw_winsys_buffer *bo;
+      } vb[PIPE_MAX_ATTRIBS];
+
+      struct {
+      } ve[PIPE_MAX_ATTRIBS];
+
+      unsigned nr_vb;		/* currently the same as curr.num_vertex_buffers */
+      unsigned nr_ve;		/* currently the same as curr.num_vertex_elements */
    } vb;
 
    struct {
-      /**
-       * Index buffer for this draw_prims call.
-       *
-       * Updates are signaled by BRW_NEW_INDICES.
-       */
-      const struct _mesa_index_buffer *ib;
-
       /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
       struct brw_winsys_buffer *bo;
       unsigned int offset;
@@ -668,6 +684,14 @@ struct brw_context
       int index;
       GLboolean active;
    } query;
+
+   struct {
+      unsigned always_emit_state:1;
+      unsigned always_flush_batch:1;
+      unsigned force_swtnl:1;
+      unsigned no_swtnl:1;
+   } flags;
+
    /* Used to give every program string a unique id
     */
    GLuint program_id;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index edc39ff223..278ffa4ca2 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -48,7 +48,7 @@
  * constants.  That greatly reduces the demand for space in the CURBE.
  * Some of the comments within are dated...
  */
-static void calculate_curbe_offsets( struct brw_context *brw )
+static int calculate_curbe_offsets( struct brw_context *brw )
 {
    /* CACHE_NEW_WM_PROG */
    const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
@@ -104,6 +104,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
       brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
    }
+
+   return 0;
 }
 
 
@@ -157,7 +159,7 @@ static GLfloat fixed_plane[6][4] = {
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
-static void prepare_constant_buffer(struct brw_context *brw)
+static int prepare_constant_buffer(struct brw_context *brw)
 {
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
@@ -170,7 +172,7 @@ static void prepare_constant_buffer(struct brw_context *brw)
 	 brw->curbe.last_buf = NULL;
 	 brw->curbe.last_bufsz  = 0;
       }
-      return;
+      return 0;
    }
 
    buf = (GLfloat *) CALLOC(bufsz, 1);
@@ -305,9 +307,11 @@ static void prepare_constant_buffer(struct brw_context *brw)
     * flushes as necessary when doublebuffering of CURBEs isn't
     * possible.
     */
+
+   return 0;
 }
 
-static void emit_constant_buffer(struct brw_context *brw)
+static int emit_constant_buffer(struct brw_context *brw)
 {
    GLuint sz = brw->curbe.total_size;
 
@@ -322,6 +326,7 @@ static void emit_constant_buffer(struct brw_context *brw)
 		(sz - 1) + brw->curbe.curbe_offset);
    }
    ADVANCE_BATCH();
+   return 0;
 }
 
 const struct brw_tracked_state brw_constant_buffer = {
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 7af490bc5a..b5fe7c9601 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -26,15 +26,18 @@
  **************************************************************************/
 
 
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_debug.h"
+#include "brw_screen.h"
 
 #include "brw_batchbuffer.h"
 
-#define FILE_DEBUG_FLAG DEBUG_BATCH
 
 static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
    _3DPRIM_POINTLIST,
@@ -56,18 +59,21 @@ static uint32_t prim_to_hw_prim[PIPE_PRIM_POLYGON+1] = {
  * programs be immune to the active primitive (ie. cope with all
  * possibilities).  That may not be realistic however.
  */
-static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
+static int brw_set_prim(struct brw_context *brw, unsigned prim )
 {
 
    if (BRW_DEBUG & DEBUG_PRIMS)
       debug_printf("PRIM: %s\n", u_prim_name(prim));
    
    if (prim != brw->primitive) {
+      unsigned reduced_prim;
+
       brw->primitive = prim;
       brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
 
-      if (reduced_prim[prim] != brw->reduced_primitive) {
-	 brw->reduced_primitive = reduced_prim[prim];
+      reduced_prim = u_reduced_prim(prim);
+      if (reduced_prim != brw->reduced_primitive) {
+	 brw->reduced_primitive = reduced_prim;
 	 brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
       }
    }
@@ -77,17 +83,14 @@ static GLuint brw_set_prim(struct brw_context *brw, unsigned prim)
 
 
-static enum pipe_error brw_emit_prim(struct brw_context *brw,
-				     unsigned prim,
-				     unsigned start,
-				     unsigned count,
-				     boolean indexed,
-				     uint32_t hw_prim)
+static int brw_emit_prim(struct brw_context *brw,
+			 unsigned start,
+			 unsigned count,
+			 boolean indexed,
+			 uint32_t hw_prim)
 {
    struct brw_3d_primitive prim_packet;
-
-   if (INTEL_DEBUG & DEBUG_PRIMS)
-      debug_printf("PRIM: %s %d %d\n", u_prim_name(prim), start, count);
+   int ret;
 
    prim_packet.header.opcode = CMD_3D_PRIM;
    prim_packet.header.length = sizeof(prim_packet)/4 - 2;
@@ -101,7 +104,7 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
       prim_packet.start_vert_location += brw->ib.start_vertex_offset;
    prim_packet.instance_count = 1;
    prim_packet.start_instance_location = 0;
-   prim_packet.base_vert_location = prim->basevertex;
+   prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium
 
 
    /* If we're set to always flush, do it before and after the primitive emit.
@@ -109,20 +112,20 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
     * and missed flushes of the render cache as it heads to other parts of
     * the besides the draw code.
     */
-   if (intel->always_flush_cache) {
-      BEGIN_BATCH(1, IGNORE_CLIPRECTS)
-      OUT_BATCH(intel->vtbl.flush_cmd());
+   if (0) {
+      BEGIN_BATCH(1, IGNORE_CLIPRECTS);
+      OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       ADVANCE_BATCH();
    }
    if (prim_packet.verts_per_instance) {
-      ret = brw_batchbuffer_data( brw->intel.batch, &prim_packet,
+      ret = brw_batchbuffer_data( brw->batch, &prim_packet,
 				  sizeof(prim_packet), LOOP_CLIPRECTS);
       if (ret)
 	 return ret;
    }
-   if (intel->always_flush_cache) {
+   if (0) {
       BEGIN_BATCH(1, IGNORE_CLIPRECTS);
-      OUT_BATCH(intel->vtbl.flush_cmd());
+      OUT_BATCH((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       ADVANCE_BATCH();
    }
 
@@ -133,44 +136,24 @@ static enum pipe_error brw_emit_prim(struct brw_context *brw,
 /* May fail if out of video memory for texture or vbo upload, or on
  * fallback conditions.
  */
-static GLboolean brw_try_draw_prims( struct brw_context *brw,
-				     const struct gl_client_array *arrays[],
-				     const struct _mesa_prim *prim,
-				     GLuint nr_prims,
-				     const struct _mesa_index_buffer *ib,
-				     GLuint min_index,
-				     GLuint max_index )
+static int
+try_draw_range_elements(struct brw_context *brw,
+			struct pipe_buffer *index_buffer,
+			unsigned hw_prim, 
+			unsigned start, unsigned count)
 {
-   struct brw_context *brw = brw_context(ctx);
-   GLboolean retval = GL_FALSE;
-   GLboolean warn = GL_FALSE;
-   GLboolean first_time = GL_TRUE;
-   uint32_t hw_prim;
-   GLuint i;
-
-   if (ctx->NewState)
-      _mesa_update_state( ctx );
-
-   /* Bind all inputs, derive varying and size information:
-    */
-   brw_merge_inputs( brw, arrays );
-
-   brw->ib.ib = ib;
-   brw->state.dirty.brw |= BRW_NEW_INDICES;
-
-   brw->vb.min_index = min_index;
-   brw->vb.max_index = max_index;
-   brw->state.dirty.brw |= BRW_NEW_VERTICES;
-
-   hw_prim = brw_set_prim(brw, prim[i].mode);
+   int ret;
 
-   brw_validate_state(brw);
+   ret = brw_validate_state(brw);
+   if (ret)
+      return ret;
 
    /* Check that we can fit our state in with our existing batchbuffer, or
     * flush otherwise.
     */
-   ret = dri_bufmgr_check_aperture_space(brw->state.validated_bos,
-					 brw->state.validated_bo_count);
+   ret = brw->sws->check_aperture_space(brw->sws,
+					brw->state.validated_bos,
+					brw->state.validated_bo_count);
    if (ret)
       return ret;
 
@@ -178,12 +161,12 @@ static GLboolean brw_try_draw_prims( struct brw_context *brw,
    if (ret)
       return ret;
    
-   ret = brw_emit_prim(brw, &prim[i], hw_prim);
+   ret = brw_emit_prim(brw, start, count, index_buffer != NULL, hw_prim);
    if (ret)
       return ret;
 
-   if (intel->always_flush_batch)
-      brw_batchbuffer_flush(intel->batch);
+   if (brw->flags.always_flush_batch)
+      brw_batchbuffer_flush(brw->batch);
 
    return 0;
 }
@@ -197,22 +180,45 @@ brw_draw_range_elements(struct pipe_context *pipe,
 			unsigned max_index,
 			unsigned mode, unsigned start, unsigned count)
 {
-   enum pipe_error ret;
+   struct brw_context *brw = brw_context(pipe);
+   int ret;
+   uint32_t hw_prim;
+
+   hw_prim = brw_set_prim(brw, mode);
 
-   if (!vbo_all_varyings_in_vbos(arrays)) {
-      if (!index_bounds_valid)
-	 vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count);
+
+   /* Potentially trigger upload of new index buffer.
+    *
+    * XXX: do we need to go through state validation to achieve this?
+    * Could just call upload code directly.
+    */
+   if (brw->curr.index_buffer != index_buffer) {
+      pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
+   }
+
+   /* XXX: do we really care?
+    */
+   if (brw->curr.min_index != min_index ||
+       brw->curr.max_index != max_index) 
+   { 
+      brw->curr.min_index = min_index;
+      brw->curr.max_index = max_index;
+      brw->state.dirty.mesa |= PIPE_NEW_INDEX_RANGE;
    }
 
+
    /* Make a first attempt at drawing:
     */
-   ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+   ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
 
    /* Otherwise, flush and retry:
     */
    if (ret != 0) {
-      brw_batchbuffer_flush(intel->batch);
-      ret = brw_try_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+      brw_batchbuffer_flush(brw->batch);
+      ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
       assert(ret == 0);
    }
 
@@ -242,28 +248,37 @@ brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
 
 
-void brw_draw_init( struct brw_context *brw )
+boolean brw_draw_init( struct brw_context *brw )
 {
    /* Register our drawing function: 
     */
    brw->base.draw_arrays = brw_draw_arrays;
    brw->base.draw_elements = brw_draw_elements;
    brw->base.draw_range_elements = brw_draw_range_elements;
-}
 
-void brw_draw_destroy( struct brw_context *brw )
-{
-   int i;
+   /* Create helpers for uploading data in user buffers:
+    */
+   brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base,
+					    128 * 1024,
+					    64,
+					    PIPE_BUFFER_USAGE_VERTEX );
+   if (brw->vb.upload_vertex == NULL)
+      return FALSE;
+
+   brw->vb.upload_index = u_upload_create( &brw->brw_screen->base,
+					   128 * 1024,
+					   64,
+					   PIPE_BUFFER_USAGE_INDEX );
+   if (brw->vb.upload_index == NULL)
+      return FALSE;
 
-   if (brw->vb.upload.bo != NULL) {
-      brw->sws->bo_unreference(brw->vb.upload.bo);
-      brw->vb.upload.bo = NULL;
-   }
+   return TRUE;
+}
 
-   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
-      brw->vb.inputs[i].bo = NULL;
-   }
+void brw_draw_cleanup( struct brw_context *brw )
+{
+   u_upload_destroy( brw->vb.upload_vertex );
+   u_upload_destroy( brw->vb.upload_index );
 
    brw->sws->bo_unreference(brw->ib.bo);
    brw->ib.bo = NULL;
diff --git a/src/gallium/drivers/i965/brw_draw.h b/src/gallium/drivers/i965/brw_draw.h
index 13f0443a81..8dc5dbce62 100644
--- a/src/gallium/drivers/i965/brw_draw.h
+++ b/src/gallium/drivers/i965/brw_draw.h
@@ -32,8 +32,7 @@
 
 struct brw_context;
 
-
-void brw_draw_init( struct brw_context *brw );
+boolean brw_draw_init( struct brw_context *brw );
 void brw_draw_cleanup( struct brw_context *brw );
 
 
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 7b0860d04c..040d8ca93a 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -26,21 +26,23 @@
  **************************************************************************/
 
 #include "pipe/p_context.h"
+#include "pipe/p_error.h"
 
 #include "util/u_upload_mgr.h"
+#include "util/u_math.h"
 
 #include "brw_draw.h"
 #include "brw_defines.h"
 #include "brw_context.h"
 #include "brw_state.h"
-#include "brw_fallback.h"
-
+#include "brw_screen.h"
 #include "brw_batchbuffer.h"
+#include "brw_debug.h"
 
 
-unsigned brw_translate_surface_format( unsigned id )
+static unsigned brw_translate_surface_format( unsigned id )
 {
    switch (id) {
    case PIPE_FORMAT_R64_FLOAT:
@@ -186,70 +188,136 @@ static unsigned get_index_type(int type)
 }
 
 
-
-static boolean brw_prepare_vertices(struct brw_context *brw)
+static int brw_prepare_vertices(struct brw_context *brw)
 {
-   GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; 
+   unsigned int min_index = brw->curr.min_index;
+   unsigned int max_index = brw->curr.max_index;
    GLuint i;
-   const unsigned char *ptr = NULL;
-   GLuint interleave = 0;
-   unsigned int min_index = brw->vb.min_index;
-   unsigned int max_index = brw->vb.max_index;
-
-   struct brw_vertex_element *upload[VERT_ATTRIB_MAX];
-   GLuint nr_uploads = 0;
-
-   /* First build an array of pointers to ve's in vb.inputs_read
-    */
-   if (0)
-      _mesa_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
-
+   int ret;
 
+   if (BRW_DEBUG & DEBUG_VERTS)
+      debug_printf("%s %d..%d\n", __FUNCTION__, min_index, max_index);
 
-   for (i = 0; i < brw->vb.num_vertex_buffer; i++) {
-      struct brw_vertex_buffer *vb = brw->vb.vertex_buffer[i];
-      unsigned size = (vb->stride == 0 ? 
-		       vb->size :
-		       vb->stride * (max_index + 1 - min_index));
 
-
-      if (brw_is_user_buffer(vb)) {
-	 u_upload_buffer( brw->upload_vertex, 
-			  min_index * vb->stride,
-			  size,
-			  &offset,
-			  &buffer );
+   for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
+      struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
+      struct brw_winsys_buffer *bo;
+      struct pipe_buffer *upload_buf;
+      unsigned offset;
+      
+      if (BRW_DEBUG & DEBUG_VERTS)
+	 debug_printf("%s vb[%d] user:%d offset:0x%x sz:0x%x stride:0x%x\n",
+		      __FUNCTION__, i,
+		      brw_buffer_is_user_buffer(vb->buffer),
+		      vb->buffer_offset,
+		      vb->buffer->size,
+		      vb->stride);
+
+      if (brw_buffer_is_user_buffer(vb->buffer)) {
+
+	 /* XXX: simplify this.  Stop the state trackers from generating
+	  * zero-stride buffers & have them use additional constants (or
+	  * add support for >1 constant buffer) instead.
+	  */
+	 unsigned size = (vb->stride == 0 ? 
+			  vb->buffer->size - vb->buffer_offset :
+			  MAX2(vb->buffer->size - vb->buffer_offset,
+			       vb->stride * (max_index + 1 - min_index)));
+
+	 ret = u_upload_buffer( brw->vb.upload_vertex, 
+				vb->buffer_offset + min_index * vb->stride,
+				size,
+				vb->buffer,
+				&offset,
+				&upload_buf );
+	 if (ret)
+	    return ret;
+
+	 bo = brw_buffer(upload_buf)->bo;
+	 
+	 assert(offset + size <= bo->size);
       }
       else
       {
-	 offset = 0;
-	 buffer = vb->buffer;
+	 offset = vb->buffer_offset;
+	 bo = brw_buffer(vb->buffer)->bo;
       }
+
+      assert(offset < bo->size);
       
       /* Set up post-upload info about this vertex buffer:
        */
-      input->offset = (unsigned long)offset;
-      input->stride = vb->stride;
-      input->count = count;
-      brw->sws->bo_unreference(input->bo);
-      input->bo = intel_bufferobj_buffer(intel, intel_buffer,
-					 INTEL_READ);
-      brw->sws->bo_reference(input->bo);
-
-      assert(input->offset < input->bo->size);
-      assert(input->offset + size <= input->bo->size);
+      brw->vb.vb[i].offset = offset;
+      brw->vb.vb[i].stride = vb->stride;
+      brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
+				    1 :
+				    (bo->size - offset) / vb->stride);
+      brw->sws->bo_unreference(brw->vb.vb[i].bo);
+      brw->vb.vb[i].bo = bo;
+      brw->sws->bo_reference(brw->vb.vb[i].bo);
+
+      /* Don't need to retain this reference.  We have a reference on
+       * the underlying winsys buffer:
+       */
+      pipe_buffer_reference( &upload_buf, NULL );
    }
 
+   brw->vb.nr_vb = i;
    brw_prepare_query_begin(brw);
 
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
+   for (i = 0; i < brw->vb.nr_vb; i++) {
+      brw_add_validated_bo(brw, brw->vb.vb[i].bo);
+   }
+
+   return 0;
+}
+
+static int brw_emit_vertex_buffers( struct brw_context *brw )
+{
+   int i;
+
+   /* If the VS doesn't read any inputs (calculating vertex position from
+    * a state variable for some reason, for example), just bail.
+    *
+    * The stale VB state stays in place, but they don't do anything unless
+    * a VE loads from them.
+    */
+   if (brw->vb.nr_vb == 0) {
+      if (BRW_DEBUG & DEBUG_VERTS)
+	 debug_printf("%s: no active vertex buffers\n", __FUNCTION__);
 
-      brw_add_validated_bo(brw, input->bo);
+      return 0;
+   }
+
+   /* Emit VB state packets.
+    */
+   BEGIN_BATCH(1 + brw->vb.nr_vb * 4, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
+	     ((1 + brw->vb.nr_vb * 4) - 2));
+
+   for (i = 0; i < brw->vb.nr_vb; i++) {
+      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
+		BRW_VB0_ACCESS_VERTEXDATA |
+		(brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
+      OUT_RELOC(brw->vb.vb[i].bo,
+		I915_GEM_DOMAIN_VERTEX, 0,
+		brw->vb.vb[i].offset);
+      if (BRW_IS_IGDNG(brw)) {
+	 OUT_RELOC(brw->vb.vb[i].bo,
+		   I915_GEM_DOMAIN_VERTEX, 0,
+		   brw->vb.vb[i].bo->size - 1);
+      } else
+	 OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
+      OUT_BATCH(0); /* Instance data step rate */
    }
+   ADVANCE_BATCH();
+   return 0;
 }
 
-static void brw_emit_vertices(struct brw_context *brw)
+
+
+
+static int brw_emit_vertex_elements(struct brw_context *brw)
 {
    GLuint i;
 
@@ -262,7 +330,7 @@ static void brw_emit_vertices(struct brw_context *brw)
     * The stale VB state stays in place, but they don't do anything unless
     * a VE loads from them.
     */
-   if (brw->vb.nr_enabled == 0) {
+   if (brw->vb.nr_ve == 0) {
       BEGIN_BATCH(3, IGNORE_CLIPRECTS);
       OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
       OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
@@ -274,59 +342,23 @@ static void brw_emit_vertices(struct brw_context *brw)
 		(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
 		(BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
       ADVANCE_BATCH();
-      return;
+      return 0;
    }
 
-   /* Now emit VB and VEP state packets.
+   /* Now emit vertex element (VEP) state packets.
     *
-    * This still defines a hardware VB for each input, even if they
-    * are interleaved or from the same VBO.  TBD if this makes a
-    * performance difference.
     */
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 4, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_BUFFER << 16) |
-	     ((1 + brw->vb.nr_enabled * 4) - 2));
-
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-
-      OUT_BATCH((i << BRW_VB0_INDEX_SHIFT) |
-		BRW_VB0_ACCESS_VERTEXDATA |
-		(input->stride << BRW_VB0_PITCH_SHIFT));
-      OUT_RELOC(input->bo,
-		I915_GEM_DOMAIN_VERTEX, 0,
-		input->offset);
-      if (BRW_IS_IGDNG(brw)) {
-          if (input->stride) {
-              OUT_RELOC(input->bo,
-                        I915_GEM_DOMAIN_VERTEX, 0,
-                        input->offset + input->stride * input->count - 1);
-          } else {
-              assert(input->count == 1);
-              OUT_RELOC(input->bo,
-                        I915_GEM_DOMAIN_VERTEX, 0,
-                        input->offset + input->element_size - 1);
-          }
-      } else
-          OUT_BATCH(input->stride ? input->count : 0);
-      OUT_BATCH(0); /* Instance data step rate */
-   }
-   ADVANCE_BATCH();
-
-   BEGIN_BATCH(1 + brw->vb.nr_enabled * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2));
-   for (i = 0; i < brw->vb.nr_enabled; i++) {
-      struct brw_vertex_element *input = brw->vb.enabled[i];
-      uint32_t format = get_surface_type(input->glarray->Type,
-					 input->glarray->Size,
-					 input->glarray->Format,
-					 input->glarray->Normalized);
+   BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2));
+   for (i = 0; i < brw->vb.nr_ve; i++) {
+      const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
+      uint32_t format = brw_translate_surface_format( input->src_format );
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
       uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
 
-      switch (input->glarray->Size) {
+      switch (input->nr_components) {
       case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
       case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
       case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
@@ -352,11 +384,29 @@ static void brw_emit_vertices(struct brw_context *brw)
                     ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
    }
    ADVANCE_BATCH();
+   return 0;
+}
+
+
+static int brw_emit_vertices( struct brw_context *brw )
+{
+   /* Vertex buffer packets go first; a non-zero result is handed back
+    * to the caller unchanged.
+    */
+   int status = brw_emit_vertex_buffers( brw );
+
+   /* Vertex element packets are only attempted after the buffers. */
+   if (status == 0)
+      status = brw_emit_vertex_elements( brw );
+
+   /* Zero on success, otherwise whatever the failing stage returned. */
+   return status;
 }
 
+
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
-      .mesa = 0,
+      .mesa = PIPE_NEW_INDEX_RANGE,
       .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
       .cache = 0,
    },
@@ -364,104 +414,106 @@ const struct brw_tracked_state brw_vertices = {
    .emit = brw_emit_vertices,
 };
 
-static void brw_prepare_indices(struct brw_context *brw)
+/* Prepare hook: resolve the index buffer to a winsys bo (uploading user buffers); returns 0 or the upload error. */
+static int brw_prepare_indices(struct brw_context *brw)
 {
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-   GLuint ib_size;
+   struct pipe_buffer *index_buffer = brw->curr.index_buffer;
    struct brw_winsys_buffer *bo = NULL;
-   struct gl_buffer_object *bufferobj;
    GLuint offset;
-   GLuint ib_type_size;
+   GLuint index_size;
+   GLuint ib_size;
+   int ret;
 
    if (index_buffer == NULL)
-      return;
+      return 0;
 
-   ib_type_size = get_size(index_buffer->type);
-   ib_size = ib_type_size * index_buffer->count;
-   bufferobj = index_buffer->obj;;
+   if (BRW_DEBUG & DEBUG_VERTS)
+      debug_printf("%s: index_size:%d index_buffer->size:%d\n",
+		   __FUNCTION__,
+		   brw->curr.index_size,
+		   brw->curr.index_buffer->size);
 
-   /* Turn into a proper VBO:
-    */
-   if (!_mesa_is_bufferobj(bufferobj)) {
-      brw->ib.start_vertex_offset = 0;
+   ib_size = index_buffer->size;
+   index_size = brw->curr.index_size;
 
-      /* Get new bufferobj, offset:
-       */
-      get_space(brw, ib_size, &bo, &offset);
-
-      /* Straight upload
+   /* Turn userbuffer into a proper hardware buffer?
+    */
+   if (brw_buffer_is_user_buffer(index_buffer)) {
+      struct pipe_buffer *upload_buf;
+
+      ret = u_upload_buffer( brw->vb.upload_index,
+			     0,
+			     ib_size,
+			     index_buffer,
+			     &offset,
+			     &upload_buf );
+      if (ret)
+	 return ret;
+
+      bo = brw_buffer(upload_buf)->bo;
+      brw->sws->bo_reference(bo);
+      pipe_buffer_reference( &upload_buf, NULL );
+
+      /* XXX: annotate the userbuffer with the upload information so
+       * that successive calls don't get re-uploaded.
        */
-      brw_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-
-   } else {
-      offset = (GLuint) (unsigned long) index_buffer->ptr;
-      brw->ib.start_vertex_offset = 0;
+   }
+   else {
+      bo = brw_buffer(index_buffer)->bo;
+      brw->sws->bo_reference(bo);
+      
+      ib_size = bo->size;
+      offset = 0;
+   }
 
-      /* If the index buffer isn't aligned to its element size, we have to
-       * rebase it into a temporary.
-       */
-       if ((get_size(index_buffer->type) - 1) & offset) {
-           GLubyte *map = ctx->Driver.MapBuffer(ctx,
-                                                GL_ELEMENT_ARRAY_BUFFER_ARB,
-                                                GL_DYNAMIC_DRAW_ARB,
-                                                bufferobj);
-           map += offset;
-
-	   get_space(brw, ib_size, &bo, &offset);
-
-	   dri_bo_subdata(bo, offset, ib_size, map);
-
-           ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj);
-       } else {
-	  bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj),
-				      INTEL_READ);
-	  brw->sws->bo_reference(bo);
-
-	  /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading
-	   * the index buffer state when we're just moving the start index
-	   * of our drawing.
-	   */
-	  brw->ib.start_vertex_offset = offset / ib_type_size;
-	  offset = 0;
-	  ib_size = bo->size;
-       }
+   /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading the
+    * index buffer state when we're just moving the start index of our
+    * drawing.
+    *
+    * In gallium this will happen in the case where successive draw
+    * calls are made with (distinct?) userbuffers, but the upload_mgr
+    * places the data into a single winsys buffer.
+    * 
+    * This statechange doesn't raise any state flags and is always
+    * just merged into the final draw packet:
+    */
+   if (1) {
+      assert((offset & (index_size - 1)) == 0);
+      brw->ib.start_vertex_offset = offset / index_size;
    }
 
+   /* These statechanges trigger a new CMD_INDEX_BUFFER packet:
+    */
    if (brw->ib.bo != bo ||
-       brw->ib.offset != offset ||
        brw->ib.size != ib_size)
    {
-      drm_intel_bo_unreference(brw->ib.bo);
+      brw->sws->bo_unreference(brw->ib.bo);
       brw->ib.bo = bo;
-      brw->ib.offset = offset;
       brw->ib.size = ib_size;
-
       brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
-   } else {
-      drm_intel_bo_unreference(bo);
+   }
+   else {
+      brw->sws->bo_unreference(bo);
    }
 
    brw_add_validated_bo(brw, brw->ib.bo);
+   return 0;
 }
 
 const struct brw_tracked_state brw_indices = {
    .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_INDICES,
+      .mesa = PIPE_NEW_INDEX_BUFFER,   /* the only dirty bit this atom lists */
+      .brw = 0,
       .cache = 0,
    },
    .prepare = brw_prepare_indices,
 };
 
-static void brw_emit_index_buffer(struct brw_context *brw)
+static int brw_emit_index_buffer(struct brw_context *brw)
 {
-   const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
-
-   if (index_buffer == NULL)
-      return;
-
    /* Emit the indexbuffer packet:
     */
+   if (brw->ib.bo)
    {
       struct brw_indexbuffer ib;
 
@@ -469,7 +521,7 @@ static void brw_emit_index_buffer(struct brw_context *brw)
 
       ib.header.bits.opcode = CMD_INDEX_BUFFER;
       ib.header.bits.length = sizeof(ib)/4 - 2;
-      ib.header.bits.index_format = get_index_type(index_buffer->type);
+      ib.header.bits.index_format = get_index_type(brw->ib.size);
       ib.header.bits.cut_index_enable = 0;
 
       BEGIN_BATCH(4, IGNORE_CLIPRECTS);
@@ -483,6 +535,8 @@ static void brw_emit_index_buffer(struct brw_context *brw)
       OUT_BATCH( 0 );
       ADVANCE_BATCH();
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_index_buffer = {
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
index 1df561386e..df49d4b72f 100644
--- a/src/gallium/drivers/i965/brw_eu.c
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -29,6 +29,7 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
   
+#include "util/u_memory.h"
 
 #include "brw_context.h"
 #include "brw_defines.h"
@@ -237,7 +238,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_call *call, *next;
         for (call = c->first_call; call; call = next) {
 	    next = call->next;
-	    _mesa_free(call);
+	    FREE(call);
 	}
 	c->first_call = NULL;
     }
@@ -247,7 +248,7 @@ brw_resolve_cals(struct brw_compile *c)
         struct brw_glsl_label *label, *next;
 	for (label = c->first_label; label; label = next) {
 	    next = label->next;
-	    _mesa_free(label);
+	    FREE(label);
 	}
 	c->first_label = NULL;
     }
diff --git a/src/gallium/drivers/i965/brw_eu_debug.c b/src/gallium/drivers/i965/brw_eu_debug.c
index ad7ec36e86..5989f5a04e 100644
--- a/src/gallium/drivers/i965/brw_eu_debug.c
+++ b/src/gallium/drivers/i965/brw_eu_debug.c
@@ -28,7 +28,8 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-    
+ 
+#include "util/u_debug.h"
 
 #include "brw_eu.h"
 
@@ -52,7 +53,7 @@ void brw_print_reg( struct brw_reg hwreg )
       "f"
    };
 
-   _mesa_printf("%s%s", 
+   debug_printf("%s%s", 
 		hwreg.abs ? "abs/" : "",
 		hwreg.negate ? "-" : "");
      
@@ -64,7 +65,7 @@ void brw_print_reg( struct brw_reg hwreg )
        hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
        hwreg.type == BRW_REGISTER_TYPE_F) {
       /* vector register */
-      _mesa_printf("vec%d", hwreg.nr);
+      debug_printf("vec%d", hwreg.nr);
    }
    else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
 	    hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
@@ -72,13 +73,13 @@ void brw_print_reg( struct brw_reg hwreg )
 	    hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
 	    hwreg.type == BRW_REGISTER_TYPE_F) {      
       /* "scalar" register */
-      _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+      debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
    }
    else if (hwreg.file == BRW_IMMEDIATE_VALUE) {
-      _mesa_printf("imm %f", hwreg.dw1.f);
+      debug_printf("imm %f", hwreg.dw1.f);
    }
    else {
-      _mesa_printf("%s%d.%d<%d;%d,%d>:%s", 
+      debug_printf("%s%d.%d<%d;%d,%d>:%s", 
 		   file[hwreg.file],
 		   hwreg.nr,
 		   hwreg.subnr / type_sz(hwreg.type),
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 0f2612c181..98fec85c1d 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -315,24 +315,20 @@ const struct brw_tracked_state brw_polygon_stipple = {
 
 static void upload_polygon_stipple_offset(struct brw_context *brw)
 {
-   __DRIdrawablePrivate *dPriv = brw->intel.driDrawable;
    struct brw_polygon_stipple_offset bpso;
 
    memset(&bpso, 0, sizeof(bpso));
    bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
    bpso.header.length = sizeof(bpso)/4-2;
 
-   /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0),
-    * we have to invert the Y axis in order to match the OpenGL
-    * pixel coordinate system, and our offset must be matched
-    * to the window position.  If we're drawing to a FBO
-    * (ctx->DrawBuffer->Name != 0), then our native pixel coordinate
-    * system works just fine, and there's no window system to
-    * worry about.
+   /* Never need to offset stipple coordinates.
+    *
+    * XXX: is it ever necessary to invert Y values?
     */
-   if (brw->intel.ctx.DrawBuffer->Name == 0) {
-      bpso.bits0.x_offset = (32 - (dPriv->x & 31)) & 31;
-      bpso.bits0.y_offset = (32 - ((dPriv->y + dPriv->h) & 31)) & 31;
+   if (0) {
+      int x = 0, y = 0, h = 0;
+      bpso.bits0.x_offset = (32 - (x & 31)) & 31;
+      bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31;
    }
    else {
       bpso.bits0.y_offset = 0;
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index e85a1a9c1b..65e7151517 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -53,6 +53,9 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence )
 static GLuint brw_flush_cmd( void )
 {
    struct brw_mi_flush flush;
+
+   return ;
+
    flush.opcode = CMD_MI_FLUSH;
    flush.pad = 0;
    flush.flags = BRW_FLUSH_STATE_CACHE;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index fbb772d18c..8b61da763c 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -33,6 +33,25 @@
 #include "brw_util.h"
 #include "brw_wm.h"
 
+
+/**
+ * Report whether the given fragment program uses GLSL features such as
+ * flow conditionals, loops or subroutines, judged from fp->info.insn_count.
+ */
+GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp)
+{
+    if (fp->info.insn_count[TGSI_OPCODE_ARL] > 0) return 1;
+    if (fp->info.insn_count[TGSI_OPCODE_IF] > 0) return 1;
+    if (fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0) return 1;   /* redundant - IF */
+    if (fp->info.insn_count[TGSI_OPCODE_CAL] > 0) return 1;
+    if (fp->info.insn_count[TGSI_OPCODE_BRK] > 0) return 1;     /* redundant - BGNLOOP */
+    if (fp->info.insn_count[TGSI_OPCODE_RET] > 0) return 1;     /* redundant - CAL */
+    if (fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0) return 1;
+    return 0;
+}
+
+
+
 static void brwBindProgram( struct brw_context *brw,
 			    GLenum target, 
 			    struct gl_program *prog )
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index b0928adbe4..d1d0d7cd43 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,26 +1,11 @@
 
-static void brw_merge_inputs( struct brw_context *brw,
-		       const struct gl_client_array *arrays[])
-{
-   struct brw_vertex_info old = brw->vb.info;
-   GLuint i;
-
-   for (i = 0; i < VERT_ATTRIB_MAX; i++)
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
 
-   memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs));
-   memset(&brw->vb.info, 0, sizeof(brw->vb.info));
 
+void brw_pipe_vertex_cleanup( struct brw_context *brw )
+{
+   unsigned i;   /* index over the VERT_ATTRIB_MAX slots of brw->vb.inputs[] */
    for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      brw->vb.inputs[i].glarray = arrays[i];
-      brw->vb.inputs[i].attrib = (gl_vert_attrib) i;
-
-      if (arrays[i]->StrideB != 0)
-	 brw->vb.info.sizes[i/16] |= (brw->vb.inputs[i].glarray->Size - 1) <<
-	    ((i%16) * 2);
+      brw->sws->bo_unreference(brw->vb.inputs[i].bo);   /* drop this slot's bo reference */
+      brw->vb.inputs[i].bo = NULL;                      /* and forget the released bo */
    }
-
-   /* Raise statechanges if input sizes have changed. */
-   if (memcmp(brw->vb.info.sizes, old.sizes, sizeof(old.sizes)) != 0)
-      brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
 }
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 79d595d0ad..b0be0e1f8a 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -56,6 +56,14 @@ struct brw_transfer
    unsigned offset;
 };
 
+struct brw_buffer
+{
+   struct pipe_buffer base;        /* first member: brw_buffer() casts a pipe_buffer pointer to this struct */
+   struct brw_winsys_buffer *bo;   /* winsys buffer object, read as brw_buffer(buf)->bo */
+   void *ptr;
+   boolean is_user_buffer;         /* returned by brw_buffer_is_user_buffer() */
+};
+
 
 /*
  * Cast wrappers
@@ -72,5 +80,19 @@ brw_transfer(struct pipe_transfer *transfer)
    return (struct brw_transfer *)transfer;
 }
 
+static INLINE struct brw_buffer *
+brw_buffer(struct pipe_buffer *buffer)   /* cast wrapper; performs no type check */
+{
+   return (struct brw_buffer *)buffer;
+}
+
+
+/* Pipe buffer helpers.  Returns the is_user_buffer flag of buf, which
+ * is cast to struct brw_buffer without any check. */
+static INLINE boolean
+brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
+{
+   return ((const struct brw_buffer *)buf)->is_user_buffer;
+}
 
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 53e8f09e37..e2db2e76e6 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -134,7 +134,7 @@ static void upload_sf_prog(struct brw_context *brw)
    key.attrs = brw->vs.prog_data->outputs_written; 
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
-   switch (brw->intel.reduced_primitive) {
+   switch (brw->reduced_primitive) {
    case GL_TRIANGLES: 
       /* NOTE: We just use the edgeflag attribute as an indicator that
        * unfilled triangles are active.  We don't actually do the
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 0e406f12e1..648a16a038 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -40,19 +40,12 @@ static void upload_sf_vp(struct brw_context *brw)
    const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
    struct brw_sf_viewport sfv;
    GLfloat y_scale, y_bias;
-   const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
    const GLfloat *v = ctx->Viewport._WindowMap.m;
 
    memset(&sfv, 0, sizeof(sfv));
 
-   if (render_to_fbo) {
-      y_scale = 1.0;
-      y_bias = 0;
-   }
-   else {
-      y_scale = -1.0;
-      y_bias = ctx->DrawBuffer->Height;
-   }
+   y_scale = 1.0;
+   y_bias = 0;
 
    /* _NEW_VIEWPORT */
 
@@ -73,20 +66,11 @@ static void upload_sf_vp(struct brw_context *brw)
     * Note that the hardware's coordinates are inclusive, while Mesa's min is
     * inclusive but max is exclusive.
     */
-   if (render_to_fbo) {
-      /* texmemory: Y=0=bottom */
-      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-      sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
-      sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
-   }
-   else {
-      /* memory: Y=0=top */
-      sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-      sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-      sfv.scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
-      sfv.scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
-   }
+   /* Y=0=bottom */
+   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
+   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
+   sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
+   sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
 
    brw->sws->bo_unreference(brw->sf.vp_bo);
    brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
@@ -151,7 +135,7 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
    /* _NEW_LIGHT */
    key->provoking_vertex = ctx->Light.ProvokingVertex;
 
-   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   key->render_to_fbo = 1;
 }
 
 static struct brw_winsys_buffer *
@@ -211,11 +195,6 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
-   /* The viewport is inverted for rendering to a FBO, and that inverts
-    * polygon front/back orientation.
-    */
-   sf.sf5.front_winding ^= key->render_to_fbo;
-
    switch (key->cull_face) {
    case GL_FRONT:
       sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
@@ -245,7 +224,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
        sf.sf6.line_width = 0;
 
    /* _NEW_BUFFERS */
-   key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
+   key->render_to_fbo = 1;
    if (!key->render_to_fbo) {
       /* Rendering to an OpenGL window */
       sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 02657eaba7..9bf34c3fe4 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -109,8 +109,8 @@ struct brw_surface_key {
 /***********************************************************************
  * brw_state.c
  */
-void brw_validate_state(struct brw_context *brw);
-void brw_upload_state(struct brw_context *brw);
+int brw_validate_state(struct brw_context *brw);
+int brw_upload_state(struct brw_context *brw);
 void brw_init_state(struct brw_context *brw);
 void brw_destroy_state(struct brw_context *brw);
 
@@ -157,7 +157,7 @@ void brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer
 /***********************************************************************
  * brw_state_batch.c
  */
-#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
+#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->batch, (s), sizeof(*(s)), IGNORE_CLIPRECTS)
 #define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
 
 GLboolean brw_cached_batch_struct( struct brw_context *brw,
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
index b285837070..324fce5163 100644
--- a/src/gallium/drivers/i965/brw_state_batch.c
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -47,7 +47,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct header *newheader = (struct header *)data;
 
    if (brw->emit_state_always) {
-      brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+      brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
       return GL_TRUE;
    }
 
@@ -74,7 +74,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
  emit:
    memcpy(item->header, newheader, sz);
-   brw_batchbuffer_data(brw->intel.batch, data, sz, IGNORE_CLIPRECTS);
+   brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
    return GL_TRUE;
 }
 
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
index 83f138f67a..d2df8af9f4 100644
--- a/src/gallium/drivers/i965/brw_swtnl.c
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -12,13 +12,13 @@ static GLboolean check_fallbacks( struct brw_context *brw,
     * use fallbacks.  If we're forcing fallbacks, always
     * use fallfacks.
     */
-   if (brw->intel.conformance_mode == 0)
+   if (brw->flags.no_swtnl)
       return GL_FALSE;
 
-   if (brw->intel.conformance_mode == 2)
+   if (brw->flags.force_swtnl)
       return GL_TRUE;
 
-   if (ctx->Polygon.SmoothFlag) {
+   if (brw->curr.rast->tmpl.smooth_polys) {
       for (i = 0; i < nr_prims; i++)
 	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
 	    return GL_TRUE;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 82cd8007ac..51e23b9640 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -161,6 +161,13 @@ struct brw_winsys_screen {
 		      size_t size,
 		      const void *data);
 
+   /* XXX: couldn't this be handled by returning true/false on
+    * bo_emit_reloc?
+    */
+   boolean (*check_aperture_space)( struct brw_winsys_screen *iws,
+				    struct brw_winsys_buffer **buffers,
+				    unsigned count );
+
    /**
     * Map a buffer.
     */
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 284cf42f8b..4948ea0dff 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -158,7 +158,7 @@ static void do_wm_prog( struct brw_context *brw,
    memcpy(&c->key, key, sizeof(*key));
 
    c->fp = fp;
-   c->env_param = brw->intel.ctx.FragmentProgram.Parameters;
+   c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
 
    brw_init_compile(brw, &c->func);
 
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 18775830f9..e06de95a8a 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -131,9 +131,9 @@ struct brw_wm_ref {
    GLuint insn:24;
 };
 
-struct brw_wm_constref {
+struct brw_wm_imm_ref {   /* pairs an immediate float with a brw_wm_ref */
    const struct brw_wm_ref *ref;
-   GLfloat constval;
+   GLfloat imm1f;
 };
 
 
@@ -232,8 +232,8 @@ struct brw_wm_compile {
    struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
    GLuint nr_insns;
 
-   struct brw_wm_constref constref[BRW_WM_MAX_CONST];
-   GLuint nr_constrefs;
+   struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST];
+   GLuint nr_imm_refs;
 
    struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
 
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index c4f0711793..a8de5fdd0b 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -7,34 +7,6 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
                                   const struct prog_instruction *inst,
                                   GLuint component);
 
-/**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
- */
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp)
-{
-    int i;
-
-    for (i = 0; i < fp->Base.NumInstructions; i++) {
-	const struct prog_instruction *inst = &fp->Base.Instructions[i];
-	switch (inst->Opcode) {
-	    case OPCODE_ARL:
-	    case OPCODE_IF:
-	    case OPCODE_ENDIF:
-	    case OPCODE_CAL:
-	    case OPCODE_BRK:
-	    case OPCODE_RET:
-	    case OPCODE_BGNLOOP:
-		return GL_TRUE; 
-	    default:
-		break;
-	}
-    }
-    return GL_FALSE; 
-}
-
-
 
 static void
 reclaim_temps(struct brw_wm_compile *c);
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index de5f5fe821..31b0270e84 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -124,33 +124,33 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
 }
 
 
-/** Return a ref to a constant/literal value */
-static const struct brw_wm_ref *get_const_ref( struct brw_wm_compile *c,
-					       const GLfloat *constval )
+/** Return a ref to the immediate *imm1f (cached entry if one matches); NULL when the table is full */
+static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
+					     const GLfloat *imm1f )
 {
    GLuint i;
 
    /* Search for an existing const value matching the request:
     */
-   for (i = 0; i < c->nr_constrefs; i++) {
-      if (c->constref[i].constval == *constval) 
-	 return c->constref[i].ref;
+   for (i = 0; i < c->nr_imm_refs; i++) {
+      if (c->imm_ref[i].imm1f == *imm1f) 
+	 return c->imm_ref[i].ref;
    }
 
    /* Else try to add a new one:
     */
-   if (c->nr_constrefs < BRW_WM_MAX_CONST) {
-      GLuint i = c->nr_constrefs++;
+   if (c->nr_imm_refs < BRW_WM_MAX_CONST) { /* imm_ref[] is declared with BRW_WM_MAX_CONST entries */
+      GLuint i = c->nr_imm_refs++;
 
-      /* A constant is a special type of parameter:
+      /* An immediate is a special type of parameter:
        */
-      c->constref[i].constval = *constval;
-      c->constref[i].ref = get_param_ref(c, constval);
+      c->imm_ref[i].imm1f = *imm1f;
+      c->imm_ref[i].ref = get_param_ref(c, imm1f);
 
-      return c->constref[i].ref;
+      return c->imm_ref[i].ref;
    }
    else {
-      _mesa_printf("%s: out of constrefs\n", __FUNCTION__);
+      _mesa_printf("%s: out of imm_refs\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
@@ -200,7 +200,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 case PROGRAM_CONSTANT:
 	    /* These are invarient:
 	     */
-	    ref = get_const_ref(c, &plist->ParameterValues[idx][component]);
+	    ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
 	    break;
 
 	 case PROGRAM_STATE_VAR:
@@ -266,9 +266,9 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
    static const GLfloat const_one = 1.0;
 
    if (component == SWIZZLE_ZERO) 
-      src_ref = get_const_ref(c, &const_zero);
+      src_ref = get_imm_ref(c, &const_zero);
    else if (component == SWIZZLE_ONE) 
-      src_ref = get_const_ref(c, &const_one);
+      src_ref = get_imm_ref(c, &const_one);
    else 
       src_ref = pass0_get_reg(c, src.File, src.Index, component);
 
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index c76bff9181..ec9c859fcb 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -554,7 +554,8 @@ st_draw_vbo(GLcontext *ctx,
 
    /* Gallium probably doesn't want this in some cases. */
    if (!index_bounds_valid)
-      vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
+      if (!vbo_all_varyings_in_vbos(arrays))
+	 vbo_get_minmax_index(ctx, prims, ib, &min_index, &max_index);
 
    /* sanity check for pointer arithmetic below */
    assert(sizeof(arrays[0]->Ptr[0]) == 1);
-- 
cgit v1.2.3


From 562ca4eae257dd3b268e7f13487c8cd91f618eae Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 25 Oct 2009 01:20:56 +0100
Subject: i965g: more compiling wip

---
 src/gallium/drivers/i965/brw_context.h    |  15 +-
 src/gallium/drivers/i965/brw_curbe.c      |   3 +-
 src/gallium/drivers/i965/brw_gs.c         |  48 +++----
 src/gallium/drivers/i965/brw_gs.h         |   4 +-
 src/gallium/drivers/i965/brw_gs_state.c   |  21 +--
 src/gallium/drivers/i965/brw_misc_state.c | 222 ++++++++++++++----------------
 src/gallium/drivers/i965/brw_pipe_blend.c |  19 +++
 src/gallium/drivers/i965/brw_pipe_rast.c  |  20 +++
 src/gallium/drivers/i965/brw_screen.h     |   7 +
 src/gallium/drivers/i965/brw_sf.c         |   2 +-
 src/gallium/drivers/i965/brw_state.h      |   4 +-
 src/gallium/drivers/i965/brw_urb.c        |   3 +-
 src/gallium/drivers/i965/brw_vs.c         |   4 +-
 src/gallium/drivers/i965/brw_vs_emit.c    |  67 +++++----
 src/gallium/drivers/i965/brw_wm.c         |   2 +-
 src/gallium/drivers/i965/brw_wm.h         |   2 +-
 16 files changed, 243 insertions(+), 200 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 7ead641811..2e17e150bb 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -184,6 +184,8 @@ struct brw_fragment_shader {
 #define PIPE_NEW_CLIP                   0x2
 #define PIPE_NEW_INDEX_BUFFER           0x2
 #define PIPE_NEW_INDEX_RANGE            0x2
+#define PIPE_NEW_BLEND_COLOR            0x2
+#define PIPE_NEW_POLYGON_STIPPLE        0x2
 
 
 #define BRW_NEW_URB_FENCE               0x1
@@ -202,7 +204,9 @@ struct brw_fragment_shader {
 #define BRW_NEW_VERTICES		0x8000
 /**
  * Used for any batch entry with a relocated pointer that will be used
- * by any 3D rendering.
+ * by any 3D rendering.  Need to re-emit these fresh in each
+ * batchbuffer as the referenced buffers may be relocated in the
+ * meantime.
  */
 #define BRW_NEW_BATCH			0x10000
 /** brw->depth_region updated */
@@ -271,7 +275,7 @@ struct brw_vs_prog_data {
    GLuint curb_read_length;
    GLuint urb_read_length;
    GLuint total_grf;
-   GLuint outputs_written;
+   GLuint nr_outputs_written;
    GLuint nr_params;       /**< number of float params/constants */
 
    GLuint inputs_read;
@@ -487,6 +491,9 @@ struct brw_context
       struct pipe_buffer *vertex_constants;
       struct pipe_buffer *fragment_constants;
 
+      struct brw_blend_constant_color bcc;
+      struct brw_polygon_stipple bps;
+
       /**
        * Index buffer for this draw_prims call.
        *
@@ -726,11 +733,11 @@ void brw_init_shader_funcs( struct brw_context *brw );
 
 /* brw_urb.c
  */
-void brw_upload_urb_fence(struct brw_context *brw);
+int brw_upload_urb_fence(struct brw_context *brw);
 
 /* brw_curbe.c
  */
-void brw_upload_cs_urb_state(struct brw_context *brw);
+int brw_upload_cs_urb_state(struct brw_context *brw);
 
 /* brw_disasm.c */
 int brw_disasm (FILE *file, struct brw_instruction *inst);
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 278ffa4ca2..3dd08f6eeb 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -126,7 +126,7 @@ const struct brw_tracked_state brw_curbe_offsets = {
  * fixed-function hardware in a double-buffering scheme to avoid a
  * pipeline stall each time the contents of the curbe is changed.
  */
-void brw_upload_cs_urb_state(struct brw_context *brw)
+int brw_upload_cs_urb_state(struct brw_context *brw)
 {
    struct brw_cs_urb_state cs_urb;
    memset(&cs_urb, 0, sizeof(cs_urb));
@@ -144,6 +144,7 @@ void brw_upload_cs_urb_state(struct brw_context *brw)
 
    assert(brw->urb.nr_cs_entries);
    BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
+   return 0;
 }
 
 static GLfloat fixed_plane[6][4] = {
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 692ce46679..3ecaa74e4f 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -54,7 +54,7 @@ static void compile_gs_prog( struct brw_context *brw,
    /* Need to locate the two positions present in vertex + header.
     * These are currently hardcoded:
     */
-   c.nr_attrs = util_count_bits(c.key.attrs);
+   c.nr_attrs = c.key.nr_attrs;
 
    if (BRW_IS_IGDNG(brw))
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
@@ -80,30 +80,30 @@ static void compile_gs_prog( struct brw_context *brw,
     * already been weeded out by this stage:
     */
    switch (key->primitive) {
-   case GL_QUADS:
+   case PIPE_PRIM_QUADS:
       brw_gs_quads( &c ); 
       break;
-   case GL_QUAD_STRIP:
+   case PIPE_PRIM_QUAD_STRIP:
       brw_gs_quad_strip( &c );
       break;
-   case GL_LINE_LOOP:
+   case PIPE_PRIM_LINE_LOOP:
       brw_gs_lines( &c );
       break;
-   case GL_LINES:
+   case PIPE_PRIM_LINES:
       if (key->hint_gs_always)
 	 brw_gs_lines( &c );
       else {
 	 return;
       }
       break;
-   case GL_TRIANGLES:
+   case PIPE_PRIM_TRIANGLES:
       if (key->hint_gs_always)
 	 brw_gs_tris( &c );
       else {
 	 return;
       }
       break;
-   case GL_POINTS:
+   case PIPE_PRIM_POINTS:
       if (key->hint_gs_always)
 	 brw_gs_points( &c );
       else {
@@ -129,17 +129,17 @@ static void compile_gs_prog( struct brw_context *brw,
 				       &brw->gs.prog_data );
 }
 
-static const GLenum gs_prim[GL_POLYGON+1] = {  
-   GL_POINTS,
-   GL_LINES,
-   GL_LINE_LOOP,
-   GL_LINES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_TRIANGLES,
-   GL_QUADS,
-   GL_QUAD_STRIP,
-   GL_TRIANGLES
+static const unsigned gs_prim[PIPE_PRIM_MAX] = {  
+   PIPE_PRIM_POINTS,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_LINE_LOOP,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_QUADS,
+   PIPE_PRIM_QUAD_STRIP,
+   PIPE_PRIM_TRIANGLES
 };
 
 static void populate_key( struct brw_context *brw,
@@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw,
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_VS_PROG */
-   key->attrs = brw->vs.prog_data->outputs_written;
+   key->nr_attrs = brw->vs.prog_data->nr_outputs_written;
 
    /* BRW_NEW_PRIMITIVE */
    key->primitive = gs_prim[brw->primitive];
@@ -156,14 +156,14 @@ static void populate_key( struct brw_context *brw,
    key->hint_gs_always = 0;	/* debug code? */
 
    key->need_gs_prog = (key->hint_gs_always ||
-			brw->primitive == GL_QUADS ||
-			brw->primitive == GL_QUAD_STRIP ||
-			brw->primitive == GL_LINE_LOOP);
+			brw->primitive == PIPE_PRIM_QUADS ||
+			brw->primitive == PIPE_PRIM_QUAD_STRIP ||
+			brw->primitive == PIPE_PRIM_LINE_LOOP);
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void prepare_gs_prog(struct brw_context *brw)
+static int prepare_gs_prog(struct brw_context *brw)
 {
    struct brw_gs_prog_key key;
    /* Populate the key:
@@ -184,6 +184,8 @@ static void prepare_gs_prog(struct brw_context *brw)
       if (brw->gs.prog_bo == NULL)
 	 compile_gs_prog( brw, &key );
    }
+
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_gs.h b/src/gallium/drivers/i965/brw_gs.h
index bbb991ea2e..6e616dcb87 100644
--- a/src/gallium/drivers/i965/brw_gs.h
+++ b/src/gallium/drivers/i965/brw_gs.h
@@ -40,11 +40,11 @@
 #define MAX_GS_VERTS (4)	     
 
 struct brw_gs_prog_key {
-   GLuint attrs:32;
+   GLuint nr_attrs:8;
    GLuint primitive:4;
    GLuint hint_gs_always:1;
    GLuint need_gs_prog:1;
-   GLuint pad:26;
+   GLuint pad:18;
 };
 
 struct brw_gs_compile {
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 6d03d72d96..15a66c9741 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -29,11 +29,12 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
  
-
+#include "util/u_math.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_debug.h"
 
 struct brw_gs_unit_key {
    unsigned int total_grf;
@@ -76,7 +77,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 
    memset(&gs, 0, sizeof(gs));
 
-   gs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    if (key->prog_active) /* reloc */
       gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
 
@@ -100,7 +101,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    if (BRW_IS_IGDNG(brw))
       gs.thread4.rendering_enable = 1;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
+   if (BRW_DEBUG & DEBUG_STATS)
       gs.thread4.stats_enable = 1;
 
    bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
@@ -111,17 +112,17 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 
    if (key->prog_active) {
       /* Emit GS program relocation */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_INSTRUCTION, 0,
-			gs.thread0.grf_reg_count << 1,
-			offsetof(struct brw_gs_unit_state, thread0),
-			brw->gs.prog_bo);
+      brw->sws->bo_emit_reloc(bo,
+			      I915_GEM_DOMAIN_INSTRUCTION, 0,
+			      gs.thread0.grf_reg_count << 1,
+			      offsetof(struct brw_gs_unit_state, thread0),
+			      brw->gs.prog_bo);
    }
 
    return bo;
 }
 
-static void prepare_gs_unit(struct brw_context *brw)
+static int prepare_gs_unit(struct brw_context *brw)
 {
    struct brw_gs_unit_key key;
 
@@ -135,6 +136,8 @@ static void prepare_gs_unit(struct brw_context *brw)
    if (brw->gs.state_bo == NULL) {
       brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_gs_unit = {
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 98fec85c1d..ccebe08b4f 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -31,10 +31,12 @@
  
 
+#include "brw_debug.h"
 #include "brw_batchbuffer.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_screen.h"
 
 
@@ -44,25 +46,16 @@
  * Blend color
  */
 
-static void upload_blend_constant_color(struct brw_context *brw)
+static int upload_blend_constant_color(struct brw_context *brw)
 {
-   struct brw_blend_constant_color bcc;
-
-   memset(&bcc, 0, sizeof(bcc));      
-   bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR;
-   bcc.header.length = sizeof(bcc)/4-2;
-   bcc.blend_constant_color[0] = ctx->Color.BlendColor[0];
-   bcc.blend_constant_color[1] = ctx->Color.BlendColor[1];
-   bcc.blend_constant_color[2] = ctx->Color.BlendColor[2];
-   bcc.blend_constant_color[3] = ctx->Color.BlendColor[3];
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bcc);
+   BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bcc);
+   return 0;
 }
 
 
 const struct brw_tracked_state brw_blend_constant_color = {
    .dirty = {
-      .mesa = _NEW_COLOR,
+      .mesa = PIPE_NEW_BLEND_COLOR,
       .brw = 0,
       .cache = 0
    },
@@ -70,30 +63,32 @@ const struct brw_tracked_state brw_blend_constant_color = {
 };
 
 /* Constant single cliprect for framebuffer object or DRI2 drawing */
-static void upload_drawing_rect(struct brw_context *brw)
+static int upload_drawing_rect(struct brw_context *brw)
 {
    BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
    OUT_BATCH(_3DSTATE_DRAWRECT_INFO_I965);
    OUT_BATCH(0);
-   OUT_BATCH(((brw->fb.width - 1) & 0xffff) |
-	    ((brw->fb.height - 1) << 16));
+   OUT_BATCH(((brw->curr.fb.width - 1) & 0xffff) |
+	    ((brw->curr.fb.height - 1) << 16));
    OUT_BATCH(0);
    ADVANCE_BATCH();
+   return 0;
 }
 
 const struct brw_tracked_state brw_drawing_rect = {
    .dirty = {
-      .mesa = _NEW_BUFFERS,
+      .mesa = PIPE_NEW_FRAMEBUFFER,
       .brw = 0,
       .cache = 0
    },
    .emit = upload_drawing_rect
 };
 
-static void prepare_binding_table_pointers(struct brw_context *brw)
+static int prepare_binding_table_pointers(struct brw_context *brw)
 {
    brw_add_validated_bo(brw, brw->vs.bind_bo);
    brw_add_validated_bo(brw, brw->wm.bind_bo);
+   return 0;
 }
 
 /**
@@ -103,7 +98,7 @@ static void prepare_binding_table_pointers(struct brw_context *brw)
  * The binding table pointers are relative to the surface state base address,
  * which is 0.
  */
-static void upload_binding_table_pointers(struct brw_context *brw)
+static int upload_binding_table_pointers(struct brw_context *brw)
 {
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
@@ -116,6 +111,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
    OUT_BATCH(0); /* sf */
    OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
    ADVANCE_BATCH();
+   return 0;
 }
 
 const struct brw_tracked_state brw_binding_table_pointers = {
@@ -135,7 +131,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
  * The state pointers in this packet are all relative to the general state
  * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
  */
-static void upload_pipelined_state_pointers(struct brw_context *brw )
+static int upload_pipelined_state_pointers(struct brw_context *brw )
 {
    BEGIN_BATCH(7, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
@@ -151,10 +147,11 @@ static void upload_pipelined_state_pointers(struct brw_context *brw )
    ADVANCE_BATCH();
 
    brw->state.dirty.brw |= BRW_NEW_PSP;
+   return 0;
 }
 
 
-static void prepare_psp_urb_cbs(struct brw_context *brw)
+static int prepare_psp_urb_cbs(struct brw_context *brw)
 {
    brw_add_validated_bo(brw, brw->vs.state_bo);
    brw_add_validated_bo(brw, brw->gs.state_bo);
@@ -162,13 +159,26 @@ static void prepare_psp_urb_cbs(struct brw_context *brw)
    brw_add_validated_bo(brw, brw->sf.state_bo);
    brw_add_validated_bo(brw, brw->wm.state_bo);
    brw_add_validated_bo(brw, brw->cc.state_bo);
+   return 0;
 }
 
-static void upload_psp_urb_cbs(struct brw_context *brw )
+static int upload_psp_urb_cbs(struct brw_context *brw )
 {
-   upload_pipelined_state_pointers(brw);
-   brw_upload_urb_fence(brw);
-   brw_upload_cs_urb_state(brw);
+   int ret;
+   
+   ret = upload_pipelined_state_pointers(brw);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_urb_fence(brw);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_cs_urb_state(brw);
+   if (ret)
+      return ret;
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_psp_urb_cbs = {
@@ -187,20 +197,22 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
    .emit = upload_psp_urb_cbs,
 };
 
-static void prepare_depthbuffer(struct brw_context *brw)
+static int prepare_depthbuffer(struct brw_context *brw)
 {
-   struct intel_region *region = brw->state.depth_region;
+   struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
 
-   if (region != NULL)
-      brw_add_validated_bo(brw, region->buffer);
+   if (zsbuf)
+      brw_add_validated_bo(brw, brw_surface_bo(zsbuf));
+
+   return 0;
 }
 
-static void emit_depthbuffer(struct brw_context *brw)
+static int emit_depthbuffer(struct brw_context *brw)
 {
-   struct intel_region *region = brw->state.depth_region;
+   struct pipe_surface *surface = brw->curr.fb.zsbuf;
    unsigned int len = (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) ? 6 : 5;
 
-   if (region == NULL) {
+   if (surface == NULL) {
       BEGIN_BATCH(len, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
       OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
@@ -214,38 +226,45 @@ static void emit_depthbuffer(struct brw_context *brw)
 
       ADVANCE_BATCH();
    } else {
+      struct brw_winsys_buffer *bo;
       unsigned int format;
+      unsigned int pitch;
+      unsigned int cpp;
 
-      switch (region->cpp) {
-      case 2:
+      switch (surface->format) {
+      case PIPE_FORMAT_Z16_UNORM:
 	 format = BRW_DEPTHFORMAT_D16_UNORM;
+	 cpp = 2;
+	 break;
+      case PIPE_FORMAT_Z24S8_UNORM:
+	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+	 cpp = 4;
 	 break;
-      case 4:
-	 if (intel->depth_buffer_is_float)
-	    format = BRW_DEPTHFORMAT_D32_FLOAT;
-	 else
-	    format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
+      case PIPE_FORMAT_Z32_FLOAT:
+	 format = BRW_DEPTHFORMAT_D32_FLOAT;
+	 cpp = 4;
 	 break;
       default:
 	 assert(0);
-	 return;
+	 return PIPE_ERROR_BAD_INPUT;
       }
 
-      assert(region->tiling != I915_TILING_X);
+      bo = brw_surface_bo(surface);
+      pitch = brw_surface_pitch(surface);
 
       BEGIN_BATCH(len, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
-      OUT_BATCH(((region->pitch * region->cpp) - 1) |
+      OUT_BATCH(((pitch * cpp) - 1) |
 		(format << 18) |
 		(BRW_TILEWALK_YMAJOR << 26) |
-		((region->tiling != I915_TILING_NONE) << 27) |
+		((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
 		(BRW_SURFACE_2D << 29));
-      OUT_RELOC(region->buffer,
+      OUT_RELOC(bo,
 		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-		0);
+		surface->offset);
       OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
-		((region->pitch - 1) << 6) |
-		((region->height - 1) << 19));
+		((pitch - 1) << 6) |
+		((surface->height - 1) << 19));
       OUT_BATCH(0);
 
       if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
@@ -253,6 +272,8 @@ static void emit_depthbuffer(struct brw_context *brw)
 
       ADVANCE_BATCH();
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_depthbuffer = {
@@ -271,37 +292,15 @@ const struct brw_tracked_state brw_depthbuffer = {
  * Polygon stipple packet
  */
 
-static void upload_polygon_stipple(struct brw_context *brw)
+static int upload_polygon_stipple(struct brw_context *brw)
 {
-   struct brw_polygon_stipple bps;
-   GLuint i;
-
-   memset(&bps, 0, sizeof(bps));
-   bps.header.opcode = CMD_POLY_STIPPLE_PATTERN;
-   bps.header.length = sizeof(bps)/4-2;
-
-   /* Polygon stipple is provided in OpenGL order, i.e. bottom
-    * row first.  If we're rendering to a window (i.e. the
-    * default frame buffer object, 0), then we need to invert
-    * it to match our pixel layout.  But if we're rendering
-    * to a FBO (i.e. any named frame buffer object), we *don't*
-    * need to invert - we already match the layout.
-    */
-   if (ctx->DrawBuffer->Name == 0) {
-      for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */
-   }
-   else {
-      for (i = 0; i < 32; i++)
-         bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */
-   }
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bps);
+   BRW_CACHED_BATCH_STRUCT(brw, &brw->curr.bps);
+   return 0;
 }
 
 const struct brw_tracked_state brw_polygon_stipple = {
    .dirty = {
-      .mesa = _NEW_POLYGONSTIPPLE,
+      .mesa = PIPE_NEW_POLYGON_STIPPLE,
       .brw = 0,
       .cache = 0
    },
@@ -313,37 +312,26 @@ const struct brw_tracked_state brw_polygon_stipple = {
  * Polygon stipple offset packet
  */
 
-static void upload_polygon_stipple_offset(struct brw_context *brw)
+static int upload_polygon_stipple_offset(struct brw_context *brw)
 {
    struct brw_polygon_stipple_offset bpso;
 
+   /* This is invariant state in gallium:
+    */
    memset(&bpso, 0, sizeof(bpso));
    bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
    bpso.header.length = sizeof(bpso)/4-2;
-
-   /* Never need to offset stipple coordinates.
-    *
-    * XXX: is it ever necessary to invert Y values?
-    */
-   if (0) {
-      int x = 0, y = 0, h = 0;
-      bpso.bits0.x_offset = (32 - (x & 31)) & 31;
-      bpso.bits0.y_offset = (32 - ((y + h) & 31)) & 31;
-   }
-   else {
-      bpso.bits0.y_offset = 0;
-      bpso.bits0.x_offset = 0;
-   }
+   bpso.bits0.y_offset = 0;
+   bpso.bits0.x_offset = 0;
 
    BRW_CACHED_BATCH_STRUCT(brw, &bpso);
+   return 0;
 }
 
-#define _NEW_WINDOW_POS 0x40000000
-
 const struct brw_tracked_state brw_polygon_stipple_offset = {
    .dirty = {
-      .mesa = _NEW_WINDOW_POS,
-      .brw = 0,
+      .mesa = 0,
+      .brw = BRW_NEW_CONTEXT,
       .cache = 0
    },
    .emit = upload_polygon_stipple_offset
@@ -352,12 +340,12 @@ const struct brw_tracked_state brw_polygon_stipple_offset = {
 /**********************************************************************
  * AA Line parameters
  */
-static void upload_aa_line_parameters(struct brw_context *brw)
+static int upload_aa_line_parameters(struct brw_context *brw)
 {
    struct brw_aa_line_parameters balp;
    
    if (BRW_IS_965(brw))
-      return;
+      return 0;
 
    /* use legacy aa line coverage computation */
    memset(&balp, 0, sizeof(balp));
@@ -365,6 +353,7 @@ static void upload_aa_line_parameters(struct brw_context *brw)
    balp.header.length = sizeof(balp) / 4 - 2;
    
    BRW_CACHED_BATCH_STRUCT(brw, &balp);
+   return 0;
 }
 
 const struct brw_tracked_state brw_aa_line_parameters = {
@@ -380,31 +369,16 @@ const struct brw_tracked_state brw_aa_line_parameters = {
  * Line stipple packet
  */
 
-static void upload_line_stipple(struct brw_context *brw)
+static int upload_line_stipple(struct brw_context *brw)
 {
-   struct brw_line_stipple bls;
-   GLfloat tmp;
-   GLint tmpi;
-
-   memset(&bls, 0, sizeof(bls));
-   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
-   bls.header.length = sizeof(bls)/4 - 2;
-
-   bls.bits0.pattern = ctx->Line.StipplePattern;
-   bls.bits1.repeat_count = ctx->Line.StippleFactor;
-
-   tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
-   tmpi = tmp * (1<<13);
-
-
-   bls.bits1.inverse_repeat_count = tmpi;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bls);
+   struct brw_line_stipple *bls = NULL; //brw->curr.rast->bls;
+   BRW_CACHED_BATCH_STRUCT(brw, bls);
+   return 0;
 }
 
 const struct brw_tracked_state brw_line_stipple = {
    .dirty = {
-      .mesa = _NEW_LINE,
+      .mesa = PIPE_NEW_RAST,
       .brw = 0,
       .cache = 0
    },
@@ -416,7 +390,7 @@ const struct brw_tracked_state brw_line_stipple = {
  * Misc invarient state packets
  */
 
-static void upload_invarient_state( struct brw_context *brw )
+static int upload_invarient_state( struct brw_context *brw )
 {
    {
       /* 0x61040000  Pipeline Select */
@@ -424,7 +398,10 @@ static void upload_invarient_state( struct brw_context *brw )
       struct brw_pipeline_select ps;
 
       memset(&ps, 0, sizeof(ps));
-      ps.header.opcode = CMD_PIPELINE_SELECT(brw);
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw))
+	 ps.header.opcode = CMD_PIPELINE_SELECT_GM45;
+      else
+	 ps.header.opcode = CMD_PIPELINE_SELECT_965;
       ps.header.pipeline_select = 0;
       BRW_BATCH_STRUCT(brw, &ps);
    }
@@ -460,12 +437,18 @@ static void upload_invarient_state( struct brw_context *brw )
       struct brw_vf_statistics vfs;
       memset(&vfs, 0, sizeof(vfs));
 
-      vfs.opcode = CMD_VF_STATISTICS(brw);
-      if (INTEL_DEBUG & DEBUG_STATS)
+      if (BRW_IS_G4X(brw) || BRW_IS_IGDNG(brw)) 
+	 vfs.opcode = CMD_VF_STATISTICS_GM45;
+      else 
+	 vfs.opcode = CMD_VF_STATISTICS_965;
+
+      if (BRW_DEBUG & DEBUG_STATS)
 	 vfs.statistics_enable = 1; 
 
       BRW_BATCH_STRUCT(brw, &vfs);
    }
+   
+   return 0;
 }
 
 const struct brw_tracked_state brw_invarient_state = {
@@ -485,7 +468,7 @@ const struct brw_tracked_state brw_invarient_state = {
  * state pools.  This comes at the expense of memory, and more expensive cache
  * misses.
  */
-static void upload_state_base_address( struct brw_context *brw )
+static int upload_state_base_address( struct brw_context *brw )
 {
    /* Output the structure (brw_state_base_address) directly to the
     * batchbuffer, so we can emit relocations inline.
@@ -511,6 +494,7 @@ static void upload_state_base_address( struct brw_context *brw )
        OUT_BATCH(1); /* Indirect object upper bound */
        ADVANCE_BATCH();
    }
+   return 0;
 }
 
 const struct brw_tracked_state brw_state_base_address = {
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index 17895d2782..54d09d9e45 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -43,3 +43,22 @@
    if (INTEL_DEBUG & DEBUG_STATS)
       cc.cc5.statistics_enable = 1;
 }
+
+
+
+static void brw_set_blend_color(struct pipe_context *pipe,
+				const float *blend_color)
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_blend_constant_color *bcc = &brw->curr.blend_color.bcc;
+
+   memset(bcc, 0, sizeof(*bcc));      
+   bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
+   bcc->header.length = sizeof(*bcc)/4-2;
+   bcc->blend_constant_color[0] = blend_color[0];
+   bcc->blend_constant_color[1] = blend_color[1];
+   bcc->blend_constant_color[2] = blend_color[2];
+   bcc->blend_constant_color[3] = blend_color[3];
+
+   brw->state.dirty.pipe |= PIPE_NEW_BLEND_COLOR;
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index ff64dbd48d..86822d478a 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -44,3 +44,23 @@ calculate_clip_key_rast()
       }
    }
 }
+
+
+static void
+calculate_line_stipple_rast()
+{
+   GLfloat tmp;
+   GLint tmpi;
+
+   memset(&bls, 0, sizeof(bls));
+   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls.header.length = sizeof(bls)/4 - 2;
+   bls.bits0.pattern = brw->curr.rast.line_stipple_pattern;
+   bls.bits1.repeat_count = brw->curr.rast.line_stipple_factor + 1;
+
+   tmp = 1.0 / (GLfloat) bls.bits1.repeat_count;
+   tmpi = tmp * (1<<13);
+
+   bls.bits1.inverse_repeat_count = tmpi;
+
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index b0be0e1f8a..eafd8ddf77 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -95,4 +95,11 @@ brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
    return ((const struct brw_buffer *)buf)->is_user_buffer;
 }
 
+struct brw_winsys_buffer *
+brw_surface_bo( struct pipe_surface *surface );
+
+unsigned
+brw_surface_pitch( const struct pipe_surface *surface );
+
+
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index e2db2e76e6..1b73b3fd51 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -131,7 +131,7 @@ static void upload_sf_prog(struct brw_context *brw)
    /* Populate the key, noting state dependencies:
     */
    /* CACHE_NEW_VS_PROG */
-   key.attrs = brw->vs.prog_data->outputs_written; 
+   key.attrs = brw->vs.prog_data->nr_outputs_written; 
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
    switch (brw->reduced_primitive) {
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 9bf34c3fe4..663fc839df 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -33,9 +33,11 @@
 #ifndef BRW_STATE_H
 #define BRW_STATE_H
 
-#include "brw_context.h"
+#include "pipe/p_error.h"
 #include "util/u_memory.h"
 
+#include "brw_context.h"
+
 static inline void
 brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
 {
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 18d79c5ebb..a2277519ad 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -221,7 +221,7 @@ const struct brw_tracked_state brw_recalculate_urb_fence = {
 
 
-void brw_upload_urb_fence(struct brw_context *brw)
+int brw_upload_urb_fence(struct brw_context *brw)
 {
    struct brw_urb_fence uf;
    memset(&uf, 0, sizeof(uf));
@@ -247,4 +247,5 @@ void brw_upload_urb_fence(struct brw_context *brw)
    uf.bits1.cs_fence  = URB_SIZES(brw);
 
    BRW_BATCH_STRUCT(brw, &uf);
+   return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index dcd687ac34..010ac115d3 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -51,11 +51,11 @@ static void do_vs_prog( struct brw_context *brw,
    brw_init_compile(brw, &c.func);
    c.vp = vp;
 
-   c.prog_data.outputs_written = vp->program.Base.OutputsWritten;
+   c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten;
    c.prog_data.inputs_read = vp->program.Base.InputsRead;
 
    if (c.key.copy_edgeflag) {
-      c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE;
+      c.prog_data.nr_outputs_written |= 1<<VERT_RESULT_EDGE;
       c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
    }
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index e946944295..086f54799e 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -140,35 +140,33 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->first_overflow_output = 0;
 
    if (BRW_IS_IGDNG(c->func.brw))
-       mrf = 8;
+      mrf = 8;
    else
-       mrf = 4;
+      mrf = 4;
 
-   for (i = 0; i < VERT_RESULT_MAX; i++) {
-      if (c->prog_data.outputs_written & (1 << i)) {
-	 c->nr_outputs++;
-         assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
-	 if (i == VERT_RESULT_HPOS) {
-	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
-	    reg++;
+   for (i = 0; i < c->prog_data.nr_outputs_written; i++) {
+      c->nr_outputs++;
+      assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
+      if (i == VERT_RESULT_HPOS) {
+	 c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
+      }
+      else if (i == VERT_RESULT_PSIZ) {
+	 c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
+	 mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
+      }
+      else {
+	 if (mrf < 16) {
+	    c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
+	    mrf++;
 	 }
-	 else if (i == VERT_RESULT_PSIZ) {
+	 else {
+	    /* too many vertex results to fit in MRF, use GRF for overflow */
+	    if (!c->first_overflow_output)
+	       c->first_overflow_output = i;
 	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
 	    reg++;
-	    mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
-	 }
-	 else {
-            if (mrf < 16) {
-               c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
-               mrf++;
-            }
-            else {
-               /* too many vertex results to fit in MRF, use GRF for overflow */
-               if (!c->first_overflow_output)
-                  c->first_overflow_output = i;
-               c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
-               reg++;
-            }
 	 }
       }
    }     
@@ -238,9 +236,9 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
 
    if (BRW_IS_IGDNG(c->func.brw))
-       c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
+      c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
    else
-       c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
+      c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
 
    c->prog_data.total_grf = reg;
 
@@ -1050,8 +1048,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    /* Update the header for point size, user clipping flags, and -ve rhw
     * workaround.
     */
-   if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
-       c->key.nr_userclip || BRW_IS_965(p->brw))
+   if (c->prog_data.writes_psiz ||
+       c->key.nr_userclip || 
+       BRW_IS_965(p->brw))
    {
       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
       GLuint i;
@@ -1060,7 +1059,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
       brw_set_access_mode(p, BRW_ALIGN_16);	
 
-      if (c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) {
+      if (c->prog_data.writes_psiz) {
 	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
 	 brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
 	 brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
@@ -1149,12 +1148,10 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * at mrf[4] atm...
        */
       GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < VERT_RESULT_MAX; i++) {
-         if (c->prog_data.outputs_written & (1 << i)) {
-            /* move from GRF to MRF */
-            brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
-            mrf++;
-         }
+      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) {
+	 /* move from GRF to MRF */
+	 brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+	 mrf++;
       }
 
       brw_urb_WRITE(p,
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 4948ea0dff..764708f7df 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -310,7 +310,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
    }
 
    /* CACHE_NEW_VS_PROG */
-   key->vp_outputs_written = brw->vs.prog_data->outputs_written; /* bitmask */
+   key->vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written;
 
    /* The unique fragment program ID */
    key->program_string_id = fp->id;
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index e06de95a8a..bf241f5fa4 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -76,7 +76,7 @@ struct brw_wm_prog_key {
 
    GLuint program_string_id:32;
    GLuint drawable_height;
-   GLuint vp_outputs_written;
+   GLuint vp_nr_outputs_written;
 };
 
 
-- 
cgit v1.2.3


From 118dfe16887d1ec4d3b96d49b76fffa0d2924132 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 25 Oct 2009 05:05:27 +0100
Subject: r300g: added support for 3D textures

Mipmaps not tested. Also, I am not sure why piglit/texturing/tex3d needs
to have color tolerance +-1 to pass. The classic Mesa driver doesn't
need that.
---
 src/gallium/drivers/r300/r300_context.h |  3 ++
 src/gallium/drivers/r300/r300_screen.c  | 59 ++++++++++-----------------------
 src/gallium/drivers/r300/r300_texture.c | 39 ++++++++++++++++------
 src/gallium/drivers/r300/r300_texture.h |  3 ++
 4 files changed, 51 insertions(+), 53 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 30b80fa9db..4d73567bbe 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -181,6 +181,9 @@ struct r300_texture {
     /* Offsets into the buffer. */
     unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
 
+    /* Size in bytes of one zslice or face, depending on the texture target */
+    unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
+
     /**
      * If non-zero, override the natural texture layout with
      * a custom stride (in bytes).
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 1d9f91d0f7..f581f0ca09 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -119,32 +119,13 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
         case PIPE_CAP_TEXTURE_SHADOW_MAP:
             return 1;
         case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
-            if (r300screen->caps->is_r500) {
-                /* 13 == 4096x4096 */
-                return 13;
-            } else {
-                /* 12 == 2048x2048 */
-                return 12;
-            }
         case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
-            /* So, technically, the limit is the same as above, but some math
-             * shows why this is silly. Assuming RGBA, 4cpp, we can see that
-             * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly
-             * practical. However, if at some point a game really wants this,
-             * then we can remove or raise this limit. */
-            if (r300screen->caps->is_r500) {
-                /* 9 == 256x256x256 */
-                return 9;
-            } else {
-                /* 8 == 128*128*128 */
-                return 8;
-            }
         case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
             if (r300screen->caps->is_r500) {
-                /* 13 == 4096x4096 */
+                /* 13 == 4096 */
                 return 13;
             } else {
-                /* 12 == 2048x2048 */
+                /* 12 == 2048 */
                 return 12;
             }
         case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
@@ -191,8 +172,8 @@ static float r300_get_paramf(struct pipe_screen* pscreen, int param)
     }
 }
 
-static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
-                                   boolean is_r500)
+static boolean check_tex_format(enum pipe_format format, uint32_t usage,
+                                boolean is_r500)
 {
     uint32_t retval = 0;
 
@@ -286,7 +267,6 @@ static boolean check_tex_2d_format(enum pipe_format format, uint32_t usage,
     return (retval >= usage);
 }
 
-/* XXX moar targets */
 static boolean r300_is_format_supported(struct pipe_screen* pscreen,
                                         enum pipe_format format,
                                         enum pipe_texture_target target,
@@ -294,15 +274,17 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen,
                                         unsigned geom_flags)
 {
     switch (target) {
+        case PIPE_TEXTURE_1D:   /* handle 1D textures as 2D ones */
         case PIPE_TEXTURE_2D:
-            return check_tex_2d_format(format, tex_usage,
-                r300_screen(pscreen)->caps->is_r500);
-        case PIPE_TEXTURE_1D:
         case PIPE_TEXTURE_3D:
+            return check_tex_format(format, tex_usage,
+                r300_screen(pscreen)->caps->is_r500);
+
         case PIPE_TEXTURE_CUBE:
             debug_printf("r300: Implementation error: Unsupported format "
                     "target: %d\n", target);
             break;
+
         default:
             debug_printf("r300: Fatal: This is not a format target: %d\n",
                 target);
@@ -322,22 +304,9 @@ r300_get_tex_transfer(struct pipe_screen *screen,
 {
     struct r300_texture *tex = (struct r300_texture *)texture;
     struct r300_transfer *trans;
-    unsigned offset = 0;  /* in bytes */
+    unsigned offset;
 
-    /* XXX Add support for these things */
-    if (texture->target == PIPE_TEXTURE_CUBE) {
-        debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n");
-        /* offset = tex->image_offset[level][face]; */
-    }
-    else if (texture->target == PIPE_TEXTURE_3D) {
-        debug_printf("PIPE_TEXTURE_3D is not yet supported.\n");
-        /* offset = tex->image_offset[level][zslice]; */
-    }
-    else {
-        offset = tex->offset[level];
-        assert(face == 0);
-        assert(zslice == 0);
-    }
+    offset = r300_texture_get_offset(tex, level, zslice, face);  /* in bytes */
 
     trans = CALLOC_STRUCT(r300_transfer);
     if (trans) {
@@ -352,6 +321,12 @@ r300_get_tex_transfer(struct pipe_screen *screen,
         trans->transfer.nblocksy = texture->nblocksy[level];
         trans->transfer.stride = r300_texture_get_stride(tex, level);
         trans->transfer.usage = usage;
+
+        /* XXX not sure whether it's required to set these two,
+               the driver doesn't use them */
+        trans->transfer.zslice = zslice;
+        trans->transfer.face = face;
+
         trans->offset = offset;
     }
     return &trans->transfer;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 3c8ff24e17..37c7910d80 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -36,7 +36,7 @@ static void r300_setup_texture_state(struct r300_texture* tex)
     state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
         R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) |
         R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
-        R300_TX_NUM_LEVELS(pt->last_level) |
+        R300_TX_NUM_LEVELS(pt->last_level & 0xf) |
         R300_TX_PITCH_EN;
 
     /* XXX */
@@ -48,7 +48,8 @@ static void r300_setup_texture_state(struct r300_texture* tex)
         state->format1 |= R300_TX_FORMAT_3D;
     }
 
-    state->format2 = (r300_texture_get_stride(tex, 0) / pt->block.size) - 1;
+    state->format2 = ((r300_texture_get_stride(tex, 0) / pt->block.size) - 1)
+                     & 0x1fff;
 
     /* Don't worry about accidentally setting this bit on non-r500;
      * the kernel should catch it. */
@@ -63,6 +64,26 @@ static void r300_setup_texture_state(struct r300_texture* tex)
 		 pt->width[0], pt->height[0], pt->last_level);
 }
 
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+                                 unsigned zslice, unsigned face)
+{
+    unsigned offset = tex->offset[level];
+
+    switch (tex->tex.target) {
+        case PIPE_TEXTURE_3D:
+            assert(face == 0);
+            return offset + zslice * tex->layer_size[level];
+
+        case PIPE_TEXTURE_CUBE:
+            assert(zslice == 0);
+            return offset + face * tex->layer_size[level];
+
+        default:
+            assert(zslice == 0 && face == 0);
+            return offset;
+    }
+}
+
 /**
  * Return the stride, in bytes, of the texture images of the given texture
  * at the given level.
@@ -84,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
 static void r300_setup_miptree(struct r300_texture* tex)
 {
     struct pipe_texture* base = &tex->tex;
-    int stride, size;
+    int stride, size, layer_size;
     int i;
 
     for (i = 0; i <= base->last_level; i++) {
@@ -98,10 +119,12 @@ static void r300_setup_miptree(struct r300_texture* tex)
         base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
 
         stride = r300_texture_get_stride(tex, i);
-        size = stride * base->nblocksy[i] * base->depth[i];
+        layer_size = stride * base->nblocksy[i];
+        size = layer_size * base->depth[i];
 
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
+        tex->layer_size[i] = layer_size;
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
@@ -161,8 +184,7 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
     struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface);
     unsigned offset;
 
-    /* XXX this is certainly dependent on tex target */
-    offset = tex->offset[level];
+    offset = r300_texture_get_offset(tex, level, zslice, face);
 
     if (surface) {
         pipe_reference_init(&surface->reference, 1);
@@ -191,11 +213,6 @@ static struct pipe_texture*
 {
     struct r300_texture* tex;
 
-    if (base->target != PIPE_TEXTURE_2D ||
-        base->depth[0] != 1) {
-        return NULL;
-    }
-
     tex = CALLOC_STRUCT(r300_texture);
     if (!tex) {
         return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 55d1a0ac5c..35e06a9acb 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -33,6 +33,9 @@ void r300_init_screen_texture_functions(struct pipe_screen* screen);
 
 unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level);
 
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+                                 unsigned zslice, unsigned face);
+
 /* Note the signature of R300_EASY_TX_FORMAT(A, R, G, B, FORMAT)... */
 static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
 {
-- 
cgit v1.2.3


From c2df759cd73e281c4698c717e0ab89757a7affd5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 25 Oct 2009 09:57:53 +0100
Subject: r300g: fix redefining mipmaps and fetching from them

---
 src/gallium/drivers/r300/r300_texture.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 37c7910d80..762806822c 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -36,8 +36,9 @@ static void r300_setup_texture_state(struct r300_texture* tex)
     state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
         R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) |
         R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
-        R300_TX_NUM_LEVELS(pt->last_level & 0xf) |
-        R300_TX_PITCH_EN;
+        R300_TX_NUM_LEVELS(pt->last_level & 0xf);/* |
+        R300_TX_PITCH_EN;*/
+    /* XXX TX_PITCH_EN breaks rendering mipmap levels > 0, weird */
 
     /* XXX */
     state->format1 = r300_translate_texformat(pt->format);
@@ -194,6 +195,10 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
         surface->height = texture->height[level];
         surface->offset = offset;
         surface->usage = flags;
+        surface->zslice = zslice;
+        surface->texture = texture;
+        surface->face = face;
+        surface->level = level;
     }
 
     return surface;
-- 
cgit v1.2.3


From b4f6907b8d8a966df56c06155049c52dadea105f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 20:42:16 +0100
Subject: llvmpipe: Move a few format/sampling functions into better space.

---
 src/gallium/drivers/llvmpipe/Makefile            |   1 +
 src/gallium/drivers/llvmpipe/SConscript          |   1 +
 src/gallium/drivers/llvmpipe/lp_bld_format.h     |  16 ---
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c |  71 -------------
 src/gallium/drivers/llvmpipe/lp_bld_sample.c     | 124 +++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_sample.h     |   9 ++
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c |  59 +++++------
 7 files changed, 158 insertions(+), 123 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_sample.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index b96ee23a99..ea771392b1 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -22,6 +22,7 @@ C_SOURCES = \
 	lp_bld_intr.c \
 	lp_bld_logic.c \
 	lp_bld_pack.c \
+	lp_bld_sample.c \
 	lp_bld_sample_soa.c \
 	lp_bld_swizzle.c \
 	lp_bld_struct.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 403e4daa43..72e445b881 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -34,6 +34,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
         'lp_bld_pack.c',
+        'lp_bld_sample.c',
 		'lp_bld_sample_soa.c',
 		'lp_bld_struct.c',
 		'lp_bld_logic.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index c087fc986e..1ea694509d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -91,14 +91,6 @@ lp_build_store_rgba_aos(LLVMBuilderRef builder,
                         LLVMValueRef ptr,
                         LLVMValueRef rgba);
 
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
-                unsigned length,
-                unsigned src_width,
-                unsigned dst_width,
-                LLVMValueRef base_ptr,
-                LLVMValueRef offsets);
-
 
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
@@ -108,12 +100,4 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          LLVMValueRef *rgba);
 
 
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
-                       const struct util_format_description *format_desc,
-                       struct lp_type type,
-                       LLVMValueRef base_ptr,
-                       LLVMValueRef offsets,
-                       LLVMValueRef *rgba);
-
 #endif /* !LP_BLD_FORMAT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 66bebdcdec..60ad4c0ee6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -34,55 +34,6 @@
 #include "lp_bld_format.h"
 
 
-/**
- * Gather elements from scatter positions in memory into a single vector.
- *
- * @param src_width src element width
- * @param dst_width result element width (source will be expanded to fit)
- * @param length length of the offsets,
- * @param base_ptr base pointer, should be a i8 pointer type.
- * @param offsets vector with offsets
- */
-LLVMValueRef
-lp_build_gather(LLVMBuilderRef builder,
-                unsigned length,
-                unsigned src_width,
-                unsigned dst_width,
-                LLVMValueRef base_ptr,
-                LLVMValueRef offsets)
-{
-   LLVMTypeRef src_type = LLVMIntType(src_width);
-   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
-   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
-   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
-   LLVMValueRef res;
-   unsigned i;
-
-   res = LLVMGetUndef(dst_vec_type);
-   for(i = 0; i < length; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef elem_offset;
-      LLVMValueRef elem_ptr;
-      LLVMValueRef elem;
-
-      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
-      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
-      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
-      elem = LLVMBuildLoad(builder, elem_ptr, "");
-
-      assert(src_width <= dst_width);
-      if(src_width > dst_width)
-         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
-      if(src_width < dst_width)
-         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
-
-      res = LLVMBuildInsertElement(builder, res, elem, index, "");
-   }
-
-   return res;
-}
-
-
 static LLVMValueRef
 lp_build_format_swizzle(struct lp_type type,
                         const LLVMValueRef *inputs,
@@ -185,25 +136,3 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
       }
    }
 }
-
-
-void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
-                       const struct util_format_description *format_desc,
-                       struct lp_type type,
-                       LLVMValueRef base_ptr,
-                       LLVMValueRef offsets,
-                       LLVMValueRef *rgba)
-{
-   LLVMValueRef packed;
-
-   assert(format_desc->block.width == 1);
-   assert(format_desc->block.height == 1);
-   assert(format_desc->block.bits <= type.width);
-
-   packed = lp_build_gather(builder,
-                            type.length, format_desc->block.bits, type.width,
-                            base_ptr, offsets);
-
-   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
new file mode 100644
index 0000000000..b0543f22c9
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -0,0 +1,124 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Texture sampling -- common code.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "lp_bld_debug.h"
+#include "lp_bld_format.h"
+#include "lp_bld_sample.h"
+
+
+void
+lp_sampler_static_state(struct lp_sampler_static_state *state,
+                        const struct pipe_texture *texture,
+                        const struct pipe_sampler_state *sampler)
+{
+   memset(state, 0, sizeof *state);
+
+   if(!texture)
+      return;
+
+   if(!sampler)
+      return;
+
+   state->format            = texture->format;
+   state->target            = texture->target;
+   state->pot_width         = util_is_pot(texture->width[0]);
+   state->pot_height        = util_is_pot(texture->height[0]);
+   state->pot_depth         = util_is_pot(texture->depth[0]);
+
+   state->wrap_s            = sampler->wrap_s;
+   state->wrap_t            = sampler->wrap_t;
+   state->wrap_r            = sampler->wrap_r;
+   state->min_img_filter    = sampler->min_img_filter;
+   state->min_mip_filter    = sampler->min_mip_filter;
+   state->mag_img_filter    = sampler->mag_img_filter;
+   if(sampler->compare_mode) {
+      state->compare_mode      = sampler->compare_mode;
+      state->compare_func      = sampler->compare_func;
+   }
+   state->normalized_coords = sampler->normalized_coords;
+   state->prefilter         = sampler->prefilter;
+}
+
+
+/**
+ * Gather elements from scattered positions in memory into a single vector.
+ *
+ * @param src_width src element width, in bits
+ * @param dst_width result element width, in bits (source will be expanded to fit)
+ * @param length number of elements in the offsets vector
+ * @param base_ptr base pointer, should be an i8 pointer type.
+ * @param offsets vector with offsets
+ */
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets)
+{
+   LLVMTypeRef src_type = LLVMIntType(src_width);
+   LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
+   LLVMTypeRef dst_elem_type = LLVMIntType(dst_width);
+   LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
+   LLVMValueRef res;
+   unsigned i;
+
+   res = LLVMGetUndef(dst_vec_type);
+   for(i = 0; i < length; ++i) {
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      LLVMValueRef elem_offset;
+      LLVMValueRef elem_ptr;
+      LLVMValueRef elem;
+
+      elem_offset = LLVMBuildExtractElement(builder, offsets, index, "");
+      elem_ptr = LLVMBuildGEP(builder, base_ptr, &elem_offset, 1, "");
+      elem_ptr = LLVMBuildBitCast(builder, elem_ptr, src_ptr_type, "");
+      elem = LLVMBuildLoad(builder, elem_ptr, "");
+
+      assert(src_width <= dst_width);
+      if(src_width > dst_width)
+         elem = LLVMBuildTrunc(builder, elem, dst_elem_type, "");
+      if(src_width < dst_width)
+         elem = LLVMBuildZExt(builder, elem, dst_elem_type, "");
+
+      res = LLVMBuildInsertElement(builder, res, elem, index, "");
+   }
+
+   return res;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 403d0e4836..2b56179eb8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -119,6 +119,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
                         const struct pipe_sampler_state *sampler);
 
 
+LLVMValueRef
+lp_build_gather(LLVMBuilderRef builder,
+                unsigned length,
+                unsigned src_width,
+                unsigned dst_width,
+                LLVMValueRef base_ptr,
+                LLVMValueRef offsets);
+
+
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 1a47ca32d2..4af0454935 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -27,7 +27,7 @@
 
 /**
  * @file
- * Texture sampling.
+ * Texture sampling -- SoA.
  *
  * @author Jose Fonseca <jfonseca@vmware.com>
  */
@@ -48,41 +48,6 @@
 #include "lp_bld_sample.h"
 
 
-void
-lp_sampler_static_state(struct lp_sampler_static_state *state,
-                        const struct pipe_texture *texture,
-                        const struct pipe_sampler_state *sampler)
-{
-   memset(state, 0, sizeof *state);
-
-   if(!texture)
-      return;
-
-   if(!sampler)
-      return;
-
-   state->format            = texture->format;
-   state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width[0]);
-   state->pot_height        = util_is_pot(texture->height[0]);
-   state->pot_depth         = util_is_pot(texture->depth[0]);
-
-   state->wrap_s            = sampler->wrap_s;
-   state->wrap_t            = sampler->wrap_t;
-   state->wrap_r            = sampler->wrap_r;
-   state->min_img_filter    = sampler->min_img_filter;
-   state->min_mip_filter    = sampler->min_mip_filter;
-   state->mag_img_filter    = sampler->mag_img_filter;
-   if(sampler->compare_mode) {
-      state->compare_mode      = sampler->compare_mode;
-      state->compare_func      = sampler->compare_func;
-   }
-   state->normalized_coords = sampler->normalized_coords;
-   state->prefilter         = sampler->prefilter;
-}
-
-
-
 /**
  * Keep all information for sampling code generation in a single place.
  */
@@ -110,6 +75,28 @@ struct lp_build_sample_context
 };
 
 
+static void
+lp_build_load_rgba_soa(LLVMBuilderRef builder,
+                       const struct util_format_description *format_desc,
+                       struct lp_type type,
+                       LLVMValueRef base_ptr,
+                       LLVMValueRef offsets,
+                       LLVMValueRef *rgba)
+{
+   LLVMValueRef packed;
+
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= type.width);
+
+   packed = lp_build_gather(builder,
+                            type.length, format_desc->block.bits, type.width,
+                            base_ptr, offsets);
+
+   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
+}
+
+
 static void
 lp_build_sample_texel(struct lp_build_sample_context *bld,
                       LLVMValueRef x,
-- 
cgit v1.2.3


From 232b5864647d4c8d6cebb0845c046f1612e6054d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 20:58:35 +0100
Subject: llvmpipe: Eliminate lp_build_load_rgba_aos.

---
 src/gallium/drivers/llvmpipe/lp_bld_format.h     | 16 +-----------
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 32 +-----------------------
 src/gallium/drivers/llvmpipe/lp_test_format.c    | 19 +++++++++-----
 3 files changed, 15 insertions(+), 52 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 1ea694509d..b30537b2e9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -51,7 +51,7 @@ struct lp_type;
  */
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         enum pipe_format format,
+                         const struct util_format_description *desc,
                          LLVMValueRef packed);
 
 
@@ -66,20 +66,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
                        LLVMValueRef rgba);
 
 
-/**
- * Load a pixel into its RGBA components.
- *
- * @param ptr value with the pointer to the packed pixel. Pointer type is
- * irrelevant.
- *
- * @return RGBA in a 4 floats vector.
- */
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
-                       LLVMValueRef ptr);
-
-
 /**
  * Store a pixel.
  *
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index b9b5d84bed..840e54e558 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -33,10 +33,9 @@
 
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
-                         enum pipe_format format,
+                         const struct util_format_description *desc,
                          LLVMValueRef packed)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef type;
    LLVMValueRef shifted, casted, scaled, masked;
    LLVMValueRef shifts[4];
@@ -49,8 +48,6 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
    unsigned shift;
    unsigned i;
 
-   desc = util_format_description(format);
-
    /* FIXME: Support more formats */
    assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
    assert(desc->block.width == 1);
@@ -249,33 +246,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 }
 
 
-LLVMValueRef
-lp_build_load_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
-                       LLVMValueRef ptr)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   /* FIXME: Support more formats */
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-   assert(desc->block.bits <= 32);
-
-   type = LLVMIntType(desc->block.bits);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   packed = LLVMBuildLoad(builder, ptr, "");
-
-   return lp_build_unpack_rgba_aos(builder, format, packed);
-}
-
-
 void
 lp_build_store_rgba_aos(LLVMBuilderRef builder,
                         enum pipe_format format,
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 5dc8297fe9..6e501195f8 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -89,34 +89,41 @@ struct pixel_test_case test_cases[] =
 };
 
 
-typedef void (*load_ptr_t)(const void *, float *);
+typedef void (*load_ptr_t)(const uint32_t packed, float *);
 
 
 static LLVMValueRef
 add_load_rgba_test(LLVMModuleRef module,
                    enum pipe_format format)
 {
+   const struct util_format_description *desc;
    LLVMTypeRef args[2];
    LLVMValueRef func;
-   LLVMValueRef ptr;
+   LLVMValueRef packed;
    LLVMValueRef rgba_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
 
-   args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+   desc = util_format_description(format);
+
+   args[0] = LLVMInt32Type();
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
    func = LLVMAddFunction(module, "load", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   ptr = LLVMGetParam(func, 0);
+   packed = LLVMGetParam(func, 0);
    rgba_ptr = LLVMGetParam(func, 1);
 
    block = LLVMAppendBasicBlock(func, "entry");
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   rgba = lp_build_load_rgba_aos(builder, format, ptr);
+   if(desc->block.bits < 32)
+      packed = LLVMBuildTrunc(builder, packed, LLVMIntType(desc->block.bits), "");
+
+   rgba = lp_build_unpack_rgba_aos(builder, desc, packed);
+
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
    LLVMBuildRetVoid(builder);
@@ -224,7 +231,7 @@ test_format(const struct pixel_test_case *test)
    memset(unpacked, 0, sizeof unpacked);
    packed = 0;
 
-   load_ptr(&test->packed, unpacked);
+   load_ptr(test->packed, unpacked);
    store_ptr(&packed, unpacked);
 
    success = TRUE;
-- 
cgit v1.2.3


From 17afb6dd6959a3df692a6a49e6370e81ebe00038 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 21:06:03 +0100
Subject: llvmpipe: Eliminate lp_build_store_rgba_aos.

---
 src/gallium/drivers/llvmpipe/lp_bld_format.h     | 14 +----------
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 32 +-----------------------
 src/gallium/drivers/llvmpipe/lp_test_format.c    | 19 ++++++++++----
 3 files changed, 16 insertions(+), 49 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index b30537b2e9..42ee3c7d90 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -62,22 +62,10 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
  */
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
+                       const struct util_format_description *desc,
                        LLVMValueRef rgba);
 
 
-/**
- * Store a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
-void 
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
-                        enum pipe_format format,
-                        LLVMValueRef ptr,
-                        LLVMValueRef rgba);
-
-
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index 840e54e558..0591d77860 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -150,10 +150,9 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
-                       enum pipe_format format,
+                       const struct util_format_description *desc,
                        LLVMValueRef rgba)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef type;
    LLVMValueRef packed = NULL;
    LLVMValueRef swizzles[4];
@@ -164,8 +163,6 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
    unsigned shift;
    unsigned i, j;
 
-   desc = util_format_description(format);
-
    assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
    assert(desc->block.width == 1);
    assert(desc->block.height == 1);
@@ -244,30 +241,3 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 
    return packed;
 }
-
-
-void
-lp_build_store_rgba_aos(LLVMBuilderRef builder,
-                        enum pipe_format format,
-                        LLVMValueRef ptr,
-                        LLVMValueRef rgba)
-{
-   const struct util_format_description *desc;
-   LLVMTypeRef type;
-   LLVMValueRef packed;
-
-   desc = util_format_description(format);
-
-   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
-   assert(desc->block.width == 1);
-   assert(desc->block.height == 1);
-
-   type = LLVMIntType(desc->block.bits);
-
-   packed = lp_build_pack_rgba_aos(builder, format, rgba);
-
-   ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, 0), "");
-
-   LLVMBuildStore(builder, packed, ptr);
-}
-
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 6e501195f8..0fe47426f6 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -133,27 +133,31 @@ add_load_rgba_test(LLVMModuleRef module,
 }
 
 
-typedef void (*store_ptr_t)(void *, const float *);
+typedef void (*store_ptr_t)(uint32_t *, const float *);
 
 
 static LLVMValueRef
 add_store_rgba_test(LLVMModuleRef module,
                     enum pipe_format format)
 {
+   const struct util_format_description *desc;
    LLVMTypeRef args[2];
    LLVMValueRef func;
-   LLVMValueRef ptr;
+   LLVMValueRef packed_ptr;
    LLVMValueRef rgba_ptr;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
+   LLVMValueRef packed;
+
+   desc = util_format_description(format);
 
-   args[0] = LLVMPointerType(LLVMInt8Type(), 0);
+   args[0] = LLVMPointerType(LLVMInt32Type(), 0);
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
    func = LLVMAddFunction(module, "store", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   ptr = LLVMGetParam(func, 0);
+   packed_ptr = LLVMGetParam(func, 0);
    rgba_ptr = LLVMGetParam(func, 1);
 
    block = LLVMAppendBasicBlock(func, "entry");
@@ -162,7 +166,12 @@ add_store_rgba_test(LLVMModuleRef module,
 
    rgba = LLVMBuildLoad(builder, rgba_ptr, "");
 
-   lp_build_store_rgba_aos(builder, format, ptr, rgba);
+   packed = lp_build_pack_rgba_aos(builder, desc, rgba);
+
+   if(desc->block.bits < 32)
+      packed = LLVMBuildZExt(builder, packed, LLVMInt32Type(), "");
+
+   LLVMBuildStore(builder, packed, packed_ptr);
 
    LLVMBuildRetVoid(builder);
 
-- 
cgit v1.2.3


From fedd054d534206a5ebd6fed204aa97cbb5053b3a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 21:16:26 +0100
Subject: llvmpipe: Share testing infrastructure with lp_test_format.

---
 src/gallium/drivers/llvmpipe/SConscript       |  2 +-
 src/gallium/drivers/llvmpipe/lp_test_format.c | 67 ++++++++++++++++++---------
 2 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 72e445b881..169e0abc2b 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -77,7 +77,7 @@ env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
 env.Program(
     target = 'lp_test_format',
-    source = ['lp_test_format.c'],
+    source = ['lp_test_format.c', 'lp_test_main.c'],
 )
 
 env.Program(
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index 0fe47426f6..b2403ad521 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -39,6 +39,7 @@
 #include "util/u_format.h"
 
 #include "lp_bld_format.h"
+#include "lp_test.h"
 
 
 struct pixel_test_case
@@ -89,14 +90,37 @@ struct pixel_test_case test_cases[] =
 };
 
 
+void
+write_tsv_header(FILE *fp)
+{
+   fprintf(fp,
+           "result\t"
+           "format\n");
+
+   fflush(fp);
+}
+
+
+static void
+write_tsv_row(FILE *fp,
+              const struct util_format_description *desc,
+              boolean success)
+{
+   fprintf(fp, "%s\t", success ? "pass" : "fail");
+
+   fprintf(fp, "%s\n", desc->name);
+
+   fflush(fp);
+}
+
+
 typedef void (*load_ptr_t)(const uint32_t packed, float *);
 
 
 static LLVMValueRef
 add_load_rgba_test(LLVMModuleRef module,
-                   enum pipe_format format)
+                   const struct util_format_description *desc)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef args[2];
    LLVMValueRef func;
    LLVMValueRef packed;
@@ -105,8 +129,6 @@ add_load_rgba_test(LLVMModuleRef module,
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
 
-   desc = util_format_description(format);
-
    args[0] = LLVMInt32Type();
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
@@ -138,9 +160,8 @@ typedef void (*store_ptr_t)(uint32_t *, const float *);
 
 static LLVMValueRef
 add_store_rgba_test(LLVMModuleRef module,
-                    enum pipe_format format)
+                    const struct util_format_description *desc)
 {
-   const struct util_format_description *desc;
    LLVMTypeRef args[2];
    LLVMValueRef func;
    LLVMValueRef packed_ptr;
@@ -150,8 +171,6 @@ add_store_rgba_test(LLVMModuleRef module,
    LLVMValueRef rgba;
    LLVMValueRef packed;
 
-   desc = util_format_description(format);
-
    args[0] = LLVMPointerType(LLVMInt32Type(), 0);
    args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
 
@@ -181,7 +200,7 @@ add_store_rgba_test(LLVMModuleRef module,
 
 
 static boolean
-test_format(const struct pixel_test_case *test)
+test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
 {
    LLVMModuleRef module = NULL;
    LLVMValueRef load = NULL;
@@ -203,8 +222,8 @@ test_format(const struct pixel_test_case *test)
 
    module = LLVMModuleCreateWithName("test");
 
-   load = add_load_rgba_test(module, test->format);
-   store = add_store_rgba_test(module, test->format);
+   load = add_load_rgba_test(module, desc);
+   store = add_store_rgba_test(module, desc);
 
    if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
       LLVMDumpModule(module);
@@ -266,25 +285,29 @@ test_format(const struct pixel_test_case *test)
    if(pass)
       LLVMDisposePassManager(pass);
 
+   if(fp)
+      write_tsv_row(fp, desc, success);
+
    return success;
 }
 
 
-int main(int argc, char **argv)
+boolean
+test_all(unsigned verbose, FILE *fp)
 {
    unsigned i;
-   int ret;
+   bool success = TRUE;
 
-#ifdef LLVM_NATIVE_ARCH
-   LLVMLinkInJIT();
-   LLVMInitializeNativeTarget();
-#endif
+   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
+      if(!test_format(verbose, fp, &test_cases[i]))
+        success = FALSE;
 
-   util_cpu_detect();
+   return success;
+}
 
-   for (i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); ++i)
-      if(!test_format(&test_cases[i]))
-        ret = 1;
 
-   return ret;
+boolean
+test_some(unsigned verbose, FILE *fp, unsigned long n)
+{
+   return test_all(verbose, fp);
 }
-- 
cgit v1.2.3


From bc93e9181cf179a797679d30cd1a3a563e1756c0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 22:37:06 +0100
Subject: llvmpipe: Factor out pixel offset computation.

---
 src/gallium/drivers/llvmpipe/lp_bld_sample.c     | 66 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_sample.h     | 11 ++++
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 49 +++---------------
 3 files changed, 83 insertions(+), 43 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
index b0543f22c9..4d272bea87 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -37,6 +37,9 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "lp_bld_debug.h"
+#include "lp_bld_const.h"
+#include "lp_bld_arit.h"
+#include "lp_bld_type.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
 
@@ -122,3 +125,66 @@ lp_build_gather(LLVMBuilderRef builder,
 
    return res;
 }
+
+
+/**
+ * Compute the offset of a pixel.
+ *
+ * x, y, y_stride are vectors
+ */
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+                       const struct util_format_description *format_desc,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   LLVMValueRef x_stride;
+   LLVMValueRef offset;
+
+   x_stride = lp_build_const_scalar(bld->type, format_desc->block.bits/8);
+
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      LLVMValueRef x_lo, x_hi;
+      LLVMValueRef y_lo, y_hi;
+      LLVMValueRef x_stride_lo, x_stride_hi;
+      LLVMValueRef y_stride_lo, y_stride_hi;
+      LLVMValueRef x_offset_lo, x_offset_hi;
+      LLVMValueRef y_offset_lo, y_offset_hi;
+      LLVMValueRef offset_lo, offset_hi;
+
+      x_lo = LLVMBuildAnd(bld->builder, x, bld->one, "");
+      y_lo = LLVMBuildAnd(bld->builder, y, bld->one, "");
+
+      x_hi = LLVMBuildLShr(bld->builder, x, bld->one, "");
+      y_hi = LLVMBuildLShr(bld->builder, y, bld->one, "");
+
+      x_stride_lo = x_stride;
+      y_stride_lo = lp_build_const_scalar(bld->type, 2*format_desc->block.bits/8);
+
+      x_stride_hi = lp_build_const_scalar(bld->type, 4*format_desc->block.bits/8);
+      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, bld->one, "");
+
+      x_offset_lo = lp_build_mul(bld, x_lo, x_stride_lo);
+      y_offset_lo = lp_build_mul(bld, y_lo, y_stride_lo);
+      offset_lo = lp_build_add(bld, x_offset_lo, y_offset_lo);
+
+      x_offset_hi = lp_build_mul(bld, x_hi, x_stride_hi);
+      y_offset_hi = lp_build_mul(bld, y_hi, y_stride_hi);
+      offset_hi = lp_build_add(bld, x_offset_hi, y_offset_hi);
+
+      offset = lp_build_add(bld, offset_hi, offset_lo);
+   }
+   else {
+      LLVMValueRef x_offset;
+      LLVMValueRef y_offset;
+
+      x_offset = lp_build_mul(bld, x, x_stride);
+      y_offset = lp_build_mul(bld, y, y_stride);
+
+      offset = lp_build_add(bld, x_offset, y_offset);
+   }
+
+   return offset;
+}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.h b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
index 2b56179eb8..8cb8210ca7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.h
@@ -40,7 +40,9 @@
 
 struct pipe_texture;
 struct pipe_sampler_state;
+struct util_format_description;
 struct lp_type;
+struct lp_build_context;
 
 
 /**
@@ -128,6 +130,15 @@ lp_build_gather(LLVMBuilderRef builder,
                 LLVMValueRef offsets);
 
 
+LLVMValueRef
+lp_build_sample_offset(struct lp_build_context *bld,
+                       const struct util_format_description *format_desc,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr);
+
+
 void
 lp_build_sample_soa(LLVMBuilderRef builder,
                     const struct lp_sampler_static_state *static_state,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 4af0454935..6aa7ad4b45 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -106,51 +106,14 @@ lp_build_sample_texel(struct lp_build_sample_context *bld,
                       LLVMValueRef *texel)
 {
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
-   LLVMValueRef x_stride;
    LLVMValueRef offset;
 
-   x_stride = lp_build_const_scalar(bld->int_coord_type, bld->format_desc->block.bits/8);
-
-   if(bld->format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-      LLVMValueRef x_lo, x_hi;
-      LLVMValueRef y_lo, y_hi;
-      LLVMValueRef x_stride_lo, x_stride_hi;
-      LLVMValueRef y_stride_lo, y_stride_hi;
-      LLVMValueRef x_offset_lo, x_offset_hi;
-      LLVMValueRef y_offset_lo, y_offset_hi;
-      LLVMValueRef offset_lo, offset_hi;
-
-      x_lo = LLVMBuildAnd(bld->builder, x, int_coord_bld->one, "");
-      y_lo = LLVMBuildAnd(bld->builder, y, int_coord_bld->one, "");
-
-      x_hi = LLVMBuildLShr(bld->builder, x, int_coord_bld->one, "");
-      y_hi = LLVMBuildLShr(bld->builder, y, int_coord_bld->one, "");
-
-      x_stride_lo = x_stride;
-      y_stride_lo = lp_build_const_scalar(bld->int_coord_type, 2*bld->format_desc->block.bits/8);
-
-      x_stride_hi = lp_build_const_scalar(bld->int_coord_type, 4*bld->format_desc->block.bits/8);
-      y_stride_hi = LLVMBuildShl(bld->builder, y_stride, int_coord_bld->one, "");
-
-      x_offset_lo = lp_build_mul(int_coord_bld, x_lo, x_stride_lo);
-      y_offset_lo = lp_build_mul(int_coord_bld, y_lo, y_stride_lo);
-      offset_lo = lp_build_add(int_coord_bld, x_offset_lo, y_offset_lo);
-
-      x_offset_hi = lp_build_mul(int_coord_bld, x_hi, x_stride_hi);
-      y_offset_hi = lp_build_mul(int_coord_bld, y_hi, y_stride_hi);
-      offset_hi = lp_build_add(int_coord_bld, x_offset_hi, y_offset_hi);
-
-      offset = lp_build_add(int_coord_bld, offset_hi, offset_lo);
-   }
-   else {
-      LLVMValueRef x_offset;
-      LLVMValueRef y_offset;
-
-      x_offset = lp_build_mul(int_coord_bld, x, x_stride);
-      y_offset = lp_build_mul(int_coord_bld, y, y_stride);
-
-      offset = lp_build_add(int_coord_bld, x_offset, y_offset);
-   }
+   offset = lp_build_sample_offset(&bld->int_coord_bld,
+                                   bld->format_desc,
+                                   x,
+                                   y,
+                                   y_stride,
+                                   data_ptr);
 
    lp_build_load_rgba_soa(bld->builder,
                           bld->format_desc,
-- 
cgit v1.2.3


From a55b305c5b3be3fed8112d44878e712cf09303ce Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 22 Oct 2009 22:44:32 +0100
Subject: llvmpipe: Merge lp_build_load_rgba_soa into lp_build_sample_texel.

---
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 48 ++++++++----------------
 1 file changed, 16 insertions(+), 32 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 6aa7ad4b45..a7d2118c9b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -75,28 +75,6 @@ struct lp_build_sample_context
 };
 
 
-static void
-lp_build_load_rgba_soa(LLVMBuilderRef builder,
-                       const struct util_format_description *format_desc,
-                       struct lp_type type,
-                       LLVMValueRef base_ptr,
-                       LLVMValueRef offsets,
-                       LLVMValueRef *rgba)
-{
-   LLVMValueRef packed;
-
-   assert(format_desc->block.width == 1);
-   assert(format_desc->block.height == 1);
-   assert(format_desc->block.bits <= type.width);
-
-   packed = lp_build_gather(builder,
-                            type.length, format_desc->block.bits, type.width,
-                            base_ptr, offsets);
-
-   lp_build_unpack_rgba_soa(builder, format_desc, type, packed, rgba);
-}
-
-
 static void
 lp_build_sample_texel(struct lp_build_sample_context *bld,
                       LLVMValueRef x,
@@ -105,22 +83,28 @@ lp_build_sample_texel(struct lp_build_sample_context *bld,
                       LLVMValueRef data_ptr,
                       LLVMValueRef *texel)
 {
-   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
+   LLVMValueRef packed;
 
    offset = lp_build_sample_offset(&bld->int_coord_bld,
                                    bld->format_desc,
-                                   x,
-                                   y,
-                                   y_stride,
+                                   x, y, y_stride,
                                    data_ptr);
 
-   lp_build_load_rgba_soa(bld->builder,
-                          bld->format_desc,
-                          bld->texel_type,
-                          data_ptr,
-                          offset,
-                          texel);
+   assert(bld->format_desc->block.width == 1);
+   assert(bld->format_desc->block.height == 1);
+   assert(bld->format_desc->block.bits <= bld->texel_type.width);
+
+   packed = lp_build_gather(bld->builder,
+                            bld->texel_type.length,
+                            bld->format_desc->block.bits,
+                            bld->texel_type.width,
+                            data_ptr, offset);
+
+   lp_build_unpack_rgba_soa(bld->builder,
+                            bld->format_desc,
+                            bld->texel_type,
+                            packed, texel);
 }
 
 
-- 
cgit v1.2.3


From b544ab72994a7eda1e8c17fa217213ff3713dd99 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:03:18 +0000
Subject: llvmpipe: Add inlines to quickly generate types matching the native
 SIMD register bitwidth.

---
 src/gallium/drivers/llvmpipe/lp_bld_type.h   | 94 +++++++++++++++++++++++++++-
 src/gallium/drivers/llvmpipe/lp_test_blend.c | 20 +++---
 2 files changed, 102 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_type.h b/src/gallium/drivers/llvmpipe/lp_bld_type.h
index 46c298fa20..2fb233d335 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_type.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_type.h
@@ -42,14 +42,19 @@
 #include <pipe/p_compiler.h>
 
 
+/**
+ * Native SIMD register width.
+ *
+ * 128 for all architectures we care about.
+ */
+#define LP_NATIVE_VECTOR_WIDTH 128
+
 /**
  * Several functions can only cope with vectors of length up to this value.
  * You may need to increase that value if you want to represent bigger vectors.
  */
 #define LP_MAX_VECTOR_LENGTH 16
 
-#define LP_MAX_TYPE_WIDTH 64
-
 
 /**
  * The LLVM type system can't conveniently express all the things we care about
@@ -134,6 +139,91 @@ struct lp_build_context
 };
 
 
+static INLINE struct lp_type
+lp_type_float(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.floating = TRUE;
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_int(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_uint(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_unorm(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.norm = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_fixed(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.sign = TRUE;
+   res_type.fixed = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
+static INLINE struct lp_type
+lp_type_ufixed(unsigned width)
+{
+   struct lp_type res_type;
+
+   memset(&res_type, 0, sizeof res_type);
+   res_type.fixed = TRUE;
+   res_type.width = width;
+   res_type.length = LP_NATIVE_VECTOR_WIDTH / width;
+
+   return res_type;
+}
+
+
 LLVMTypeRef
 lp_build_elem_type(struct lp_type type);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index e3af81cffb..149fec1d54 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -530,11 +530,11 @@ test_one(unsigned verbose,
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
       if(mode == AoS) {
-         uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t con[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t res[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
 
@@ -595,11 +595,11 @@ test_one(unsigned verbose,
 
       if(mode == SoA) {
          const unsigned stride = type.length*type.width/8;
-         uint8_t src[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t dst[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t con[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t res[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
-         uint8_t ref[4*LP_MAX_VECTOR_LENGTH*LP_MAX_TYPE_WIDTH/8];
+         uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+         uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
          boolean mismatch;
-- 
cgit v1.2.3


From 8d80fd3f554cab2db962a903ce4eaba7c8fed7ac Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:03:50 +0000
Subject: llvmpipe: Allow different signs when unpacking.

---
 src/gallium/drivers/llvmpipe/lp_bld_pack.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_pack.c b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
index fe82fda039..bc360ad77a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_pack.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_pack.c
@@ -159,11 +159,10 @@ lp_build_unpack2(LLVMBuilderRef builder,
 
    assert(!src_type.floating);
    assert(!dst_type.floating);
-   assert(dst_type.sign == src_type.sign);
    assert(dst_type.width == src_type.width * 2);
    assert(dst_type.length * 2 == src_type.length);
 
-   if(src_type.sign) {
+   if(dst_type.sign && src_type.sign) {
       /* Replicate the sign bit in the most significant bits */
       msb = LLVMBuildAShr(builder, src, lp_build_int_const_scalar(src_type, src_type.width - 1), "");
    }
-- 
cgit v1.2.3


From abff4214ef870f26d5c64adac1235b9e9438a51e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:06:05 +0000
Subject: llvmpipe: Split the format swizzling step from the unpacking.

---
 src/gallium/drivers/llvmpipe/lp_bld_format.h     |  7 ++++
 src/gallium/drivers/llvmpipe/lp_bld_format_soa.c | 43 +++++++++++++++---------
 2 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 42ee3c7d90..8b08c016c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -42,6 +42,13 @@ struct util_format_description;
 struct lp_type;
 
 
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+                            struct lp_type type,
+                            const LLVMValueRef *unswizzled,
+                            LLVMValueRef *swizzled);
+
+
 /**
  * Unpack a pixel into its RGBA components.
  *
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
index 60ad4c0ee6..64151d169d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_soa.c
@@ -35,16 +35,16 @@
 
 
 static LLVMValueRef
-lp_build_format_swizzle(struct lp_type type,
-                        const LLVMValueRef *inputs,
-                        enum util_format_swizzle swizzle)
+lp_build_format_swizzle_chan_soa(struct lp_type type,
+                                 const LLVMValueRef *unswizzled,
+                                 enum util_format_swizzle swizzle)
 {
    switch (swizzle) {
    case UTIL_FORMAT_SWIZZLE_X:
    case UTIL_FORMAT_SWIZZLE_Y:
    case UTIL_FORMAT_SWIZZLE_Z:
    case UTIL_FORMAT_SWIZZLE_W:
-      return inputs[swizzle];
+      return unswizzled[swizzle];
    case UTIL_FORMAT_SWIZZLE_0:
       return lp_build_zero(type);
    case UTIL_FORMAT_SWIZZLE_1:
@@ -58,6 +58,28 @@ lp_build_format_swizzle(struct lp_type type,
 }
 
 
+void
+lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
+                            struct lp_type type,
+                            const LLVMValueRef *unswizzled,
+                            LLVMValueRef *swizzled)
+{
+   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      enum util_format_swizzle swizzle = format_desc->swizzle[0];
+      LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+      swizzled[2] = swizzled[1] = swizzled[0] = depth;
+      swizzled[3] = lp_build_one(type);
+   }
+   else {
+      unsigned chan;
+      for (chan = 0; chan < 4; ++chan) {
+         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
+         swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+      }
+   }
+}
+
+
 void
 lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          const struct util_format_description *format_desc,
@@ -123,16 +145,5 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
       start = stop;
    }
 
-   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
-      enum util_format_swizzle swizzle = format_desc->swizzle[0];
-      LLVMValueRef depth = lp_build_format_swizzle(type, inputs, swizzle);
-      rgba[2] = rgba[1] = rgba[0] = depth;
-      rgba[3] = lp_build_one(type);
-   }
-   else {
-      for (chan = 0; chan < 4; ++chan) {
-         enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-         rgba[chan] = lp_build_format_swizzle(type, inputs, swizzle);
-      }
-   }
+   lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
 }
-- 
cgit v1.2.3


From 47d241be9ff89b65b978dd4fe4ea7473e07fa2c4 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:09:23 +0000
Subject: llvmpipe: New function to unpack rgba8 formats into 4 x u8n AoS.

---
 src/gallium/drivers/llvmpipe/lp_bld_format.h     |  19 ++-
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 141 +++++++++++++++++++++++
 2 files changed, 148 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index 8b08c016c0..fa560576be 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -49,24 +49,19 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
                             LLVMValueRef *swizzled);
 
 
-/**
- * Unpack a pixel into its RGBA components.
- *
- * @param packed integer.
- *
- * @return RGBA in a 4 floats vector.
- */
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
                          const struct util_format_description *desc,
                          LLVMValueRef packed);
 
 
-/**
- * Pack a pixel.
- *
- * @param rgba 4 float vector with the unpacked components.
- */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+                          const struct util_format_description *desc,
+                          struct lp_type type,
+                          LLVMValueRef packed);
+
+
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
                        const struct util_format_description *desc,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index 0591d77860..5836e0173f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -25,12 +25,34 @@
  *
  **************************************************************************/
 
+/**
+ * @file
+ * AoS pixel format manipulation.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
 
+#include "util/u_cpu_detect.h"
 #include "util/u_format.h"
 
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
+#include "lp_bld_swizzle.h"
 #include "lp_bld_format.h"
 
 
+/**
+ * Unpack a single pixel into its RGBA components.
+ *
+ * @param packed integer.
+ *
+ * @return RGBA in a 4 floats vector.
+ *
+ * XXX: This is mostly for reference and testing -- operating on a single pixel
+ * at a time is rarely if ever needed.
+ */
 LLVMValueRef
 lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
                          const struct util_format_description *desc,
@@ -148,6 +170,125 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 }
 
 
+/**
+ * Take a vector with packed pixels and unpack into a rgba8 vector.
+ *
+ * Formats with bit depth smaller than 32bits are accepted, but they must be
+ * padded to 32bits. Every non-void channel must be 8 bits wide.
+ *
+ * @param type  8bit normalized result type, with a length multiple of four.
+ */
+LLVMValueRef
+lp_build_unpack_rgba8_aos(LLVMBuilderRef builder,
+                          const struct util_format_description *desc,
+                          struct lp_type type,
+                          LLVMValueRef packed)
+{
+   struct lp_build_context bld;
+   boolean rgba8;
+   LLVMValueRef res;
+   unsigned i;
+
+   lp_build_context_init(&bld, builder, type);
+
+   /* FIXME: Support more formats */
+   assert(desc->layout == UTIL_FORMAT_LAYOUT_ARITH);
+   assert(desc->block.width == 1);
+   assert(desc->block.height == 1);
+   assert(desc->block.bits <= 32);
+
+   assert(!type.floating);
+   assert(!type.fixed);
+   assert(type.norm);
+   assert(type.width == 8);
+   assert(type.length % 4 == 0);
+
+   rgba8 = TRUE;
+   for(i = 0; i < 4; ++i) {
+      assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+             desc->channel[i].type == UTIL_FORMAT_TYPE_VOID);
+      if(desc->channel[i].type != UTIL_FORMAT_TYPE_VOID &&
+         desc->channel[i].size != 8)
+         rgba8 = FALSE;
+   }
+
+   if(rgba8) {
+      /*
+       * The pixel is already in a rgba8 format variant. All it is necessary
+       * is to swizzle the channels.
+       */
+      unsigned char swizzles[4];
+      boolean zeros[4]; /* bitwise AND mask */
+      boolean ones[4]; /* bitwise OR mask */
+      boolean swizzles_needed = FALSE;
+      boolean zeros_needed = FALSE;
+      boolean ones_needed = FALSE;
+
+      for(i = 0; i < 4; ++i) {
+         enum util_format_swizzle swizzle = desc->swizzle[i];
+         /* Initialize with the no-op case */
+         swizzles[i] = util_cpu_caps.little_endian ? 3 - i : i;
+         zeros[i] = TRUE;
+         ones[i] = FALSE;
+
+         switch (swizzle) {
+         case UTIL_FORMAT_SWIZZLE_X:
+         case UTIL_FORMAT_SWIZZLE_Y:
+         case UTIL_FORMAT_SWIZZLE_Z:
+         case UTIL_FORMAT_SWIZZLE_W:
+            if(swizzle != swizzles[i]) {
+               swizzles[i] = swizzle;
+               swizzles_needed = TRUE;
+            }
+            break;
+         case UTIL_FORMAT_SWIZZLE_0:
+            zeros[i] = FALSE;
+            zeros_needed = TRUE;
+            break;
+         case UTIL_FORMAT_SWIZZLE_1:
+            ones[i] = TRUE;
+            ones_needed = TRUE;
+            break;
+         case UTIL_FORMAT_SWIZZLE_NONE:
+            assert(0);
+            break;
+         }
+      }
+
+      res = packed;
+      if(swizzles_needed)
+         res = lp_build_swizzle1_aos(&bld, res, swizzles);
+
+      if(zeros_needed) {
+         /* Mask out zero channels */
+         LLVMValueRef mask = lp_build_const_mask_aos(type, zeros);
+         res = LLVMBuildAnd(builder, res, mask, "");
+      }
+
+      if(ones_needed) {
+         /* Or one channels */
+         LLVMValueRef mask = lp_build_const_mask_aos(type, ones);
+         res = LLVMBuildOr(builder, res, mask, "");
+      }
+   }
+   else {
+      /* FIXME */
+      assert(0);
+      res = lp_build_undef(type);
+   }
+
+   return res;
+}
+
+
+/**
+ * Pack a single pixel.
+ *
+ * @param rgba 4 float vector with the unpacked components.
+ *
+ * XXX: This is mostly for reference and testing -- operating on a single pixel
+ * at a time is rarely if ever needed.
+ */
 LLVMValueRef
 lp_build_pack_rgba_aos(LLVMBuilderRef builder,
                        const struct util_format_description *desc,
-- 
cgit v1.2.3


From bfd7a9ca967e5521fb3847db8615127c3ee7b9b3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:09:59 +0000
Subject: llvmpipe: New module to help make assertions about formats.

---
 src/gallium/drivers/llvmpipe/Makefile              |  1 +
 src/gallium/drivers/llvmpipe/SConscript            |  1 +
 src/gallium/drivers/llvmpipe/lp_bld_format.h       |  4 ++
 src/gallium/drivers/llvmpipe/lp_bld_format_query.c | 72 ++++++++++++++++++++++
 4 files changed, 78 insertions(+)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_format_query.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index ea771392b1..96c014e592 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -17,6 +17,7 @@ C_SOURCES = \
 	lp_bld_depth.c \
 	lp_bld_flow.c \
 	lp_bld_format_aos.c \
+	lp_bld_format_query.c \
 	lp_bld_format_soa.c \
 	lp_bld_interp.c \
 	lp_bld_intr.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 169e0abc2b..52983039fd 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -30,6 +30,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_depth.c',
 		'lp_bld_flow.c',
 		'lp_bld_format_aos.c',
+        'lp_bld_format_query.c',
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format.h b/src/gallium/drivers/llvmpipe/lp_bld_format.h
index fa560576be..970bee379f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format.h
@@ -42,6 +42,10 @@ struct util_format_description;
 struct lp_type;
 
 
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc);
+
+
 void
 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
                             struct lp_type type,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_query.c b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
new file mode 100644
index 0000000000..f3832d07ff
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_query.c
@@ -0,0 +1,72 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Utility functions to make assertions about formats.
+ *
+ * This module centralizes most of the logic used when determining what
+ * algorithm is most suitable (i.e., most efficient yet correct) for a format.
+ *
+ * It might be possible to move some of these functions to u_format module,
+ * but since tiny differences in the format may render it more/less
+ * appropriate to a given algorithm it is impossible to make any long term
+ * guarantee about the semantics of these functions.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "util/u_format.h"
+
+#include "lp_bld_format.h"
+
+
+/**
+ * Whether this is a 32bit format made of four 8bit unsigned or void channels.
+ */
+boolean
+lp_format_is_rgba8(const struct util_format_description *desc)
+{
+   unsigned chan;
+
+   if(desc->block.width != 1 ||
+      desc->block.height != 1 ||
+      desc->block.bits != 32)
+      return FALSE;
+
+   for(chan = 0; chan < 4; ++chan) {
+      /* Signed channels don't qualify: rgba8 fast paths decode bytes as unorm */
+      if(desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED &&
+         desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+         return FALSE;
+      if(desc->channel[chan].size != 8)
+         return FALSE;
+   }
+
+   return TRUE;
+}
-- 
cgit v1.2.3


From f3893ca9c8bfdba9323ef2fc179ac203e85eda70 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:16:38 +0000
Subject: llvmpipe: Make lerping work for 8.8 fixed point values.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 54 +++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 83ca06acf8..93e797cb44 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -361,6 +361,8 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef b)
 {
    const struct lp_type type = bld->type;
+   LLVMValueRef shift;
+   LLVMValueRef res;
 
    if(a == bld->zero)
       return bld->zero;
@@ -394,10 +396,31 @@ lp_build_mul(struct lp_build_context *bld,
       assert(0);
    }
 
-   if(LLVMIsConstant(a) && LLVMIsConstant(b))
-      return LLVMConstMul(a, b);
+   if(type.fixed)
+      shift = lp_build_int_const_scalar(type, type.width/2);
+   else
+      shift = NULL;
+
+   if(LLVMIsConstant(a) && LLVMIsConstant(b)) {
+      res =  LLVMConstMul(a, b);
+      if(shift) {
+         if(type.sign)
+            res = LLVMConstAShr(res, shift);
+         else
+            res = LLVMConstLShr(res, shift);
+      }
+   }
+   else {
+      res = LLVMBuildMul(bld->builder, a, b, "");
+      if(shift) {
+         if(type.sign)
+            res = LLVMBuildAShr(bld->builder, res, shift, "");
+         else
+            res = LLVMBuildLShr(bld->builder, res, shift, "");
+      }
+   }
 
-   return LLVMBuildMul(bld->builder, a, b, "");
+   return res;
 }
 
 
@@ -432,13 +455,36 @@ lp_build_div(struct lp_build_context *bld,
 }
 
 
+/**
+ * Linear interpolation.
+ *
+ * This also works for integer values with a few caveats.
+ *
+ * @sa http://www.stereopsis.com/doubleblend.html
+ */
 LLVMValueRef
 lp_build_lerp(struct lp_build_context *bld,
               LLVMValueRef x,
               LLVMValueRef v0,
               LLVMValueRef v1)
 {
-   return lp_build_add(bld, v0, lp_build_mul(bld, x, lp_build_sub(bld, v1, v0)));
+   LLVMValueRef delta;
+   LLVMValueRef res;
+
+   delta = lp_build_sub(bld, v1, v0);
+   /* res = v0 + x*delta; for fixed point lp_build_mul shifts the product back */
+   res = lp_build_mul(bld, x, delta);
+
+   res = lp_build_add(bld, v0, res);
+
+   if(bld->type.fixed)
+      /* XXX: Keeping only the lower half of the bits is necessary for lerping
+       * 8bit colors stored on 16bits, but it will be wrong for other uses.
+       * Basically we need a more powerful lp_type, capable of further
+       * distinguishing the interpretation of the values from their storage. */
+      res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+
+   return res;
 }
 
 
-- 
cgit v1.2.3


From e1342f871b2ec1ed0293f564540d03aaa11b1720 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 09:51:57 +0000
Subject: llvmpipe: Fast path for sampling rgba8 textures with linear
 filtering.

Implement Keith's suggestion of doing most of the sampling with 16x8
and 8x16 AoS, and only doing the conversion to floating point SoA at
the very last step.

Improves gloss performance by 10%.
---
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 268 ++++++++++++++++++++++-
 1 file changed, 256 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index a7d2118c9b..f7a030fb8c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -38,12 +38,15 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_format.h"
+#include "util/u_cpu_detect.h"
 #include "lp_bld_debug.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
+#include "lp_bld_conv.h"
 #include "lp_bld_arit.h"
 #include "lp_bld_logic.h"
 #include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
 #include "lp_bld_format.h"
 #include "lp_bld_sample.h"
 
@@ -76,12 +79,12 @@ struct lp_build_sample_context
 
 
 static void
-lp_build_sample_texel(struct lp_build_sample_context *bld,
-                      LLVMValueRef x,
-                      LLVMValueRef y,
-                      LLVMValueRef y_stride,
-                      LLVMValueRef data_ptr,
-                      LLVMValueRef *texel)
+lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
+                          LLVMValueRef x,
+                          LLVMValueRef y,
+                          LLVMValueRef y_stride,
+                          LLVMValueRef data_ptr,
+                          LLVMValueRef *texel)
 {
    LLVMValueRef offset;
    LLVMValueRef packed;
@@ -108,6 +111,32 @@ lp_build_sample_texel(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Gather the still packed texels addressed by the integer coordinates x, y.
+ */
+static LLVMValueRef
+lp_build_sample_packed(struct lp_build_sample_context *bld,
+                       LLVMValueRef x,
+                       LLVMValueRef y,
+                       LLVMValueRef y_stride,
+                       LLVMValueRef data_ptr)
+{
+   const struct util_format_description *format_desc = bld->format_desc;
+   LLVMValueRef texel_offset;
+
+   texel_offset = lp_build_sample_offset(&bld->int_coord_bld, format_desc,
+                                         x, y, y_stride, data_ptr);
+
+   assert(format_desc->block.width == 1);
+   assert(format_desc->block.height == 1);
+   assert(format_desc->block.bits <= bld->texel_type.width);
+
+   return lp_build_gather(bld->builder, bld->texel_type.length,
+                          format_desc->block.bits, bld->texel_type.width,
+                          data_ptr, texel_offset);
+}
+
+
 static LLVMValueRef
 lp_build_sample_wrap(struct lp_build_sample_context *bld,
                      LLVMValueRef coord,
@@ -174,7 +203,7 @@ lp_build_sample_2d_nearest_soa(struct lp_build_sample_context *bld,
    x = lp_build_sample_wrap(bld, x, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y = lp_build_sample_wrap(bld, y, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x, y, stride, data_ptr, texel);
+   lp_build_sample_texel_soa(bld, x, y, stride, data_ptr, texel);
 }
 
 
@@ -220,10 +249,10 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
    x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
    y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
 
-   lp_build_sample_texel(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
-   lp_build_sample_texel(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
-   lp_build_sample_texel(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
-   lp_build_sample_texel(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
+   lp_build_sample_texel_soa(bld, x0, y0, stride, data_ptr, neighbors[0][0]);
+   lp_build_sample_texel_soa(bld, x1, y0, stride, data_ptr, neighbors[0][1]);
+   lp_build_sample_texel_soa(bld, x0, y1, stride, data_ptr, neighbors[1][0]);
+   lp_build_sample_texel_soa(bld, x1, y1, stride, data_ptr, neighbors[1][1]);
 
    /* TODO: Don't interpolate missing channels */
    for(chan = 0; chan < 4; ++chan) {
@@ -237,6 +266,218 @@ lp_build_sample_2d_linear_soa(struct lp_build_sample_context *bld,
 }
 
 
+/**
+ * Split a vector of packed 4 x 8bit pixels into four per-channel vectors.
+ */
+static void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+                          struct lp_type dst_type,
+                          LLVMValueRef packed,
+                          LLVMValueRef *rgba)
+{
+   LLVMValueRef byte_mask = lp_build_int_const_scalar(dst_type, 0xff);
+   unsigned chan;
+
+   for (chan = 0; chan < 4; ++chan) {
+      const unsigned shift = chan*8;
+      LLVMValueRef bits = packed;
+
+      /* Bring the channel down to the lowest byte */
+      if(shift != 0) {
+         LLVMValueRef count = lp_build_int_const_scalar(dst_type, shift);
+         bits = LLVMBuildLShr(builder, bits, count, "");
+      }
+      /* The topmost channel has nothing above it to mask away */
+      if(shift + 8 < 32)
+         bits = LLVMBuildAnd(builder, bits, byte_mask, "");
+
+      rgba[chan] = lp_build_unsigned_norm_to_float(builder, 8, dst_type, bits);
+   }
+}
+
+/* Bilinear filtering of rgba8 texels using 8.8 fixed point AoS arithmetic. */
+static void
+lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
+                              LLVMValueRef s,
+                              LLVMValueRef t,
+                              LLVMValueRef width,
+                              LLVMValueRef height,
+                              LLVMValueRef stride,
+                              LLVMValueRef data_ptr,
+                              LLVMValueRef *texel)
+{
+   LLVMBuilderRef builder = bld->builder;
+   struct lp_build_context i32, h16, u8n;
+   LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
+   LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255;
+   LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
+   LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
+   LLVMValueRef x0, x1;
+   LLVMValueRef y0, y1;
+   LLVMValueRef neighbors[2][2];
+   LLVMValueRef neighbors_lo[2][2];
+   LLVMValueRef neighbors_hi[2][2];
+   LLVMValueRef packed, packed_lo, packed_hi;
+   LLVMValueRef unswizzled[4];
+
+   lp_build_context_init(&i32, builder, lp_type_int(32));
+   lp_build_context_init(&h16, builder, lp_type_ufixed(16));
+   lp_build_context_init(&u8n, builder, lp_type_unorm(8));
+
+   i32_vec_type = lp_build_vec_type(i32.type);
+   h16_vec_type = lp_build_vec_type(h16.type);
+   u8n_vec_type = lp_build_vec_type(u8n.type);
+   /* Scale by 256 so the integer conversion keeps 8 fractional bits */
+   f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0);
+   s = lp_build_mul(&bld->coord_bld, s, f32_c256);
+   t = lp_build_mul(&bld->coord_bld, t, f32_c256);
+
+   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
+   t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
+   /* Despite its name i32_c128 holds -128, i.e., minus one half */
+   i32_c128 = lp_build_int_const_scalar(i32.type, -128);
+   s = LLVMBuildAdd(builder, s, i32_c128, "");
+   t = LLVMBuildAdd(builder, t, i32_c128, "");
+   /* Integer part of the coordinates */
+   i32_c8 = lp_build_int_const_scalar(i32.type, 8);
+   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
+   t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
+   /* Fractional part of the coordinates, in the lower 8 bits */
+   i32_c255 = lp_build_int_const_scalar(i32.type, 255);
+   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
+   t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
+
+   x0 = s_ipart;
+   y0 = t_ipart;
+
+   x0 = lp_build_sample_wrap(bld, x0, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y0 = lp_build_sample_wrap(bld, y0, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
+   y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
+
+   x1 = lp_build_sample_wrap(bld, x1, width,  bld->static_state->pot_width,  bld->static_state->wrap_s);
+   y1 = lp_build_sample_wrap(bld, y1, height, bld->static_state->pot_height, bld->static_state->wrap_t);
+
+   /*
+    * Transform 4 x i32 in
+    *
+    *   s_fpart = {s0, s1, s2, s3}
+    *
+    * into 8 x i16
+    *
+    *   s_fpart = {00, s0, 00, s1, 00, s2, 00, s3}
+    *
+    * into two 8 x i16
+    *
+    *   s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1}
+    *   s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3}
+    *
+    * and likewise for t_fpart. There is no risk of losing precision here
+    * since the fractional parts only use the lower 8bits.
+    */
+
+   s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, "");
+   t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, "");
+
+   {
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef shuffle_lo;
+      LLVMValueRef shuffle_hi;
+      unsigned i, j;
+
+      for(j = 0; j < h16.type.length; j += 4) {
+         unsigned subindex = util_cpu_caps.little_endian ? 0 : 1;
+         LLVMValueRef index;
+
+         index = LLVMConstInt(elem_type, j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_lo[j + i] = index;
+
+         index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0);
+         for(i = 0; i < 4; ++i)
+            shuffles_hi[j + i] = index;
+      }
+
+      shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length);
+      shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length);
+
+      s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_lo, "");
+      t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_lo, "");
+      s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, shuffle_hi, "");
+      t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
+   }
+
+   /*
+    * Fetch the pixels as 4 x 32bit (rgba order might differ):
+    *
+    *   rgba0 rgba1 rgba2 rgba3
+    *
+    * bit cast them into 16 x u8
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * unpack them into two 8 x i16:
+    *
+    *   r0 g0 b0 a0 r1 g1 b1 a1
+    *   r2 g2 b2 a2 r3 g3 b3 a3
+    *
+    * The higher 8 bits of the resulting elements will be zero.
+    */
+
+   neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_ptr);
+   neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_ptr);
+   neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_ptr);
+   neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_ptr);
+
+   neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
+   neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
+   neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
+   neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
+
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
+   lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
+
+   /*
+    * Linear interpolate with 8.8 fixed point.
+    */
+
+   packed_lo = lp_build_lerp_2d(&h16,
+                                s_fpart_lo, t_fpart_lo,
+                                neighbors_lo[0][0],
+                                neighbors_lo[0][1],
+                                neighbors_lo[1][0],
+                                neighbors_lo[1][1]);
+
+   packed_hi = lp_build_lerp_2d(&h16,
+                                s_fpart_hi, t_fpart_hi,
+                                neighbors_hi[0][0],
+                                neighbors_hi[0][1],
+                                neighbors_hi[1][0],
+                                neighbors_hi[1][1]);
+
+   packed = lp_build_pack2(builder, h16.type, u8n.type, packed_lo, packed_hi);
+
+   /*
+    * Convert to SoA and swizzle.
+    */
+
+   packed = LLVMBuildBitCast(builder, packed, i32_vec_type, "");
+
+   lp_build_rgba8_to_f32_soa(bld->builder,
+                             bld->texel_type,
+                             packed, unswizzled);
+
+   lp_build_format_swizzle_soa(bld->format_desc,
+                               bld->texel_type, unswizzled,
+                               texel);
+}
+
+
 static void
 lp_build_sample_compare(struct lp_build_sample_context *bld,
                         LLVMValueRef p,
@@ -336,7 +577,10 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       break;
    case PIPE_TEX_FILTER_LINEAR:
    case PIPE_TEX_FILTER_ANISO:
-      lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
+      if(lp_format_is_rgba8(bld.format_desc))
+         lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
+      else
+         lp_build_sample_2d_linear_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
    default:
       assert(0);
-- 
cgit v1.2.3


From 590949553f737902008dea020420311e2085aa1a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 25 Oct 2009 11:36:22 +0000
Subject: i965g: start hooking up some to the gallium context interfaces

- create/bind/destroy blend and depth state
- framebuffer and viewport
- etc.
---
 src/gallium/drivers/i965/brw_cc.c           |   2 +-
 src/gallium/drivers/i965/brw_context.h      |  81 ++++++-----
 src/gallium/drivers/i965/brw_misc_state.c   | 110 +++++++-------
 src/gallium/drivers/i965/brw_pipe_blend.c   | 214 ++++++++++++++++++++++------
 src/gallium/drivers/i965/brw_pipe_depth.c   | 172 ++++++++++++++++------
 src/gallium/drivers/i965/brw_pipe_fb.c      |  72 +++++++---
 src/gallium/drivers/i965/brw_pipe_misc.c    |  14 ++
 src/gallium/drivers/i965/brw_pipe_rast.h    |   1 +
 src/gallium/drivers/i965/brw_pipe_sampler.c |  52 +++++++
 src/gallium/drivers/i965/brw_state_debug.c  |  31 +---
 src/gallium/drivers/i965/brw_util.c         |  56 --------
 src/gallium/drivers/i965/brw_wm_state.c     |   8 +-
 12 files changed, 519 insertions(+), 294 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_pipe_misc.c
 create mode 100644 src/gallium/drivers/i965/brw_pipe_sampler.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index ca10bc73f6..bdd6418ae1 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -117,7 +117,7 @@ cc_unit_populate_key(const struct brw_context *brw,
    key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
    key->cc5 = brw->curr.blend->cc5;
    key->cc6 = brw->curr.blend->cc6;
-   key->cc7 = brw->curr.blend->cc7;
+   key->cc7 = brw->curr.zstencil->cc7;
 }
 
 /**
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 2e17e150bb..df43d8ba4d 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -122,8 +122,8 @@
 
 struct brw_context;
 
-struct brw_depth_stencil_alpha_state {
-   struct pipe_depth_stencil_alpha_state templ; /* for draw module */
+struct brw_depth_stencil_state {
+   //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
 
    /* Precalculated hardware state:
     */
@@ -131,18 +131,19 @@ struct brw_depth_stencil_alpha_state {
    struct brw_cc1 cc1;
    struct brw_cc2 cc2;
    struct brw_cc3 cc3;
+   struct brw_cc7 cc7;
 };
 
 
 struct brw_blend_state {
-   struct pipe_depth_stencil_alpha_state templ; /* for draw module */
+   //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
 
    /* Precalculated hardware state:
     */
+   struct brw_cc2 cc2;
    struct brw_cc3 cc3;
    struct brw_cc5 cc5;
    struct brw_cc6 cc6;
-   struct brw_cc7 cc7;
 };
 
 
@@ -172,20 +173,24 @@ struct brw_fragment_shader {
 
 #define PIPE_NEW_DEPTH_STENCIL_ALPHA    0x1
 #define PIPE_NEW_RAST                   0x2
-#define PIPE_NEW_BLEND                  0x2
-#define PIPE_NEW_VIEWPORT               0x2
-#define PIPE_NEW_FRAMEBUFFER            0x2
-#define PIPE_NEW_VERTEX_BUFFER          0x2
-#define PIPE_NEW_VERTEX_ELEMENT         0x2
-#define PIPE_NEW_FRAGMENT_SHADER        0x2
-#define PIPE_NEW_VERTEX_SHADER          0x2
-#define PIPE_NEW_FRAGMENT_CONSTANTS     0x2
-#define PIPE_NEW_VERTEX_CONSTANTS       0x2
-#define PIPE_NEW_CLIP                   0x2
-#define PIPE_NEW_INDEX_BUFFER           0x2
-#define PIPE_NEW_INDEX_RANGE            0x2
-#define PIPE_NEW_BLEND_COLOR            0x2
-#define PIPE_NEW_POLYGON_STIPPLE        0x2
+#define PIPE_NEW_BLEND                  0x4
+#define PIPE_NEW_VIEWPORT               0x8
+#define PIPE_NEW_SAMPLERS               0x10
+#define PIPE_NEW_VERTEX_BUFFER          0x20
+#define PIPE_NEW_VERTEX_ELEMENT         0x40
+#define PIPE_NEW_FRAGMENT_SHADER        0x80
+#define PIPE_NEW_VERTEX_SHADER          0x100
+#define PIPE_NEW_FRAGMENT_CONSTANTS     0x200
+#define PIPE_NEW_VERTEX_CONSTANTS       0x400
+#define PIPE_NEW_CLIP                   0x800
+#define PIPE_NEW_INDEX_BUFFER           0x1000
+#define PIPE_NEW_INDEX_RANGE            0x2000
+#define PIPE_NEW_BLEND_COLOR            0x4000
+#define PIPE_NEW_POLYGON_STIPPLE        0x8000
+#define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
+#define PIPE_NEW_DEPTH_BUFFER           0x20000
+#define PIPE_NEW_COLOR_BUFFERS          0x40000
+
 
 
 #define BRW_NEW_URB_FENCE               0x1
@@ -209,8 +214,6 @@ struct brw_fragment_shader {
  * meantime.
  */
 #define BRW_NEW_BATCH			0x10000
-/** brw->depth_region updated */
-#define BRW_NEW_DEPTH_BUFFER		0x20000
 #define BRW_NEW_NR_WM_SURFACES		0x40000
 #define BRW_NEW_NR_VS_SURFACES		0x80000
 #define BRW_NEW_INDEX_BUFFER		0x100000
@@ -385,12 +388,6 @@ struct brw_cache {
 };
 
 
-/* Considered adding a member to this struct to document which flags
- * an update might raise so that ordering of the state atoms can be
- * checked or derived at runtime.  Dropped the idea in favor of having
- * a debug mode where the state is monitored for flags which are
- * raised that have already been tested against.
- */
 struct brw_tracked_state {
    struct brw_state_flags dirty;
    int (*prepare)( struct brw_context *brw );
@@ -478,7 +475,7 @@ struct brw_context
       const struct brw_fragment_shader *fragment_shader;
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
-      const struct brw_depth_stencil_alpha_state *zstencil;
+      const struct brw_depth_stencil_state *zstencil;
 
       struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
@@ -491,6 +488,7 @@ struct brw_context
       struct pipe_buffer *vertex_constants;
       struct pipe_buffer *fragment_constants;
 
+      struct pipe_viewport_state viewport;
       struct brw_blend_constant_color bcc;
       struct brw_polygon_stipple bps;
 
@@ -719,16 +717,31 @@ void brw_emit_query_end(struct brw_context *brw);
  */
 void brw_debug_batch(struct brw_context *intel);
 
-/*======================================================================
- * brw_tex.c
- */
-void brw_validate_textures( struct brw_context *brw );
-
 
 /*======================================================================
- * brw_pipe_shader.c
+ * brw_pipe_*.c
  */
-void brw_init_shader_funcs( struct brw_context *brw );
+void brw_pipe_blend_init( struct brw_context *brw );
+void brw_pipe_depth_stencil_init( struct brw_context *brw );
+void brw_pipe_framebuffer_init( struct brw_context *brw );
+void brw_pipe_flush_init( struct brw_context *brw );
+void brw_pipe_misc_init( struct brw_context *brw );
+void brw_pipe_query_init( struct brw_context *brw );
+void brw_pipe_rast_init( struct brw_context *brw );
+void brw_pipe_sampler_init( struct brw_context *brw );
+void brw_pipe_shader_init( struct brw_context *brw );
+void brw_pipe_vertex_init( struct brw_context *brw );
+
+void brw_pipe_blend_cleanup( struct brw_context *brw );
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
+void brw_pipe_flush_cleanup( struct brw_context *brw );
+void brw_pipe_misc_cleanup( struct brw_context *brw );
+void brw_pipe_query_cleanup( struct brw_context *brw );
+void brw_pipe_rast_cleanup( struct brw_context *brw );
+void brw_pipe_sampler_cleanup( struct brw_context *brw );
+void brw_pipe_shader_cleanup( struct brw_context *brw );
+void brw_pipe_vertex_cleanup( struct brw_context *brw );
 
 
 /* brw_urb.c
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index ccebe08b4f..db8a2a5008 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -62,7 +62,9 @@ const struct brw_tracked_state brw_blend_constant_color = {
    .emit = upload_blend_constant_color
 };
 
-/* Constant single cliprect for framebuffer object or DRI2 drawing */
+/***********************************************************************
+ * Drawing rectangle - framebuffer dimensions
+ */
 static int upload_drawing_rect(struct brw_context *brw)
 {
    BEGIN_BATCH(4, NO_LOOP_CLIPRECTS);
@@ -77,13 +79,18 @@ static int upload_drawing_rect(struct brw_context *brw)
 
 const struct brw_tracked_state brw_drawing_rect = {
    .dirty = {
-      .mesa = PIPE_NEW_FRAMEBUFFER,
+      .mesa = PIPE_NEW_FRAMEBUFFER_DIMENSIONS,
       .brw = 0,
       .cache = 0
    },
    .emit = upload_drawing_rect
 };
 
+
+/***********************************************************************
+ * Binding table pointers
+ */
+
 static int prepare_binding_table_pointers(struct brw_context *brw)
 {
    brw_add_validated_bo(brw, brw->vs.bind_bo);
@@ -125,7 +132,7 @@ const struct brw_tracked_state brw_binding_table_pointers = {
 };
 
 
-/**
+/**********************************************************************
  * Upload pointers to the per-stage state.
  *
  * The state pointers in this packet are all relative to the general state
@@ -197,6 +204,11 @@ const struct brw_tracked_state brw_psp_urb_cbs = {
    .emit = upload_psp_urb_cbs,
 };
 
+
+/***********************************************************************
+ * Depth buffer 
+ */
+
 static int prepare_depthbuffer(struct brw_context *brw)
 {
    struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
@@ -278,8 +290,8 @@ static int emit_depthbuffer(struct brw_context *brw)
 
 const struct brw_tracked_state brw_depthbuffer = {
    .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_DEPTH_BUFFER | BRW_NEW_BATCH,
+      .mesa = PIPE_NEW_DEPTH_BUFFER,
+      .brw = BRW_NEW_BATCH,
       .cache = 0,
    },
    .prepare = prepare_depthbuffer,
@@ -308,63 +320,6 @@ const struct brw_tracked_state brw_polygon_stipple = {
 };
 
 
-/***********************************************************************
- * Polygon stipple offset packet
- */
-
-static int upload_polygon_stipple_offset(struct brw_context *brw)
-{
-   struct brw_polygon_stipple_offset bpso;
-
-   /* This is invarient state in gallium:
-    */
-   memset(&bpso, 0, sizeof(bpso));
-   bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
-   bpso.header.length = sizeof(bpso)/4-2;
-   bpso.bits0.y_offset = 0;
-   bpso.bits0.x_offset = 0;
-
-   BRW_CACHED_BATCH_STRUCT(brw, &bpso);
-   return 0;
-}
-
-const struct brw_tracked_state brw_polygon_stipple_offset = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT,
-      .cache = 0
-   },
-   .emit = upload_polygon_stipple_offset
-};
-
-/**********************************************************************
- * AA Line parameters
- */
-static int upload_aa_line_parameters(struct brw_context *brw)
-{
-   struct brw_aa_line_parameters balp;
-   
-   if (BRW_IS_965(brw))
-      return 0;
-
-   /* use legacy aa line coverage computation */
-   memset(&balp, 0, sizeof(balp));
-   balp.header.opcode = CMD_AA_LINE_PARAMETERS;
-   balp.header.length = sizeof(balp) / 4 - 2;
-   
-   BRW_CACHED_BATCH_STRUCT(brw, &balp);
-   return 0;
-}
-
-const struct brw_tracked_state brw_aa_line_parameters = {
-   .dirty = {
-      .mesa = 0,
-      .brw = BRW_NEW_CONTEXT,
-      .cache = 0
-   },
-   .emit = upload_aa_line_parameters
-};
-
 /***********************************************************************
  * Line stipple packet
  */
@@ -448,6 +403,32 @@ static int upload_invarient_state( struct brw_context *brw )
       BRW_BATCH_STRUCT(brw, &vfs);
    }
    
+   if (!BRW_IS_965(brw))
+   {
+      struct brw_aa_line_parameters balp;
+
+      /* use legacy aa line coverage computation */
+      memset(&balp, 0, sizeof(balp));
+      balp.header.opcode = CMD_AA_LINE_PARAMETERS;
+      balp.header.length = sizeof(balp) / 4 - 2;
+   
+      BRW_BATCH_STRUCT(brw, &balp);
+   }
+
+   {
+      struct brw_polygon_stipple_offset bpso;
+      
+      /* This is invariant state in gallium:
+       */
+      memset(&bpso, 0, sizeof(bpso));
+      bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET;
+      bpso.header.length = sizeof(bpso)/4-2;
+      bpso.bits0.y_offset = 0;
+      bpso.bits0.x_offset = 0;
+
+      BRW_BATCH_STRUCT(brw, &bpso);
+   }
+   
    return 0;
 }
 
@@ -460,6 +441,11 @@ const struct brw_tracked_state brw_invarient_state = {
    .emit = upload_invarient_state
 };
 
+
+/***********************************************************************
+ * State base address 
+ */
+
 /**
  * Define the base addresses which some state is referenced from.
  *
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index 54d09d9e45..d3bb882b1a 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -1,64 +1,188 @@
 
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
 
-   /* _NEW_COLOR */
-   if (key->logic_op != GL_COPY) {
-      cc.cc2.logicop_enable = 1;
-      cc.cc5.logicop_func = intel_translate_logic_op(key->logic_op);
-   } else if (key->color_blend) {
-      GLenum eqRGB = key->blend_eq_rgb;
-      GLenum eqA = key->blend_eq_a;
-      GLenum srcRGB = key->blend_src_rgb;
-      GLenum dstRGB = key->blend_dst_rgb;
-      GLenum srcA = key->blend_src_a;
-      GLenum dstA = key->blend_dst_a;
-
-      if (eqRGB == GL_MIN || eqRGB == GL_MAX) {
-	 srcRGB = dstRGB = GL_ONE;
-      }
-
-      if (eqA == GL_MIN || eqA == GL_MAX) {
-	 srcA = dstA = GL_ONE;
-      }
-
-      cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB);
-      cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB);
-      cc.cc6.blend_function = brw_translate_blend_equation(eqRGB);
-
-      cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA);
-      cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA);
-      cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA);
-
-      cc.cc3.blend_enable = 1;
-      cc.cc3.ia_blend_enable = (srcA != srcRGB ||
-				dstA != dstRGB ||
-				eqA != eqRGB);
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+static int translate_logicop(unsigned logicop)
+{
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:
+      return BRW_LOGICOPFUNCTION_CLEAR;
+   case PIPE_LOGICOP_AND:
+      return BRW_LOGICOPFUNCTION_AND;
+   case PIPE_LOGICOP_AND_REVERSE:
+      return BRW_LOGICOPFUNCTION_AND_REVERSE;
+   case PIPE_LOGICOP_COPY:
+      return BRW_LOGICOPFUNCTION_COPY;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      return BRW_LOGICOPFUNCTION_COPY_INVERTED;
+   case PIPE_LOGICOP_AND_INVERTED:
+      return BRW_LOGICOPFUNCTION_AND_INVERTED;
+   case PIPE_LOGICOP_NOOP:
+      return BRW_LOGICOPFUNCTION_NOOP;
+   case PIPE_LOGICOP_XOR:
+      return BRW_LOGICOPFUNCTION_XOR;
+   case PIPE_LOGICOP_OR:
+      return BRW_LOGICOPFUNCTION_OR;
+   case PIPE_LOGICOP_OR_INVERTED:
+      return BRW_LOGICOPFUNCTION_OR_INVERTED;
+   case PIPE_LOGICOP_NOR:
+      return BRW_LOGICOPFUNCTION_NOR;
+   case PIPE_LOGICOP_EQUIV:
+      return BRW_LOGICOPFUNCTION_EQUIV;
+   case PIPE_LOGICOP_INVERT:
+      return BRW_LOGICOPFUNCTION_INVERT;
+   case PIPE_LOGICOP_OR_REVERSE:
+      return BRW_LOGICOPFUNCTION_OR_REVERSE;
+   case PIPE_LOGICOP_NAND:
+      return BRW_LOGICOPFUNCTION_NAND;
+   case PIPE_LOGICOP_SET:
+      return BRW_LOGICOPFUNCTION_SET;
+   default:
+      assert(0);
+      return BRW_LOGICOPFUNCTION_SET;
    }
+}
+
 
-   if (key->dither) {
-      cc.cc5.dither_enable = 1;
-      cc.cc6.y_dither_offset = 0;
-      cc.cc6.x_dither_offset = 0;
+static unsigned translate_blend_equation( unsigned mode )
+{
+   switch (mode) {
+   case PIPE_BLEND_ADD: 
+      return BRW_BLENDFUNCTION_ADD; 
+   case PIPE_BLEND_MIN: 
+      return BRW_BLENDFUNCTION_MIN; 
+   case PIPE_BLEND_MAX: 
+      return BRW_BLENDFUNCTION_MAX; 
+   case PIPE_BLEND_SUBTRACT: 
+      return BRW_BLENDFUNCTION_SUBTRACT; 
+   case PIPE_BLEND_REVERSE_SUBTRACT: 
+      return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; 
+   default: 
+      assert(0);
+      return BRW_BLENDFUNCTION_ADD;
    }
+}
 
-   if (INTEL_DEBUG & DEBUG_STATS)
-      cc.cc5.statistics_enable = 1;
+static unsigned translate_blend_factor( unsigned factor )
+{
+   switch(factor) {
+   case PIPE_BLENDFACTOR_ZERO: 
+      return BRW_BLENDFACTOR_ZERO; 
+   case PIPE_BLENDFACTOR_SRC_ALPHA: 
+      return BRW_BLENDFACTOR_SRC_ALPHA; 
+   case PIPE_BLENDFACTOR_ONE: 
+      return BRW_BLENDFACTOR_ONE; 
+   case PIPE_BLENDFACTOR_SRC_COLOR: 
+      return BRW_BLENDFACTOR_SRC_COLOR; 
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR: 
+      return BRW_BLENDFACTOR_INV_SRC_COLOR; 
+   case PIPE_BLENDFACTOR_DST_COLOR: 
+      return BRW_BLENDFACTOR_DST_COLOR; 
+   case PIPE_BLENDFACTOR_INV_DST_COLOR: 
+      return BRW_BLENDFACTOR_INV_DST_COLOR; 
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      return BRW_BLENDFACTOR_INV_SRC_ALPHA; 
+   case PIPE_BLENDFACTOR_DST_ALPHA: 
+      return BRW_BLENDFACTOR_DST_ALPHA; 
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      return BRW_BLENDFACTOR_INV_DST_ALPHA; 
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 
+      return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      return BRW_BLENDFACTOR_CONST_COLOR; 
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      return BRW_BLENDFACTOR_INV_CONST_COLOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      return BRW_BLENDFACTOR_CONST_ALPHA; 
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      return BRW_BLENDFACTOR_INV_CONST_ALPHA;
+   default:
+      assert(0);
+      return BRW_BLENDFACTOR_ZERO;
+   }   
 }
 
+static void *brw_create_blend_state( struct pipe_context *pipe,
+				     const struct pipe_blend_state *templ )
+{
+   struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state);
+
+   if (templ->logicop_enable) {
+      blend->cc2.logicop_enable = 1;
+      blend->cc5.logicop_func = translate_logicop(templ->logicop_func);
+   } 
+   else if (templ->blend_enable) {
+      blend->cc6.dest_blend_factor = translate_blend_factor(templ->rgb_dst_factor);
+      blend->cc6.src_blend_factor = translate_blend_factor(templ->rgb_src_factor);
+      blend->cc6.blend_function = translate_blend_equation(templ->rgb_func);
+
+      blend->cc5.ia_dest_blend_factor = translate_blend_factor(templ->alpha_dst_factor);
+      blend->cc5.ia_src_blend_factor = translate_blend_factor(templ->alpha_src_factor);
+      blend->cc5.ia_blend_function = translate_blend_equation(templ->alpha_func);
+
+      blend->cc3.blend_enable = 1;
+      blend->cc3.ia_blend_enable = 
+	 (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor ||
+	  blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor ||
+	  blend->cc6.blend_function != blend->cc5.ia_blend_function);
+   }
+
+   blend->cc5.dither_enable = templ->dither;
+
+   if (BRW_DEBUG & DEBUG_STATS)
+      blend->cc5.statistics_enable = 1;
+
+   return (void *)blend;
+}
+
+static void brw_bind_blend_state(struct pipe_context *pipe,
+				 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   brw->curr.blend = (const struct brw_blend_state *)cso;
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND;
+}
+
+static void brw_delete_blend_state(struct pipe_context *pipe,
+				  void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   assert((const void *)cso != (const void *)brw->curr.blend);
+   FREE(cso);
+}
 
 
 static void brw_set_blend_color(struct pipe_context *pipe,
-				const float *blend_color)
+				const struct pipe_blend_color *blend_color)
 {
    struct brw_context *brw = brw_context(pipe);
-   struct brw_blend_constant_color *bcc = &brw->curr.blend_color.bcc;
+   struct brw_blend_constant_color *bcc = &brw->curr.bcc;
 
    memset(bcc, 0, sizeof(*bcc));      
    bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
    bcc->header.length = sizeof(*bcc)/4-2;
-   bcc->blend_constant_color[0] = blend_color[0];
-   bcc->blend_constant_color[1] = blend_color[1];
-   bcc->blend_constant_color[2] = blend_color[2];
-   bcc->blend_constant_color[3] = blend_color[3];
+   bcc->blend_constant_color[0] = blend_color->color[0];
+   bcc->blend_constant_color[1] = blend_color->color[1];
+   bcc->blend_constant_color[2] = blend_color->color[2];
+   bcc->blend_constant_color[3] = blend_color->color[3];
 
-   brw->state.dirty.pipe |= PIPE_NEW_BLEND_COLOR;
+   brw->state.dirty.mesa |= PIPE_NEW_BLEND_COLOR;
+}
+
+
+void brw_pipe_blend_init( struct brw_context *brw )
+{
+   brw->base.set_blend_color = brw_set_blend_color;
+   brw->base.create_blend_state = brw_create_blend_state;
+   brw->base.bind_blend_state = brw_bind_blend_state;
+   brw->base.delete_blend_state = brw_delete_blend_state;
+}
+
+void brw_pipe_blend_cleanup( struct brw_context *brw )
+{
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
index 29f135d37a..33fe517e0b 100644
--- a/src/gallium/drivers/i965/brw_pipe_depth.c
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -1,58 +1,142 @@
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+
+static unsigned brw_translate_compare_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      return BRW_COMPAREFUNCTION_NEVER;
+   case PIPE_FUNC_LESS:
+      return BRW_COMPAREFUNCTION_LESS;
+   case PIPE_FUNC_LEQUAL:
+      return BRW_COMPAREFUNCTION_LEQUAL;
+   case PIPE_FUNC_GREATER:
+      return BRW_COMPAREFUNCTION_GREATER;
+   case PIPE_FUNC_GEQUAL:
+      return BRW_COMPAREFUNCTION_GEQUAL;
+   case PIPE_FUNC_NOTEQUAL:
+      return BRW_COMPAREFUNCTION_NOTEQUAL;
+   case PIPE_FUNC_EQUAL:
+      return BRW_COMPAREFUNCTION_EQUAL;
+   case PIPE_FUNC_ALWAYS:
+      return BRW_COMPAREFUNCTION_ALWAYS;
+   default:
+      assert(0);
+      return BRW_COMPAREFUNCTION_ALWAYS;
+   }
+}
+
+static unsigned translate_stencil_op(unsigned op)
+{
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:
+      return BRW_STENCILOP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:
+      return BRW_STENCILOP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:
+      return BRW_STENCILOP_REPLACE;
+   case PIPE_STENCIL_OP_INCR:
+      return BRW_STENCILOP_INCRSAT;
+   case PIPE_STENCIL_OP_DECR:
+      return BRW_STENCILOP_DECRSAT;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      return BRW_STENCILOP_INCR;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      return BRW_STENCILOP_DECR;
+   case PIPE_STENCIL_OP_INVERT:
+      return BRW_STENCILOP_INVERT;
+   default:
+      assert(0);
+      return BRW_STENCILOP_ZERO;
+   }
+}
+
+
 static void *
-brw_create_depth_stencil( struct pipe_context *pipe,
-			  const struct pipe_depth_stencil_alpha_state *tmpl )
+brw_create_depth_stencil_state( struct pipe_context *pipe,
+				const struct pipe_depth_stencil_alpha_state *templ )
 {
-   if (tmpl->stencil[0].enable) {
-      cc.cc0.stencil_enable = 1;
-      cc.cc0.stencil_func =
-	 intel_translate_compare_func(key->stencil_func[0]);
-      cc.cc0.stencil_fail_op =
-	 intel_translate_stencil_op(key->stencil_fail_op[0]);
-      cc.cc0.stencil_pass_depth_fail_op =
-	 intel_translate_stencil_op(key->stencil_pass_depth_fail_op[0]);
-      cc.cc0.stencil_pass_depth_pass_op =
-	 intel_translate_stencil_op(key->stencil_pass_depth_pass_op[0]);
-      cc.cc1.stencil_ref = key->stencil_ref[0];
-      cc.cc1.stencil_write_mask = key->stencil_write_mask[0];
-      cc.cc1.stencil_test_mask = key->stencil_test_mask[0];
-
-      if (tmpl->stencil[1].enable) {
-	 cc.cc0.bf_stencil_enable = 1;
-	 cc.cc0.bf_stencil_func =
-	    intel_translate_compare_func(key->stencil_func[1]);
-	 cc.cc0.bf_stencil_fail_op =
-	    intel_translate_stencil_op(key->stencil_fail_op[1]);
-	 cc.cc0.bf_stencil_pass_depth_fail_op =
-	    intel_translate_stencil_op(key->stencil_pass_depth_fail_op[1]);
-	 cc.cc0.bf_stencil_pass_depth_pass_op =
-	    intel_translate_stencil_op(key->stencil_pass_depth_pass_op[1]);
-	 cc.cc1.bf_stencil_ref = key->stencil_ref[1];
-	 cc.cc2.bf_stencil_write_mask = key->stencil_write_mask[1];
-	 cc.cc2.bf_stencil_test_mask = key->stencil_test_mask[1];
+   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+   if (templ->stencil[0].enabled) {
+      zstencil->cc0.stencil_enable = 1;
+      zstencil->cc0.stencil_func =
+	 brw_translate_compare_func(templ->stencil[0].func);
+      zstencil->cc0.stencil_fail_op =
+	 translate_stencil_op(templ->stencil[0].fail_op);
+      zstencil->cc0.stencil_pass_depth_fail_op =
+	 translate_stencil_op(templ->stencil[0].zfail_op);
+      zstencil->cc0.stencil_pass_depth_pass_op =
+	 translate_stencil_op(templ->stencil[0].zpass_op);
+      zstencil->cc1.stencil_ref = templ->stencil[0].ref_value;
+      zstencil->cc1.stencil_write_mask = templ->stencil[0].writemask;
+      zstencil->cc1.stencil_test_mask = templ->stencil[0].valuemask;
+
+      if (templ->stencil[1].enabled) {
+	 zstencil->cc0.bf_stencil_enable = 1;
+	 zstencil->cc0.bf_stencil_func =
+	    brw_translate_compare_func(templ->stencil[1].func);
+	 zstencil->cc0.bf_stencil_fail_op =
+	    translate_stencil_op(templ->stencil[1].fail_op);
+	 zstencil->cc0.bf_stencil_pass_depth_fail_op =
+	    translate_stencil_op(templ->stencil[1].zfail_op);
+	 zstencil->cc0.bf_stencil_pass_depth_pass_op =
+	    translate_stencil_op(templ->stencil[1].zpass_op);
+	 zstencil->cc1.bf_stencil_ref = templ->stencil[1].ref_value;
+	 zstencil->cc2.bf_stencil_write_mask = templ->stencil[1].writemask;
+	 zstencil->cc2.bf_stencil_test_mask = templ->stencil[1].valuemask;
       }
 
-      /* Not really sure about this:
-       */
-      cc.cc0.stencil_write_enable = (cc.cc1.stencil_write_mask ||
-				     cc.cc2.bf_stencil_write_mask);
+      zstencil->cc0.stencil_write_enable = (zstencil->cc1.stencil_write_mask ||
+					    zstencil->cc2.bf_stencil_write_mask);
    }
 
 
-   if (key->alpha_enabled) {
-      cc.cc3.alpha_test = 1;
-      cc.cc3.alpha_test_func = intel_translate_compare_func(key->alpha_func);
-      cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
-
-      UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], key->alpha_ref);
+   if (templ->alpha.enabled) {
+      zstencil->cc3.alpha_test = 1;
+      zstencil->cc3.alpha_test_func = brw_translate_compare_func(templ->alpha.func);
+      zstencil->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8;
+      zstencil->cc7.alpha_ref.ub[0] = float_to_ubyte(templ->alpha.ref_value);
    }
 
-   /* _NEW_DEPTH */
-   if (key->depth_test) {
-      cc.cc2.depth_test = 1;
-      cc.cc2.depth_test_function = intel_translate_compare_func(key->depth_func);
-      cc.cc2.depth_write_enable = key->depth_write;
+   if (templ->depth.enabled) {
+      zstencil->cc2.depth_test = 1;
+      zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
+      zstencil->cc2.depth_write_enable = templ->depth.writemask;
    }
 
+   return (void *)zstencil;
+}
+
 
+static void brw_bind_depth_stencil_state(struct pipe_context *pipe,
+					 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   brw->curr.zstencil = (const struct brw_depth_stencil_state *)cso;
+   brw->state.dirty.mesa |= PIPE_NEW_DEPTH_STENCIL_ALPHA;
+}
+
+static void brw_delete_depth_stencil_state(struct pipe_context *pipe,
+					   void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   assert((const void *)cso != (const void *)brw->curr.zstencil);
+   FREE(cso);
+}
+
+
+void brw_pipe_depth_stencil_init( struct brw_context *brw )
+{
+   brw->base.create_depth_stencil_alpha_state = brw_create_depth_stencil_state;
+   brw->base.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state;
+   brw->base.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state;
+}
+
+void brw_pipe_depth_stencil_cleanup( struct brw_context *brw )
+{
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index dbf97a0544..6391717227 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -1,25 +1,61 @@
+#include "util/u_math.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
 
 /**
  * called from intelDrawBuffer()
  */
-static void brw_set_draw_region( struct pipe_context *pipe, 
-                                 struct intel_region *color_regions[],
-                                 struct intel_region *depth_region,
-                                 GLuint num_color_regions)
+static void brw_set_framebuffer_state( struct pipe_context *pipe, 
+				       const struct pipe_framebuffer_state *fb )
+{
+   struct brw_context *brw = brw_context(pipe);
+   unsigned i;
+
+   /* Dimensions:
+    */
+   if (brw->curr.fb.width != fb->width ||
+       brw->curr.fb.height != fb->height) {
+      brw->curr.fb.width = fb->width;
+      brw->curr.fb.height = fb->height;
+      brw->state.dirty.mesa |= PIPE_NEW_FRAMEBUFFER_DIMENSIONS;
+   }
+   
+   /* Z/Stencil
+    */
+   if (brw->curr.fb.zsbuf != fb->zsbuf) {
+      pipe_surface_reference(&brw->curr.fb.zsbuf, fb->zsbuf);
+      brw->state.dirty.mesa |= PIPE_NEW_DEPTH_BUFFER;
+   }
+
+   /* Color buffers:
+    */
+   for (i = 0; i < MAX2(fb->nr_cbufs, brw->curr.fb.nr_cbufs); i++) {
+      if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) {
+	 brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS;
+	 pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]);
+      }
+   }
+   
+   brw->curr.fb.nr_cbufs = fb->nr_cbufs;
+}
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+				    const struct pipe_viewport_state *viewport )
 {
    struct brw_context *brw = brw_context(pipe);
-   GLuint i;
-
-   /* release old color/depth regions */
-   if (brw->state.depth_region != depth_region)
-      brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER;
-   for (i = 0; i < brw->state.nr_color_regions; i++)
-       intel_region_release(&brw->state.color_regions[i]);
-   intel_region_release(&brw->state.depth_region);
-
-   /* reference new color/depth regions */
-   for (i = 0; i < num_color_regions; i++)
-       intel_region_reference(&brw->state.color_regions[i], color_regions[i]);
-   intel_region_reference(&brw->state.depth_region, depth_region);
-   brw->state.nr_color_regions = num_color_regions;
+   brw->curr.viewport = *viewport;
+   brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
+}
+
+/* Hook the framebuffer and viewport state setters into the pipe context. */
+void brw_pipe_framebuffer_init( struct brw_context *brw )
+{
+   brw->base.set_framebuffer_state = brw_set_framebuffer_state;
+   brw->base.set_viewport_state = brw_set_viewport_state;
+}
+
+void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
+{
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
new file mode 100644
index 0000000000..fb8d7ecc59
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -0,0 +1,14 @@
+/* Build the polygon stipple pattern packet in brw->curr.bps from the caller's 32 words. */
+static void brw_set_polygon_stipple( struct pipe_context *pipe,
+				     const unsigned *stipple )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_polygon_stipple *bps = &brw->curr.bps;
+   unsigned i;
+
+   memset(bps, 0, sizeof *bps);
+   bps->header.opcode = CMD_POLY_STIPPLE_PATTERN;
+   bps->header.length = sizeof *bps/4-2;   /* size in 4-byte units, minus 2 */
+   for (i = 0; i < 32; i++)
+      bps->stipple[i] = stipple[i]; /* don't invert */
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
index 6ceaa1fb09..800a9208a7 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.h
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -9,6 +9,7 @@ struct brw_rasterizer_state {
    /* Precalculated hardware state:
     */
    struct brw_clip_prog_key clip_key;
+   struct brw_line_stipple bls;
 };
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
new file mode 100644
index 0000000000..b3069f08c0
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -0,0 +1,52 @@
+
+#include "util/u_memory.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_debug.h"
+
+
+
+static void *brw_create_sampler_state( struct pipe_context *pipe,
+				     const struct pipe_sampler_state *templ )
+{
+   struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state);
+
+
+   return (void *)sampler;
+}
+
+static void brw_bind_sampler_state(struct pipe_context *pipe,
+				 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   brw->curr.sampler = (const struct brw_sampler_state *)cso;
+   brw->state.dirty.mesa |= PIPE_NEW_SAMPLER;
+}
+
+static void brw_delete_sampler_state(struct pipe_context *pipe,
+				  void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   FREE(cso);
+}
+
+static void brw_set_sampler_textures(struct pipe_context *pipe,
+				     unsigned num_textures,
+				     struct pipe_texture **tex)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
+}
+
+/* Install the sampler CSO and texture-binding entry points on the pipe context. */
+void brw_pipe_sampler_init( struct brw_context *brw )
+{
+   brw->base.set_sampler_textures = brw_set_sampler_textures;
+   brw->base.create_sampler_state = brw_create_sampler_state;
+   brw->base.bind_sampler_state = brw_bind_sampler_state;
+   brw->base.delete_sampler_state = brw_delete_sampler_state;
+}
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
index 812b761d40..22cea4b7d8 100644
--- a/src/gallium/drivers/i965/brw_state_debug.c
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -40,35 +40,7 @@ struct dirty_bit_map {
 #define DEFINE_BIT(name) {name, #name, 0}
 
 static struct dirty_bit_map mesa_bits[] = {
-   DEFINE_BIT(_NEW_MODELVIEW),
-   DEFINE_BIT(_NEW_PROJECTION),
-   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
-   DEFINE_BIT(_NEW_COLOR_MATRIX),
-   DEFINE_BIT(_NEW_ACCUM),
-   DEFINE_BIT(_NEW_COLOR),
-   DEFINE_BIT(_NEW_DEPTH),
-   DEFINE_BIT(_NEW_EVAL),
-   DEFINE_BIT(_NEW_FOG),
-   DEFINE_BIT(_NEW_HINT),
-   DEFINE_BIT(_NEW_LIGHT),
-   DEFINE_BIT(_NEW_LINE),
-   DEFINE_BIT(_NEW_PIXEL),
-   DEFINE_BIT(_NEW_POINT),
-   DEFINE_BIT(_NEW_POLYGON),
-   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
-   DEFINE_BIT(_NEW_SCISSOR),
-   DEFINE_BIT(_NEW_STENCIL),
-   DEFINE_BIT(_NEW_TEXTURE),
-   DEFINE_BIT(_NEW_TRANSFORM),
-   DEFINE_BIT(_NEW_VIEWPORT),
-   DEFINE_BIT(_NEW_PACKUNPACK),
-   DEFINE_BIT(_NEW_ARRAY),
-   DEFINE_BIT(_NEW_RENDERMODE),
-   DEFINE_BIT(_NEW_BUFFERS),
-   DEFINE_BIT(_NEW_MULTISAMPLE),
-   DEFINE_BIT(_NEW_TRACK_MATRIX),
-   DEFINE_BIT(_NEW_PROGRAM),
-   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
+   DEFINE_BIT(PIPE_NEW_BLEND_COLOR),
    {0, 0, 0}
 };
 
@@ -88,7 +60,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
-   DEFINE_BIT(BRW_NEW_DEPTH_BUFFER),
    {0, 0, 0}
 };
 
diff --git a/src/gallium/drivers/i965/brw_util.c b/src/gallium/drivers/i965/brw_util.c
index c5244e58ab..458058d668 100644
--- a/src/gallium/drivers/i965/brw_util.c
+++ b/src/gallium/drivers/i965/brw_util.c
@@ -35,60 +35,4 @@
 
 
-GLuint brw_translate_blend_equation( GLenum mode )
-{
-   switch (mode) {
-   case GL_FUNC_ADD: 
-      return BRW_BLENDFUNCTION_ADD; 
-   case GL_MIN: 
-      return BRW_BLENDFUNCTION_MIN; 
-   case GL_MAX: 
-      return BRW_BLENDFUNCTION_MAX; 
-   case GL_FUNC_SUBTRACT: 
-      return BRW_BLENDFUNCTION_SUBTRACT; 
-   case GL_FUNC_REVERSE_SUBTRACT: 
-      return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; 
-   default: 
-      assert(0);
-      return BRW_BLENDFUNCTION_ADD;
-   }
-}
 
-GLuint brw_translate_blend_factor( GLenum factor )
-{
-   switch(factor) {
-   case GL_ZERO: 
-      return BRW_BLENDFACTOR_ZERO; 
-   case GL_SRC_ALPHA: 
-      return BRW_BLENDFACTOR_SRC_ALPHA; 
-   case GL_ONE: 
-      return BRW_BLENDFACTOR_ONE; 
-   case GL_SRC_COLOR: 
-      return BRW_BLENDFACTOR_SRC_COLOR; 
-   case GL_ONE_MINUS_SRC_COLOR: 
-      return BRW_BLENDFACTOR_INV_SRC_COLOR; 
-   case GL_DST_COLOR: 
-      return BRW_BLENDFACTOR_DST_COLOR; 
-   case GL_ONE_MINUS_DST_COLOR: 
-      return BRW_BLENDFACTOR_INV_DST_COLOR; 
-   case GL_ONE_MINUS_SRC_ALPHA:
-      return BRW_BLENDFACTOR_INV_SRC_ALPHA; 
-   case GL_DST_ALPHA: 
-      return BRW_BLENDFACTOR_DST_ALPHA; 
-   case GL_ONE_MINUS_DST_ALPHA:
-      return BRW_BLENDFACTOR_INV_DST_ALPHA; 
-   case GL_SRC_ALPHA_SATURATE: 
-      return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
-   case GL_CONSTANT_COLOR:
-      return BRW_BLENDFACTOR_CONST_COLOR; 
-   case GL_ONE_MINUS_CONSTANT_COLOR:
-      return BRW_BLENDFACTOR_INV_CONST_COLOR;
-   case GL_CONSTANT_ALPHA:
-      return BRW_BLENDFACTOR_CONST_ALPHA; 
-   case GL_ONE_MINUS_CONSTANT_ALPHA:
-      return BRW_BLENDFACTOR_INV_CONST_ALPHA;
-   default:
-      assert(0);
-      return BRW_BLENDFACTOR_ZERO;
-   }   
-}
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 16a2324049..4989aae830 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -105,11 +105,11 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    /* as far as we can tell */
    key->computes_depth =
       (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
-   /* BRW_NEW_DEPTH_BUFFER
+   /* PIPE_NEW_DEPTH_BUFFER
     * Override for NULL depthbuffer case, required by the Pixel Shader Computed
     * Depth field.
     */
-   if (brw->state.depth_region == NULL)
+   if (brw->curr.fb.zsbuf == NULL)
       key->computes_depth = 0;
 
    /* _NEW_COLOR */
@@ -295,7 +295,8 @@ static void upload_wm_unit( struct brw_context *brw )
 
 const struct brw_tracked_state brw_wm_unit = {
    .dirty = {
-      .mesa = (_NEW_POLYGON | 
+      .mesa = (PIPE_NEW_DEPTH_BUFFER |
+	       _NEW_POLYGON | 
 	       _NEW_POLYGONSTIPPLE | 
 	       _NEW_LINE | 
 	       _NEW_COLOR |
@@ -303,7 +304,6 @@ const struct brw_tracked_state brw_wm_unit = {
 
       .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
 	      BRW_NEW_CURBE_OFFSETS |
-	      BRW_NEW_DEPTH_BUFFER |
 	      BRW_NEW_NR_WM_SURFACES),
 
       .cache = (CACHE_NEW_WM_PROG |
-- 
cgit v1.2.3


From e4c5e01c109e51baaad23e90d08e8543b0fd6c07 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 11:48:17 +0000
Subject: llvmpipe: Immediate multiplication.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c       | 54 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_arit.h       |  5 +++
 src/gallium/drivers/llvmpipe/lp_bld_interp.c     | 30 +------------
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c |  7 ++-
 4 files changed, 64 insertions(+), 32 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 93e797cb44..9c59677a74 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -47,6 +47,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_debug.h"
+#include "util/u_math.h"
 #include "util/u_string.h"
 #include "util/u_cpu_detect.h"
 
@@ -424,6 +425,59 @@ lp_build_mul(struct lp_build_context *bld,
 }
 
 
+/**
+ * Small vector x scale multiplication optimization.
+ */
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b)
+{
+   LLVMValueRef factor;
+
+   if(b == 0)
+      return bld->zero;
+
+   if(b == 1)
+      return a;
+
+   if(b == -1)
+      return LLVMBuildNeg(bld->builder, a, "");
+
+   if(b == 2 && bld->type.floating)
+      return lp_build_add(bld, a, a);
+
+   if(util_is_pot(b)) {
+      unsigned shift = ffs(b) - 1;
+
+      if(bld->type.floating) {
+#if 0
+         /*
+          * Power of two multiplication by directly manipulating the mantissa.
+          *
+          * XXX: This might not be always faster, it will introduce a small error
+          * for multiplication by zero, and it will produce wrong results
+          * for Inf and NaN.
+          */
+         unsigned mantissa = lp_mantissa(bld->type);
+         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
+         a = LLVMBuildAdd(bld->builder, a, factor, "");
+         a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
+         return a;
+#endif
+      }
+      else {
+         factor = lp_build_const_scalar(bld->type, shift);
+         return LLVMBuildShl(bld->builder, a, factor, "");
+      }
+   }
+
+   factor = lp_build_const_scalar(bld->type, (double)b);
+   return lp_build_mul(bld, a, factor);
+}
+
+
 /**
  * Generate a / b
  */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
index 4e568c055e..62be4b9aee 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h
@@ -66,6 +66,11 @@ lp_build_mul(struct lp_build_context *bld,
              LLVMValueRef a,
              LLVMValueRef b);
 
+LLVMValueRef
+lp_build_mul_imm(struct lp_build_context *bld,
+                 LLVMValueRef a,
+                 int b);
+
 LLVMValueRef
 lp_build_div(struct lp_build_context *bld,
              LLVMValueRef a,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca6d1..818c0e943e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -108,32 +108,6 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
 }
 
 
-/**
- * Small vector x scale multiplication optimization.
- *
- * TODO: Should be elsewhere.
- */
-static LLVMValueRef
-coeff_multiply(struct lp_build_interp_soa_context *bld,
-               LLVMValueRef coeff,
-               int step)
-{
-   LLVMValueRef factor;
-
-   switch(step) {
-   case 0:
-      return bld->base.zero;
-   case 1:
-      return coeff;
-   case 2:
-      return lp_build_add(&bld->base, coeff, coeff);
-   default:
-      factor = lp_build_const_scalar(bld->base.type, (double)step);
-      return lp_build_mul(&bld->base, coeff, factor);
-   }
-}
-
-
 /**
  * Multiply the dadx and dady with the xstep and ystep respectively.
  */
@@ -149,8 +123,8 @@ coeffs_update(struct lp_build_interp_soa_context *bld)
       if (mode != TGSI_INTERPOLATE_CONSTANT) {
          for(chan = 0; chan < NUM_CHANNELS; ++chan) {
             if(mask & (1 << chan)) {
-               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
-               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
+               bld->dadx[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dadx[attrib][chan], bld->xstep);
+               bld->dady[attrib][chan] = lp_build_mul_imm(&bld->base, bld->dady[attrib][chan], bld->ystep);
             }
          }
       }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index f7a030fb8c..42e4ee6986 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -309,7 +309,7 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    LLVMBuilderRef builder = bld->builder;
    struct lp_build_context i32, h16, u8n;
    LLVMTypeRef i32_vec_type, h16_vec_type, u8n_vec_type;
-   LLVMValueRef f32_c256, i32_c8, i32_c128, i32_c255;
+   LLVMValueRef i32_c8, i32_c128, i32_c255;
    LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
    LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
    LLVMValueRef x0, x1;
@@ -328,9 +328,8 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
    h16_vec_type = lp_build_vec_type(h16.type);
    u8n_vec_type = lp_build_vec_type(u8n.type);
 
-   f32_c256 = lp_build_const_scalar(bld->coord_type, 256.0);
-   s = lp_build_mul(&bld->coord_bld, s, f32_c256);
-   t = lp_build_mul(&bld->coord_bld, t, f32_c256);
+   s = lp_build_mul_imm(&bld->coord_bld, s, 256);
+   t = lp_build_mul_imm(&bld->coord_bld, t, 256);
 
    s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
    t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
-- 
cgit v1.2.3


From 5fcb75758c50bd10e8bd730e55bcbf73614eeb60 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 11:49:01 +0000
Subject: llvmpipe: Dump the sampler state of the shader key.

---
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 7728ba6076..530a2d448c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -400,6 +400,7 @@ generate_fragment(struct llvmpipe_context *lp,
 #ifdef DEBUG
    tgsi_dump(shader->base.tokens, 0);
    if(key->depth.enabled) {
+      debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
       debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
       debug_printf("depth.writemask = %u\n", key->depth.writemask);
    }
@@ -419,6 +420,23 @@ generate_fragment(struct llvmpipe_context *lp,
       debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
    }
    debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+      if(key->sampler[i].format) {
+         debug_printf("sampler[%u] = \n", i);
+         debug_printf("  .format = %s\n", pf_name(key->sampler[i].format));
+         debug_printf("  .target = %u\n", key->sampler[i].target);
+         debug_printf("  .pot = %u%u%u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth);
+         debug_printf("  .wrap = %u %u %u\n", key->sampler[i].wrap_s, key->sampler[i].wrap_t, key->sampler[i].wrap_r);
+         debug_printf("  .min_img_filter = %u\n", key->sampler[i].min_img_filter);
+         debug_printf("  .min_mip_filter = %u\n", key->sampler[i].min_mip_filter);
+         debug_printf("  .mag_img_filter = %u\n", key->sampler[i].mag_img_filter);
+         if(key->sampler[i].compare_mode)
+            debug_printf("  .compare_mode = %s\n", debug_dump_blend_func(key->sampler[i].compare_func, TRUE));
+         debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+         debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+      }
+   }
+
 #endif
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
-- 
cgit v1.2.3


From 88e08d7c6de89279c737dbf5139492b39f96dc43 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 25 Oct 2009 12:27:14 +0000
Subject: llvmpipe: Human friendlier sampler state dump.

---
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c |  4 +++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c       | 27 +++++++++++++++++-------
 2 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 42e4ee6986..47b68b71e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -35,6 +35,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "util/u_debug.h"
+#include "util/u_debug_dump.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_format.h"
@@ -171,7 +172,8 @@ lp_build_sample_wrap(struct lp_build_sample_context *bld,
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
       /* FIXME */
-      _debug_printf("warning: failed to translate texture wrap mode %u\n", wrap_mode);
+      _debug_printf("warning: failed to translate texture wrap mode %s\n",
+                    debug_dump_tex_wrap(wrap_mode, TRUE));
       coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero);
       coord = lp_build_min(int_coord_bld, coord, length_minus_one);
       break;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 530a2d448c..2e9aa9fffe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -423,15 +423,26 @@ generate_fragment(struct llvmpipe_context *lp,
    for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
       if(key->sampler[i].format) {
          debug_printf("sampler[%u] = \n", i);
-         debug_printf("  .format = %s\n", pf_name(key->sampler[i].format));
-         debug_printf("  .target = %u\n", key->sampler[i].target);
-         debug_printf("  .pot = %u%u%u\n", key->sampler[i].pot_width, key->sampler[i].pot_height, key->sampler[i].pot_depth);
-         debug_printf("  .wrap = %u %u %u\n", key->sampler[i].wrap_s, key->sampler[i].wrap_t, key->sampler[i].wrap_r);
-         debug_printf("  .min_img_filter = %u\n", key->sampler[i].min_img_filter);
-         debug_printf("  .min_mip_filter = %u\n", key->sampler[i].min_mip_filter);
-         debug_printf("  .mag_img_filter = %u\n", key->sampler[i].mag_img_filter);
+         debug_printf("  .format = %s\n",
+                      pf_name(key->sampler[i].format));
+         debug_printf("  .target = %s\n",
+                      debug_dump_tex_target(key->sampler[i].target, TRUE));
+         debug_printf("  .pot = %u %u %u\n",
+                      key->sampler[i].pot_width,
+                      key->sampler[i].pot_height,
+                      key->sampler[i].pot_depth);
+         debug_printf("  .wrap = %s %s %s\n",
+                      debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+                      debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+                      debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+         debug_printf("  .min_img_filter = %s\n",
+                      debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+         debug_printf("  .min_mip_filter = %s\n",
+                      debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+         debug_printf("  .mag_img_filter = %s\n",
+                      debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
          if(key->sampler[i].compare_mode)
-            debug_printf("  .compare_mode = %s\n", debug_dump_blend_func(key->sampler[i].compare_func, TRUE));
+            debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
          debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
          debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
       }
-- 
cgit v1.2.3


From 09c231f84a20a306a173b60c82484ce1f9331edf Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 26 Oct 2009 00:20:33 +0000
Subject: i965g: still working on compilation

---
 src/gallium/auxiliary/tgsi/tgsi_scan.h          |   3 +
 src/gallium/drivers/i965/Makefile               |   9 +-
 src/gallium/drivers/i965/brw_batchbuffer.c      |  14 +-
 src/gallium/drivers/i965/brw_context.h          |  18 +-
 src/gallium/drivers/i965/brw_eu_emit.c          |   4 +-
 src/gallium/drivers/i965/brw_pipe_fb.c          |   2 +-
 src/gallium/drivers/i965/brw_pipe_flush.c       |   9 +-
 src/gallium/drivers/i965/brw_pipe_query.c       | 110 +++++++-----
 src/gallium/drivers/i965/brw_pipe_sampler.c     |  81 +++++++++
 src/gallium/drivers/i965/brw_screen_surface.c   | 156 ++++++++++++++---
 src/gallium/drivers/i965/brw_screen_texture.c   | 218 ++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_sf.c               |  80 ++++-----
 src/gallium/drivers/i965/brw_sf.h               |  13 +-
 src/gallium/drivers/i965/brw_sf_emit.c          | 145 +++++++++-------
 src/gallium/drivers/i965/brw_sf_state.c         | 178 +++++++++----------
 src/gallium/drivers/i965/brw_state.h            |  13 +-
 src/gallium/drivers/i965/brw_state_batch.c      |   8 +-
 src/gallium/drivers/i965/brw_state_cache.c      |  64 ++++---
 src/gallium/drivers/i965/brw_state_debug.c      |  19 ++-
 src/gallium/drivers/i965/brw_state_dump.c       |  64 +++----
 src/gallium/drivers/i965/brw_state_upload.c     |  37 ++--
 src/gallium/drivers/i965/brw_tex.c              |  50 ------
 src/gallium/drivers/i965/brw_tex_layout.c       | 218 ------------------------
 src/gallium/drivers/i965/brw_urb.c              |  10 +-
 src/gallium/drivers/i965/brw_vs.h               |   2 +-
 src/gallium/drivers/i965/brw_vs_emit.c          |  20 +--
 src/gallium/drivers/i965/brw_vs_state.c         |   4 +-
 src/gallium/drivers/i965/brw_winsys.h           |  18 +-
 src/gallium/drivers/i965/brw_wm.c               |   4 +-
 src/gallium/drivers/i965/brw_wm.h               |  36 ++--
 src/gallium/drivers/i965/brw_wm_debug.c         |  68 ++++----
 src/gallium/drivers/i965/brw_wm_emit.c          |   8 +-
 src/gallium/drivers/i965/brw_wm_fp.c            |  18 +-
 src/gallium/drivers/i965/brw_wm_glsl.c          |  16 +-
 src/gallium/drivers/i965/brw_wm_pass0.c         |   6 +-
 src/gallium/drivers/i965/brw_wm_pass1.c         |   2 +-
 src/gallium/drivers/i965/brw_wm_pass2.c         |   4 +-
 src/gallium/drivers/i965/brw_wm_sampler_state.c | 170 ++++--------------
 src/gallium/drivers/i965/brw_wm_state.c         |   6 +-
 39 files changed, 1007 insertions(+), 898 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_screen_texture.c
 delete mode 100644 src/gallium/drivers/i965/brw_tex.c
 delete mode 100644 src/gallium/drivers/i965/brw_tex_layout.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 8a7ee0c7e4..6754001e88 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -61,6 +61,9 @@ struct tgsi_shader_info
    boolean uses_kill;  /**< KIL or KILP instruction used? */
    boolean uses_fogcoord; /**< fragment shader uses fog coord? */
    boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
+
+   uint texture_max;
+   uint texture_mask;
 };
 
 
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 40e8aa8786..c3dbad72ae 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -28,10 +28,7 @@ C_SOURCES = \
 	brw_pipe_blend.c \
 	brw_pipe_depth.c \
 	brw_pipe_fb.c \
-	brw_pipe_flush.c \
 	brw_pipe_query.c \
-	brw_pipe_shader.c \
-	brw_screen_surface.c \
 	brw_sf.c \
 	brw_sf_emit.c \
 	brw_sf_state.c \
@@ -40,8 +37,6 @@ C_SOURCES = \
 	brw_state_dump.c \
 	brw_state_upload.c \
 	brw_swtnl.c \
-	brw_tex.c \
-	brw_tex_layout.c \
 	brw_urb.c \
 	brw_util.c \
 	brw_vs.c \
@@ -60,8 +55,12 @@ C_SOURCES = \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
+	brw_screen_surface.c \
+	brw_screen_texture.c \
 	brw_bo.c \
 	brw_batchbuffer.c \
+	brw_pipe_shader.c \
+	brw_pipe_flush.c \
 	intel_tex_layout.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 8bcac76ede..45fbd59273 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -105,13 +105,13 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
    }
 
 
-   if (INTEL_DEBUG & DEBUG_BATCH)
-      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
+   if (BRW_DEBUG & DEBUG_BATCH)
+      debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line,
 	      used);
 
    /* Emit a flush if the bufmgr doesn't do it for us. */
    if (intel->always_flush_cache || !intel->ttm) {
-      *(GLuint *) (batch->ptr) = intel->vtbl.flush_cmd();
+      *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       batch->ptr += 4;
       used = batch->ptr - batch->map;
    }
@@ -136,15 +136,15 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
       
    batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
       
-   if (INTEL_DEBUG & DEBUG_BATCH) {
+   if (BRW_DEBUG & DEBUG_BATCH) {
       dri_bo_map(batch->buf, GL_FALSE);
       intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
 		   brw->brw_screen->pci_id);
       dri_bo_unmap(batch->buf);
    }
 
-   if (INTEL_DEBUG & DEBUG_SYNC) {
-      fprintf(stderr, "waiting for idle\n");
+   if (BRW_DEBUG & DEBUG_SYNC) {
+      debug_printf("waiting for idle\n");
       dri_bo_map(batch->buf, GL_TRUE);
       dri_bo_unmap(batch->buf);
    }
@@ -166,7 +166,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
    int ret;
 
    if (batch->ptr - batch->map > batch->buf->size)
-      _mesa_printf ("bad relocation ptr %p map %p offset %d size %d\n",
+      debug_printf ("bad relocation ptr %p map %p offset %d size %d\n",
 		    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
 
    ret = batch->sws->bo_emit_reloc(batch->buf,
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index df43d8ba4d..10c1cf6f33 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -190,6 +190,8 @@ struct brw_fragment_shader {
 #define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
 #define PIPE_NEW_DEPTH_BUFFER           0x20000
 #define PIPE_NEW_COLOR_BUFFERS          0x40000
+#define PIPE_NEW_QUERY                  0x80000
+#define PIPE_NEW_SCISSOR                0x100000
 
 
@@ -204,7 +206,7 @@ struct brw_fragment_shader {
 #define BRW_NEW_WM_INPUT_DIMENSIONS     0x100
 #define BRW_NEW_PSP                     0x800
 #define BRW_NEW_WM_SURFACES		0x1000
-#define BRW_NEW_FENCE                   0x2000
+#define BRW_NEW_xxx                     0x2000 /* was FENCE */
 #define BRW_NEW_INDICES			0x4000
 #define BRW_NEW_VERTICES		0x8000
 /**
@@ -373,6 +375,7 @@ struct brw_cache_item {
 
 struct brw_cache {
    struct brw_context *brw;
+   struct brw_winsys_screen *sws;
 
    struct brw_cache_item **items;
    GLuint size, n_items;
@@ -380,6 +383,7 @@ struct brw_cache {
    GLuint key_size[BRW_MAX_CACHE];		/* for fixed-size keys */
    GLuint aux_size[BRW_MAX_CACHE];
    char *name[BRW_MAX_CACHE];
+   
 
    /* Record of the last BOs chosen for each cache_id.  Used to set
     * brw->state.dirty.cache when a new cache item is chosen.
@@ -448,7 +452,7 @@ struct brw_query_object {
    int last_index;
 
    /* Total count of pixels from previous BOs */
-   unsigned int count;
+   uint64_t result;
 };
 
 
@@ -477,11 +481,18 @@ struct brw_context
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
 
+      const struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+      const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS];
+      unsigned num_textures;
+      unsigned num_samplers;
+      
+
       struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
       unsigned num_vertex_elements;
       unsigned num_vertex_buffers;
 
+      struct pipe_scissor_state scissor;
       struct pipe_framebuffer_state fb;
       struct pipe_viewport_state vp;
       struct pipe_clip_state ucp;
@@ -492,6 +503,8 @@ struct brw_context
       struct brw_blend_constant_color bcc;
       struct brw_polygon_stipple bps;
 
+      
+
       /**
        * Index buffer for this draw_prims call.
        *
@@ -688,6 +701,7 @@ struct brw_context
       struct brw_winsys_buffer *bo;
       int index;
       GLboolean active;
+      int stats_wm;
    } query;
 
    struct {
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index f6b8843e01..f7fa520348 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -1262,7 +1262,7 @@ void brw_SAMPLE(struct brw_compile *p,
    GLboolean need_stall = 0;
    
    if (writemask == 0) {
-      /*_mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+      /*debug_printf("%s: zero writemask??\n", __FUNCTION__); */
       return;
    }
    
@@ -1294,7 +1294,7 @@ void brw_SAMPLE(struct brw_compile *p,
 
       if (newmask != writemask) {
 	 need_stall = 1;
-         /* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+         /* debug_printf("need stall %x %x\n", newmask , writemask); */
       }
       else {
 	 struct brw_reg m1 = brw_message_reg(msg_reg_nr);
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index 6391717227..c65f9bc374 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -53,7 +53,7 @@ static void brw_set_viewport_state( struct pipe_context *pipe,
 void brw_pipe_framebuffer_init( struct brw_context *brw )
 {
    brw->base.set_framebuffer_state = brw_set_framebuffer_state;
-   brw->base.set_framebuffer_state = brw_set_framebuffer_state;
+   brw->base.set_viewport_state = brw_set_viewport_state;
 }
 
 void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 65e7151517..fb4a784de9 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -52,14 +52,7 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence )
  */
 static GLuint brw_flush_cmd( void )
 {
-   struct brw_mi_flush flush;
-
-   return ;
-
-   flush.opcode = CMD_MI_FLUSH;
-   flush.pad = 0;
-   flush.flags = BRW_FLUSH_STATE_CACHE;
-   return *(GLuint *)&flush;
+   return ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index a2da1373bf..18a9b71af0 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -46,25 +46,38 @@
 #include "brw_reg.h"
 
 /** Waits on the query object's BO and totals the results for this query */
-static void
-brw_queryobj_get_results(struct brw_query_object *query)
+static boolean
+brw_query_get_result(struct pipe_context *pipe,
+		     struct pipe_query *q,
+		     boolean wait,
+		     uint64_t *result)
 {
-   int i;
-   uint64_t *results;
-
-   if (query->bo == NULL)
-      return;
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_query_object *query = (struct brw_query_object *)q;
 
    /* Map and count the pixels from the current query BO */
-   dri_bo_map(query->bo, GL_FALSE);
-   results = query->bo->virtual;
-   for (i = query->first_index; i <= query->last_index; i++) {
-      query->Base.Result += results[i * 2 + 1] - results[i * 2];
+   if (query->bo) {
+      int i;
+      uint64_t *map;
+      
+      if (brw->sws->bo_is_busy(query->bo) && !wait)
+	 return FALSE;
+      
+      map = brw->sws->bo_map(query->bo, GL_FALSE);
+      if (map == NULL)
+	 return FALSE;
+      
+      for (i = query->first_index; i <= query->last_index; i++) {
+	 query->result += map[i * 2 + 1] - map[i * 2];
+      }
+
+      brw->sws->bo_unmap(query->bo);
+      brw->sws->bo_unreference(query->bo);
+      query->bo = NULL;
    }
-   dri_bo_unmap(query->bo);
 
-   brw->sws->bo_unreference(query->bo);
-   query->bo = NULL;
+   *result = query->result;
+   return TRUE;
 }
 
 static struct pipe_query *
@@ -72,12 +85,12 @@ brw_query_create(struct pipe_context *pipe, unsigned type )
 {
    struct brw_query_object *query;
 
-   switch (query->type) {
+   switch (type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
       query = CALLOC_STRUCT( brw_query_object );
       if (query == NULL)
 	 return NULL;
-      return &query->Base;
+      return (struct pipe_query *)query;
       
    default:
       return NULL;
@@ -87,6 +100,7 @@ brw_query_create(struct pipe_context *pipe, unsigned type )
 static void
 brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
 {
+   struct brw_context *brw = brw_context(pipe);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    brw->sws->bo_unreference(query->bo);
@@ -94,24 +108,25 @@ brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
 }
 
 static void
-brw_begin_query(struct pipe_context *pipe, struct pipe_query *q)
+brw_query_begin(struct pipe_context *pipe, struct pipe_query *q)
 {
    struct brw_context *brw = brw_context(pipe);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    /* Reset our driver's tracking of query state. */
    brw->sws->bo_unreference(query->bo);
+   query->result = 0;
    query->bo = NULL;
    query->first_index = -1;
    query->last_index = -1;
 
    insert_at_head(&brw->query.active_head, query);
-   brw->stats_wm++;
-   brw->dirty.mesa |= PIPE_NEW_QUERY;
+   brw->query.stats_wm++;
+   brw->state.dirty.mesa |= PIPE_NEW_QUERY;
 }
 
 static void
-brw_end_query(struct pipe_context *pipe, struct pipe_query *q)
+brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
 {
    struct brw_context *brw = brw_context(pipe);
    struct brw_query_object *query = (struct brw_query_object *)q;
@@ -129,27 +144,13 @@ brw_end_query(struct pipe_context *pipe, struct pipe_query *q)
    }
 
    remove_from_list(query);
-   brw->stats_wm--;
-   brw->dirty.mesa |= PIPE_NEW_QUERY;
+   brw->query.stats_wm--;
+   brw->state.dirty.mesa |= PIPE_NEW_QUERY;
 }
 
-static void brw_wait_query(struct pipe_context *pipe, struct pipe_query *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   brw_queryobj_get_results(query);
-   query->Base.Ready = GL_TRUE;
-}
-
-static void brw_check_query(struct pipe_context *pipe, struct pipe_query *q)
-{
-   struct brw_query_object *query = (struct brw_query_object *)q;
-
-   if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) {
-      brw_queryobj_get_results(query);
-      query->Base.Ready = GL_TRUE;
-   }
-}
+/***********************************************************************
+ * Internal functions and callbacks to implement queries 
+ */
 
 /** Called to set up the query BO and account for its aperture space */
 void
@@ -201,8 +202,17 @@ brw_emit_query_begin(struct brw_context *brw)
 
    foreach(query, &brw->query.active_head) {
       if (query->bo != brw->query.bo) {
+	 uint64_t tmp;
+	 
+	 /* Propagate the results from this buffer to all of the
+	  * active queries, as the bo is going away.
+	  */
 	 if (query->bo != NULL)
-	    brw_queryobj_get_results(query);
+	    brw_query_get_result( &brw->base, 
+				  (struct pipe_query *)query,
+				  FALSE,
+				  &tmp );
+
 	 brw->sws->bo_reference(brw->query.bo);
 	 query->bo = brw->query.bo;
 	 query->first_index = brw->query.index;
@@ -235,12 +245,18 @@ brw_emit_query_end(struct brw_context *brw)
    brw->query.index++;
 }
 
-void brw_init_queryobj_functions(struct dd_function_table *functions)
+void brw_pipe_query_init( struct brw_context *brw )
 {
-   functions->NewQueryObject = brw_new_query_object;
-   functions->DeleteQuery = brw_delete_query;
-   functions->BeginQuery = brw_begin_query;
-   functions->EndQuery = brw_end_query;
-   functions->CheckQuery = brw_check_query;
-   functions->WaitQuery = brw_wait_query;
+   brw->base.create_query = brw_query_create;
+   brw->base.destroy_query = brw_query_destroy;
+   brw->base.begin_query = brw_query_begin;
+   brw->base.end_query = brw_query_end;
+   brw->base.get_query_result = brw_query_get_result;
+}
+
+
+void brw_pipe_query_cleanup( struct brw_context *brw )
+{
+   /* Unreference brw->query.bo ??
+    */
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index b3069f08c0..bc20eef6fb 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -14,6 +14,87 @@ static void *brw_create_sampler_state( struct pipe_context *pipe,
 {
    struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state);
 
+   switch (key->minfilter) {
+   case GL_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
+      break;
+   case GL_NEAREST_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_LINEAR_MIPMAP_NEAREST:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
+      break;
+   case GL_NEAREST_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   case GL_LINEAR_MIPMAP_LINEAR:
+      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
+      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
+      break;
+   default:
+      break;
+   }
+
+   /* Set Anisotropy: 
+    */
+   if (key->max_aniso > 1.0) {
+      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
+      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
+
+      if (key->max_aniso > 2.0) {
+	 sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+				       BRW_ANISORATIO_16);
+      }
+   }
+   else {
+      switch (key->magfilter) {
+      case GL_NEAREST:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
+	 break;
+      case GL_LINEAR:
+	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
+	 break;
+      default:
+	 break;
+      }
+   }
+
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+
+   /* Set LOD bias: 
+    */
+   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+
+   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
+   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+
+   /* Set shadow function: 
+    */
+   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+      /* Shadowing is "enabled" by emitting a particular sampler
+       * message (sample_c).  So need to recompile WM program when
+       * shadow comparison is enabled on each/any texture unit.
+       */
+      sampler->ss0.shadow_function =
+	 intel_translate_shadow_compare_func(key->comparefunc);
+   }
+
+   /* Set BaseMipLevel, MaxLOD, MinLOD: 
+    */
+   sampler->ss0.base_level = U_FIXED(0, 1);
+
+   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
+   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
 
    return (void *)sampler;
 }
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 544be6a089..e0df6cc629 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -1,27 +1,131 @@
-   /* _NEW_BUFFERS */
-   if (IS_965(brw->brw_screen->pci_id) &&
-       !IS_G4X(brw->brw_screen->pci_id)) {
-      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-	 struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
-	 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-
-	 /* The original gen4 hardware couldn't set up WM surfaces pointing
-	  * at an offset within a tile, which can happen when rendering to
-	  * anything but the base level of a texture or the +X face/0 depth.
-	  * This was fixed with the 4 Series hardware.
-	  *
-	  * For these original chips, you would have to make the depth and
-	  * color destination surfaces include information on the texture
-	  * type, LOD, face, and various limits to use them as a destination.
-	  * I would have done this, but there's also a nasty requirement that
-	  * the depth and the color surfaces all be of the same LOD, which
-	  * may be a worse requirement than this alignment.  (Also, we may
-	  * want to just demote the texture to untiled, instead).
-	  */
-	 if (irb->region && 
-	     irb->region->tiling != I915_TILING_NONE &&
-	     (irb->region->draw_offset & 4095)) {
-	    DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n");
-	    return GL_TRUE;
-	 }
+
+#include "pipe/p_screen.h"
+#include "brw_screen.h"
+/* Identifies one image of a texture; the three bitfields total 32 bits. */
+struct brw_surface_id {
+   unsigned face:3;
+   unsigned zslice:13;
+   unsigned level:16;
+};
+/* Always returns GL_FALSE for now: the gen4 alignment check is compiled out. */
+static boolean need_linear_view( struct brw_screen *brw_screen,
+				 struct brw_texture *brw_texture,
+				 unsigned face,
+				 unsigned level,
+				 unsigned zslice )
+{
+#if 0
+   /* XXX: what about IGDNG?
+    */
+   if (!BRW_IS_G4X(brw->brw_screen->pci_id))
+   {
+      struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+      struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+
+      /* The original gen4 hardware couldn't set up WM surfaces pointing
+       * at an offset within a tile, which can happen when rendering to
+       * anything but the base level of a texture or the +X face/0 depth.
+       * This was fixed with the 4 Series hardware.
+       *
+       * For these original chips, you would have to make the depth and
+       * color destination surfaces include information on the texture
+       * type, LOD, face, and various limits to use them as a destination.
+       *
+       * This is easy in Gallium as surfaces are all backed by
+       * textures, but there's also a nasty requirement that the depth
+       * and the color surfaces all be of the same LOD, which is
+       * harder to get around as we can't look at a surface in
+       * isolation and decide if it's legal.
+       *
+       * Instead, end up being pessimistic and say that for i965,
+       * ... ??
+       */
+      if (brw_tex->tiling != I915_TILING_NONE &&
+	  (brw_tex_image_offset(brw_tex, face, level, zslize) & 4095)) {
+	 if (BRW_DEBUG & DEBUG_VIEW)
+	    debug_printf("%s: need surface view for non-aligned tex image\n",
+			 __FUNCTION__);
+	 return GL_TRUE;
       }
+   }
+#endif
+
+   /* Tiled 3d textures don't have subsets that look like 2d surfaces:
+    */
+   /* XXX: no check for that case is implemented yet. */
+   /* Everything else should be fine to render to in-place:
+    */
+   return GL_FALSE;
+}
+
+/* Look at all texture views and figure out if any of them need to be
+ * back-copied into the texture for sampling.  Currently a no-op stub.
+ */
+void brw_update_texture( struct pipe_screen *screen,
+			 struct pipe_texture *texture )
+{
+   /* nothing to do yet: both arguments are ignored */
+}
+
+
+static struct pipe_surface *create_linear_view( struct brw_screen *brw_screen,
+						struct brw_texture *brw_tex,
+						struct brw_surface_id id )
+{
+   /* XXX: unimplemented; control falls off the end without a return value. */
+}
+
+static struct pipe_surface *create_in_place_view( struct brw_screen *brw_screen,
+						  struct brw_texture *brw_tex,
+						  struct brw_surface_id id )
+{
+   struct brw_surface *surface = CALLOC_STRUCT(brw_surface);
+   surface->id = id;
+   /* XXX: incomplete -- no return statement; CALLOC result is used unchecked. */
+}
+
+/* Get a surface which is a view into a texture, reusing a cached view
+ * with the same id from texture->views[type] when one exists. */
+struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
+					 struct pipe_texture *texture,
+					 unsigned face, unsigned level,
+					 unsigned zslice,
+					 unsigned usage )
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_surface_id id;
+
+   id.face = face;
+   id.level = level;
+   id.zslice = zslice;
+
+   if (need_linear_view(brw_screen, brw_tex, id)) 
+      type = BRW_VIEW_LINEAR;
+   else
+      type = BRW_VIEW_IN_PLACE;
+
+   /* NOTE(review): type, surface, brw_tex undeclared; id has no .value. */
+   foreach (surface, texture->views[type]) {
+      if (id.value == surface->id.value)
+	 return surface;
+   }
+
+   switch (type) {
+   case BRW_VIEW_LINEAR:
+      surface = create_linear_view( texture, id, type );
+      break;
+   case BRW_VIEW_IN_PLACE:
+      surface = create_in_place_view( texture, id, type );
+      break;
+   default:
+      return NULL;
+   }
+
+   insert_at_head( texture->views[type], surface );
+   return surface;
+}
+
+/* Stub: the body is empty, so the surface is not released here yet. */
+void brw_tex_surface_destroy( struct pipe_surface *surface )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
new file mode 100644
index 0000000000..50c30878c6
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -0,0 +1,218 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "brw_tex_layout.h"
+
+#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+
+GLboolean brw_miptree_layout(struct brw_context *brw,
+			     struct intel_mipmap_tree *mt,
+			     uint32_t tiling)
+{
+   /* XXX: these vary depending on image format: */
+   /* GLint align_w = 4; */
+   /* NOTE(review): 'intel' used below is not declared in this function. */
+   switch (mt->target) {
+   case GL_TEXTURE_CUBE_MAP:
+      if (IS_IGDNG(brw->brw_screen->pci_id)) {
+          GLuint align_h = 2, align_w = 4;
+          GLuint level;
+          GLuint x = 0;
+          GLuint y = 0;
+          GLuint width = mt->width0;
+          GLuint height = mt->height0;
+          GLuint qpitch = 0;
+          GLuint y_pitch = 0;
+
+          mt->pitch = mt->width0;
+          intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+          y_pitch = ALIGN(height, align_h);
+
+          if (mt->compressed) {
+              mt->pitch = ALIGN(mt->width0, align_w);
+          }
+
+          if (mt->last_level != 0) {
+              GLuint mip1_width;
+
+              if (mt->compressed) {
+                  mip1_width = ALIGN(minify(mt->width0), align_w)
+                      + ALIGN(minify(minify(mt->width0)), align_w);
+              } else {
+                  mip1_width = ALIGN(minify(mt->width0), align_w)
+                      + minify(minify(mt->width0));
+              }
+
+              if (mip1_width > mt->pitch) {
+                  mt->pitch = mip1_width;
+              }
+          }
+
+          mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
+
+          if (mt->compressed) {
+              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
+              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
+          } else {
+              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
+              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
+          }
+
+          for (level = 0; level <= mt->last_level; level++) {
+              GLuint img_height;
+              GLuint nr_images = 6;
+              GLuint q = 0;
+
+              intel_miptree_set_level_info(mt, level, nr_images, x, y, width, 
+                                           height, 1);
+
+              for (q = 0; q < nr_images; q++)
+                  intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
+
+              if (mt->compressed)
+                  img_height = MAX2(1, height/4);
+              else
+                  img_height = ALIGN(height, align_h);
+
+              if (level == 1) {
+                  x += ALIGN(width, align_w);
+              }
+              else {
+                  y += img_height;
+              }
+
+              width  = minify(width);
+              height = minify(height);
+          }
+
+          break;
+      }
+      /* fallthrough: non-IGDNG cube maps share the layout code below */
+   case GL_TEXTURE_3D: {
+      GLuint width  = mt->width0;
+      GLuint height = mt->height0;
+      GLuint depth = mt->depth0;
+      GLuint pack_x_pitch, pack_x_nr;
+      GLuint pack_y_pitch;
+      GLuint level;
+      GLuint align_h = 2;
+      GLuint align_w = 4;
+
+      mt->total_height = 0;
+      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
+
+      if (mt->compressed) {
+          mt->pitch = ALIGN(width, align_w);
+          pack_y_pitch = (height + 3) / 4;
+      } else {
+	 mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
+	 pack_y_pitch = ALIGN(mt->height0, align_h);
+      }
+
+      pack_x_pitch = width;
+      pack_x_nr = 1;
+
+      for (level = 0 ; level <= mt->last_level ; level++) {
+	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
+	 GLint x = 0;
+	 GLint y = 0;
+	 GLint q, j;
+
+	 intel_miptree_set_level_info(mt, level, nr_images,
+				      0, mt->total_height,
+				      width, height, depth);
+
+	 for (q = 0; q < nr_images;) {
+	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+	       intel_miptree_set_image_offset(mt, level, q, x, y);
+	       x += pack_x_pitch;
+	    }
+
+	    x = 0;
+	    y += pack_y_pitch;
+	 }
+
+
+	 mt->total_height += y;
+	 width  = minify(width);
+	 height = minify(height);
+	 depth  = minify(depth);
+
+	 if (mt->compressed) {
+	    pack_y_pitch = (height + 3) / 4;
+
+	    if (pack_x_pitch > ALIGN(width, align_w)) {
+	       pack_x_pitch = ALIGN(width, align_w);
+	       pack_x_nr <<= 1;
+	    }
+	 } else {
+	    if (pack_x_pitch > 4) {
+	       pack_x_pitch >>= 1;
+	       pack_x_nr <<= 1;
+	       assert(pack_x_pitch * pack_x_nr <= mt->pitch);
+	    }
+
+	    if (pack_y_pitch > 2) {
+	       pack_y_pitch >>= 1;
+	       pack_y_pitch = ALIGN(pack_y_pitch, align_h);
+	    }
+	 }
+
+      }
+      /* The 965's sampler lays cachelines out according to how accesses
+       * in the texture surfaces run, so they may be "vertical" through
+       * memory.  As a result, the docs say in Surface Padding Requirements:
+       * Sampling Engine Surfaces that two extra rows of padding are required.
+       * We don't know of similar requirements for pre-965, but given that
+       * those docs are silent on padding requirements in general, let's play
+       * it safe.
+       */
+      if (mt->target == GL_TEXTURE_CUBE_MAP)
+	 mt->total_height += 2;
+      break;
+   }
+
+   default:
+      i945_miptree_layout_2d(intel, mt, tiling);
+      break;
+   }
+   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+		mt->pitch,
+		mt->total_height,
+		mt->cpp,
+		mt->pitch * mt->total_height * mt->cpp );
+
+   return GL_TRUE;
+}
+
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 1b73b3fd51..013d839e37 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -29,11 +29,12 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
   
+#include "pipe/p_state.h"
 
 #include "brw_batchbuffer.h"
-
 #include "brw_defines.h"
 #include "brw_context.h"
+#include "brw_pipe_rast.h"
 #include "brw_eu.h"
 #include "brw_util.h"
 #include "brw_sf.h"
@@ -45,7 +46,6 @@ static void compile_sf_prog( struct brw_context *brw,
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
-   GLuint i, idx;
 
    memset(&c, 0, sizeof(c));
 
@@ -54,7 +54,7 @@ static void compile_sf_prog( struct brw_context *brw,
    brw_init_compile(brw, &c.func);
 
    c.key = *key;
-   c.nr_attrs = util_count_bits(c.key.attrs);
+   c.nr_attrs = c.key.nr_attrs;
    c.nr_attr_regs = (c.nr_attrs+1)/2;
    c.nr_setup_attrs = c.key.nr_attrs;
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
@@ -62,21 +62,6 @@ static void compile_sf_prog( struct brw_context *brw,
    c.prog_data.urb_read_length = c.nr_attr_regs;
    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
 
-   /* Construct map from attribute number to position in the vertex.
-    */
-   for (i = idx = 0; i < VERT_RESULT_MAX; i++) 
-      if (c.key.attrs & (1<<i)) {
-	 c.attr_to_idx[i] = idx;
-	 c.idx_to_attr[idx] = i;
-	 if (i >= VERT_RESULT_TEX0 && i <= VERT_RESULT_TEX7) {
-            c.point_attrs[i].CoordReplace = 
-               ctx->Point.CoordReplace[i - VERT_RESULT_TEX0];
-	 }
-         else {
-            c.point_attrs[i].CoordReplace = GL_FALSE;
-         }
-	 idx++;
-      }
    
    /* Which primitive?  Or all three? 
     */
@@ -122,7 +107,7 @@ static void compile_sf_prog( struct brw_context *brw,
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static void upload_sf_prog(struct brw_context *brw)
+static int upload_sf_prog(struct brw_context *brw)
 {
    struct brw_sf_prog_key key;
 
@@ -131,46 +116,49 @@ static void upload_sf_prog(struct brw_context *brw)
    /* Populate the key, noting state dependencies:
     */
    /* CACHE_NEW_VS_PROG */
-   key.attrs = brw->vs.prog_data->nr_outputs_written; 
+   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+
+   /* XXX: this is probably where the mapping between vertex shader
+    * outputs and fragment shader inputs should be handled.  Assume
+    * for now 1:1 correspondence.
+    *
+    * XXX: scan frag shader inputs to work out linear vs. perspective
+    * interpolation below.
+    *
+    * XXX: as long as we're hard-wiring, is eg. position required to
+    * be linear?
+    */
+   key.linear_attrs = 0;
+   key.persp_attrs = (1 << key.nr_attrs) - 1;
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
    switch (brw->reduced_primitive) {
-   case GL_TRIANGLES: 
-      /* NOTE: We just use the edgeflag attribute as an indicator that
-       * unfilled triangles are active.  We don't actually do the
-       * edgeflag testing here, it is already done in the clip
-       * program.
+   case PIPE_PRIM_TRIANGLES: 
+      /* PIPE_NEW_RAST
        */
-      if (key.attrs & (1<<VERT_RESULT_EDGE))
+      if (brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL ||
+	  brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL)
 	 key.primitive = SF_UNFILLED_TRIS;
       else
 	 key.primitive = SF_TRIANGLES;
       break;
-   case GL_LINES: 
+   case PIPE_PRIM_LINES: 
       key.primitive = SF_LINES; 
       break;
-   case GL_POINTS: 
+   case PIPE_PRIM_POINTS: 
       key.primitive = SF_POINTS; 
       break;
    }
 
-   key.do_point_sprite = ctx->Point.PointSprite;
-   key.SpriteOrigin = ctx->Point.SpriteOrigin;
-   /* _NEW_LIGHT */
-   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
-   key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide);
+   key.do_point_sprite = brw->curr.rast->templ.point_sprite;
+   key.sprite_origin_lower_left = 0; /* XXX: ctx->Point.SpriteOrigin - fix rast state */
+   key.do_flat_shading = brw->curr.rast->templ.flatshade;
+   key.do_twoside_color = brw->curr.rast->templ.light_twoside;
 
-   /* _NEW_HINT */
-   key.linear_color = 0;
-
-   /* _NEW_POLYGON */
    if (key.do_twoside_color) {
-      /* If we're rendering to a FBO, we have to invert the polygon
-       * face orientation, just as we invert the viewport in
-       * sf_unit_create_from_key().  ctx->DrawBuffer->Name will be
-       * nonzero if we're rendering to such an FBO.
-       */
-      key.frontface_ccw = (ctx->Polygon.FrontFace == GL_CCW) ^ (ctx->DrawBuffer->Name != 0);
+      key.frontface_ccw = (brw->curr.rast->templ.front_winding == 
+			   PIPE_WINDING_CCW);
    }
 
    brw->sws->bo_unreference(brw->sf.prog_bo);
@@ -180,14 +168,16 @@ static void upload_sf_prog(struct brw_context *brw)
 				      &brw->sf.prog_data);
    if (brw->sf.prog_bo == NULL)
       compile_sf_prog( brw, &key );
+
+   return 0;
 }
 
 
 const struct brw_tracked_state brw_sf_prog = {
    .dirty = {
-      .mesa  = (_NEW_HINT | _NEW_LIGHT | _NEW_POLYGON | _NEW_POINT),
+      .mesa  = (PIPE_NEW_RAST | PIPE_NEW_VERTEX_SHADER),
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
-      .cache = CACHE_NEW_VS_PROG
+      .cache = 0
    },
    .prepare = upload_sf_prog
 };
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
index c99116b8b1..0b7003dc5e 100644
--- a/src/gallium/drivers/i965/brw_sf.h
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -49,14 +49,21 @@ struct brw_sf_prog_key {
     */
    GLuint persp_attrs:32;
    GLuint linear_attrs:32;
+   GLuint point_coord_replace_attrs:32;
 
+   GLuint nr_attrs:8;
    GLuint primitive:2;
    GLuint do_twoside_color:1;
    GLuint do_flat_shading:1;
    GLuint frontface_ccw:1;
    GLuint do_point_sprite:1;
    GLuint sprite_origin_lower_left:1;
-   GLuint pad:25;
+   GLuint pad:17;
+
+   GLuint attr_col0:8;
+   GLuint attr_col1:8;
+   GLuint attr_bfc0:8;
+   GLuint attr_bfc1:8;
 };
 
 struct brw_sf_point_tex {
@@ -101,9 +108,7 @@ struct brw_sf_compile {
    GLuint nr_setup_attrs;
    GLuint nr_setup_regs;
 
-   GLubyte attr_to_idx[VERT_RESULT_MAX];   
-   GLubyte idx_to_attr[VERT_RESULT_MAX];   
-   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
+   GLuint point_coord_replace_mask;
 };
 
  
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 4acb2b7d72..db52c9553e 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -43,17 +43,12 @@ static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
 				    struct brw_reg vert,
 				    GLuint attr)
 {
-   GLuint off = c->attr_to_idx[attr] / 2;
-   GLuint sub = c->attr_to_idx[attr] % 2;
+   GLuint off = attr / 2;
+   GLuint sub = attr % 2;
 
    return brw_vec4_grf(vert.nr + off, sub * 4);
 }
 
-static GLboolean have_attr(struct brw_sf_compile *c,
-			   GLuint attr)
-{
-   return (c->key.attrs & (1<<attr)) ? 1 : 0;
-}
 
 /*********************************************************************** 
  * Twoside lighting
@@ -62,15 +57,16 @@ static void copy_bfc( struct brw_sf_compile *c,
 		      struct brw_reg vert )
 {
    struct brw_compile *p = &c->func;
-   GLuint i;
 
-   for (i = 0; i < 2; i++) {
-      if (have_attr(c, VERT_RESULT_COL0+i) &&
-	  have_attr(c, VERT_RESULT_BFC0+i))
-	 brw_MOV(p, 
-		 get_vert_attr(c, vert, VERT_RESULT_COL0+i), 
-		 get_vert_attr(c, vert, VERT_RESULT_BFC0+i));
-   }
+   if (c->key.attr_col0 && c->key.attr_bfc0)
+      brw_MOV(p, 
+	      get_vert_attr(c, vert, c->key.attr_col0), 
+	      get_vert_attr(c, vert, c->key.attr_bfc0));
+
+   if (c->key.attr_col1 && c->key.attr_bfc1)
+      brw_MOV(p, 
+	      get_vert_attr(c, vert, c->key.attr_col1), 
+	      get_vert_attr(c, vert, c->key.attr_bfc1));
 }
 
 
@@ -89,8 +85,8 @@ static void do_twoside_color( struct brw_sf_compile *c )
     * for user-supplied vertex programs, as t_vp_build.c always does
     * the right thing.
     */
-   if (!(have_attr(c, VERT_RESULT_COL0) && have_attr(c, VERT_RESULT_BFC0)) &&
-       !(have_attr(c, VERT_RESULT_COL1) && have_attr(c, VERT_RESULT_BFC1)))
+   if (!(c->key.attr_col0 && c->key.attr_bfc0) &&
+       !(c->key.attr_col1 && c->key.attr_bfc1))
       return;
    
    /* Need to use BRW_EXECUTE_4 and also do an 4-wide compare in order
@@ -126,14 +122,17 @@ static void copy_colors( struct brw_sf_compile *c,
 		     struct brw_reg src)
 {
    struct brw_compile *p = &c->func;
-   GLuint i;
 
-   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
-      if (have_attr(c,i))
-	 brw_MOV(p, 
-		 get_vert_attr(c, dst, i), 
-		 get_vert_attr(c, src, i));
-   }
+   if (c->key.attr_col0)
+      brw_MOV(p, 
+	      get_vert_attr(c, dst, c->key.attr_col0), 
+	      get_vert_attr(c, src, c->key.attr_col0));
+
+   if (c->key.attr_col1)
+      brw_MOV(p, 
+	      get_vert_attr(c, dst, c->key.attr_col1), 
+	      get_vert_attr(c, src, c->key.attr_col1));
+
 }
 
 
@@ -146,10 +145,16 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg ip = brw_ip_reg();
-   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
+   GLuint nr = 0;
 
-   if (!nr)
+   if (c->key.attr_col0)
+      nr++;
+
+   if (c->key.attr_col1)
+      nr++;
+
+   if (nr == 0)
       return;
 
    /* Already done in clip program:
@@ -184,10 +189,16 @@ static void do_flatshade_line( struct brw_sf_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg ip = brw_ip_reg();
-   GLuint nr = util_count_bits(c->key.attrs & VERT_RESULT_COLOR_BITS);
    GLuint jmpi = 1;
+   GLuint nr = 0;
+
+   if (c->key.attr_col0)
+      nr++;
+
+   if (c->key.attr_col1)
+      nr++;
 
-   if (!nr)
+   if (nr == 0)
       return;
 
    /* Already done in clip program: 
@@ -319,10 +330,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
    *pc_linear = 0;
    *pc = 0xf;
       
-   if (persp_mask & (1 << c->idx_to_attr[reg*2])) 
+   if (persp_mask & (1 << (reg*2))) 
       *pc_persp = 0xf;
 
-   if (linear_mask & (1 << c->idx_to_attr[reg*2])) 
+   if (linear_mask & (1 << (reg*2))) 
       *pc_linear = 0xf;
 
    /* Maybe only processs one attribute on the final round:
@@ -330,10 +341,10 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
    if (reg*2+1 < c->nr_setup_attrs) {
       *pc |= 0xf0;
 
-      if (persp_mask & (1 << c->idx_to_attr[reg*2+1])) 
+      if (persp_mask & (1 << (reg*2+1))) 
 	 *pc_persp |= 0xf0;
 
-      if (linear_mask & (1 << c->idx_to_attr[reg*2+1])) 
+      if (linear_mask & (1 << (reg*2+1))) 
 	 *pc_linear |= 0xf0;
    }
 
@@ -513,24 +524,28 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
       alloc_regs(c);
 
    copy_z_inv_w(c);
+
    for (i = 0; i < c->nr_setup_regs; i++)
    {
-      struct brw_sf_point_tex *tex = &c->point_attrs[c->idx_to_attr[2*i]];
+      /* XXX: only seems to check point_coord_replace_attrs for every
+       * second attribute?!?
+       */
+      boolean coord_replace = !!(c->key.point_coord_replace_attrs & (1<<(2*i)));
       struct brw_reg a0 = offset(c->vert[0], i);
       GLushort pc, pc_persp, pc_linear;
       GLboolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
             
       if (pc_persp)
       {				
-	  if (!tex->CoordReplace) {
-	      brw_set_predicate_control_flag_value(p, pc_persp);
-	      brw_MUL(p, a0, a0, c->inv_w[0]);
-	  }
+	 if (!coord_replace) { /* only interpolated attrs need a0 * 1/w */
+	    brw_set_predicate_control_flag_value(p, pc_persp);
+	    brw_MUL(p, a0, a0, c->inv_w[0]);
+	 }
       }
 
-      if (tex->CoordReplace) {
-	  /* Caculate 1.0/PointWidth */
-	  brw_math(&c->func,
+      if (coord_replace) {
+	 /* Calculate 1.0/PointWidth */
+	 brw_math(&c->func,
 		  c->tmp,
 		  BRW_MATH_FUNCTION_INV,
 		  BRW_MATH_SATURATE_NONE,
@@ -539,33 +554,37 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate)
 		  BRW_MATH_DATA_SCALAR,
 		  BRW_MATH_PRECISION_FULL);
 
-	  if (c->key.SpriteOrigin == GL_LOWER_LEFT) {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  } else {
-	   	brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
-	  	brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
-		brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
-	  }
-      } else {
-	  brw_MOV(p, c->m1Cx, brw_imm_ud(0));
-	  brw_MOV(p, c->m2Cy, brw_imm_ud(0));
+	 if (c->key.sprite_origin_lower_left) {
+	    brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+	    brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	    brw_MUL(p, c->m2Cy, c->tmp, negate(c->inv_w[0]));
+	    brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	 } 
+	 else {
+	    brw_MUL(p, c->m1Cx, c->tmp, c->inv_w[0]);
+	    brw_MOV(p, vec1(suboffset(c->m1Cx, 1)), brw_imm_f(0.0));
+	    brw_MUL(p, c->m2Cy, c->tmp, c->inv_w[0]);
+	    brw_MOV(p, vec1(suboffset(c->m2Cy, 0)), brw_imm_f(0.0));
+	 }
+      } 
+      else {
+	 brw_MOV(p, c->m1Cx, brw_imm_ud(0));
+	 brw_MOV(p, c->m2Cy, brw_imm_ud(0));
       }
 
       {
 	 brw_set_predicate_control_flag_value(p, pc); 
-	 if (tex->CoordReplace) {
-	     if (c->key.sprite_origin_lower_left) {
-		 brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
-		 brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
-	     }
-	     else
-		 brw_MOV(p, c->m3C0, brw_imm_f(0.0));
-	 } else {
-	 	brw_MOV(p, c->m3C0, a0); /* constant value */
+	 if (coord_replace) {
+	    if (c->key.sprite_origin_lower_left) {
+	       brw_MUL(p, c->m3C0, c->inv_w[0], brw_imm_f(1.0));
+	       brw_MOV(p, vec1(suboffset(c->m3C0, 0)), brw_imm_f(0.0));
+	    }
+	    else {
+	       brw_MOV(p, c->m3C0, brw_imm_f(0.0));
+	    }
+	 } 
+	 else {
+	    brw_MOV(p, c->m3C0, a0); /* constant value */
 	 }
 
 	 /* Copy m0..m3 to URB. 
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 648a16a038..fbc9f15eb4 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -29,58 +29,48 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
    
+#include "util/u_math.h"
 
+#include "pipe/p_state.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
-static void upload_sf_vp(struct brw_context *brw)
+static int upload_sf_vp(struct brw_context *brw)
 {
-   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   const struct pipe_viewport_state *vp = &brw->curr.vp;
+   const struct pipe_scissor_state *scissor = &brw->curr.scissor;
    struct brw_sf_viewport sfv;
-   GLfloat y_scale, y_bias;
-   const GLfloat *v = ctx->Viewport._WindowMap.m;
 
    memset(&sfv, 0, sizeof(sfv));
 
-   y_scale = 1.0;
-   y_bias = 0;
+   /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */
 
-   /* _NEW_VIEWPORT */
+   sfv.viewport.m00 = vp->scale[0];
+   sfv.viewport.m11 = vp->scale[1];
+   sfv.viewport.m22 = vp->scale[2];
+   sfv.viewport.m30 = vp->translate[0];
+   sfv.viewport.m31 = vp->translate[1];
+   sfv.viewport.m32 = vp->translate[2];
 
-   sfv.viewport.m00 = v[MAT_SX];
-   sfv.viewport.m11 = v[MAT_SY] * y_scale;
-   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
-   sfv.viewport.m30 = v[MAT_TX];
-   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
-   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
-
-   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
-    * for DrawBuffer->_[XY]{min,max}
-    */
-
-   /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect.
-    *
-    * Note that the hardware's coordinates are inclusive, while Mesa's min is
-    * inclusive but max is exclusive.
-    */
-   /* Y=0=bottom */
-   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-   sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
-   sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
+   sfv.scissor.xmin = scissor->minx;
+   sfv.scissor.xmax = scissor->maxx; /* -1 ?? */
+   sfv.scissor.ymin = scissor->miny;
+   sfv.scissor.ymax = scissor->maxy; /* -1 ?? */
 
    brw->sws->bo_unreference(brw->sf.vp_bo);
    brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_sf_vp = {
    .dirty = {
-      .mesa  = (_NEW_VIEWPORT | 
-		_NEW_SCISSOR |
-		_NEW_BUFFERS),
+      .mesa  = (PIPE_NEW_VIEWPORT | 
+		PIPE_NEW_SCISSOR),
       .brw   = 0,
       .cache = 0
    },
@@ -90,15 +80,17 @@ const struct brw_tracked_state brw_sf_vp = {
 struct brw_sf_unit_key {
    unsigned int total_grf;
    unsigned int urb_entry_read_length;
-
    unsigned int nr_urb_entries, urb_size, sfsize;
-
-   GLenum front_face, cull_face, provoking_vertex;
+   
    unsigned scissor:1;
    unsigned line_smooth:1;
    unsigned point_sprite:1;
    unsigned point_attenuated:1;
-   unsigned render_to_fbo:1;
+   unsigned front_face:2;
+   unsigned cull_mode:2;
+   unsigned flatshade_first:1;
+   unsigned gl_rasterization_rules:1;
+   unsigned line_last_pixel_enable:1;
    float line_width;
    float point_size;
 };
@@ -106,6 +98,7 @@ struct brw_sf_unit_key {
 static void
 sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
 {
+   const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_SF_PROG */
@@ -117,25 +110,22 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
    key->urb_size = brw->urb.vsize;
    key->sfsize = brw->urb.sfsize;
 
-   key->scissor = ctx->Scissor.Enabled;
-   key->front_face = ctx->Polygon.FrontFace;
-
-   if (ctx->Polygon.CullFlag)
-      key->cull_face = ctx->Polygon.CullFaceMode;
-   else
-      key->cull_face = GL_NONE;
-
-   key->line_width = ctx->Line.Width;
-   key->line_smooth = ctx->Line.SmoothFlag;
-
-   key->point_sprite = ctx->Point.PointSprite;
-   key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-   key->point_attenuated = ctx->Point._Attenuated;
-
-   /* _NEW_LIGHT */
-   key->provoking_vertex = ctx->Light.ProvokingVertex;
-
-   key->render_to_fbo = 1;
+   /* PIPE_NEW_RAST */
+   key->scissor = rast->scissor;
+   key->front_face = rast->front_winding;
+   key->cull_mode = rast->cull_mode;
+   key->line_smooth = rast->line_smooth;
+   key->line_width = rast->line_width;
+   key->flatshade_first = rast->flatshade_first;
+   key->line_last_pixel_enable = rast->line_last_pixel;
+   key->gl_rasterization_rules = rast->gl_rasterization_rules;
+
+   key->point_sprite = rast->point_sprite;
+   key->point_attenuated = rast->point_size_per_vertex;
+
+   key->point_size = CLAMP(rast->point_size, 
+			   rast->point_size_min, 
+			   rast->point_size_max);
 }
 
 static struct brw_winsys_buffer *
@@ -147,7 +137,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
-   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
@@ -174,10 +164,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       sf.thread4.max_threads = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
+   if (BRW_DEBUG & DEBUG_STATS)
       sf.thread4.stats_enable = 1;
 
    /* CACHE_NEW_SF_VP */
@@ -185,31 +175,30 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    sf.sf5.viewport_transform = 1;
 
-   /* _NEW_SCISSOR */
    if (key->scissor)
       sf.sf6.scissor = 1;
 
-   /* _NEW_POLYGON */
-   if (key->front_face == GL_CCW)
+   if (key->front_face == PIPE_WINDING_CCW)
       sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
-   switch (key->cull_face) {
-   case GL_FRONT:
-      sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
+   switch (key->cull_mode) {
+   case PIPE_WINDING_CCW:
+   case PIPE_WINDING_CW:
+      sf.sf6.cull_mode = (key->front_face == key->cull_mode ?
+			  BRW_CULLMODE_FRONT :
+			  BRW_CULLMODE_BACK);
       break;
-   case GL_BACK:
-      sf.sf6.cull_mode = BRW_CULLMODE_BACK;
-      break;
-   case GL_FRONT_AND_BACK:
+   case PIPE_WINDING_BOTH:
       sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
       break;
-   case GL_NONE:
+   case PIPE_WINDING_NONE:
       sf.sf6.cull_mode = BRW_CULLMODE_NONE;
       break;
    default:
       assert(0);
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
       break;
    }
 
@@ -223,9 +212,9 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    else if (sf.sf6.line_width <= 0x2)
        sf.sf6.line_width = 0;
 
-   /* _NEW_BUFFERS */
-   key->render_to_fbo = 1;
-   if (!key->render_to_fbo) {
+   /* XXX: gl_rasterization_rules?  something else?
+    */
+   if (0) {
       /* Rendering to an OpenGL window */
       sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
    }
@@ -261,7 +250,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
     */
-   if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+   if (!key->flatshade_first) {
       sf.sf7.trifan_pv = 2;
       sf.sf7.linestrip_pv = 1;
       sf.sf7.tristrip_pv = 2;
@@ -270,12 +259,19 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
       sf.sf7.linestrip_pv = 0;
       sf.sf7.tristrip_pv = 0;
    }
-   sf.sf7.line_last_pixel_enable = 0;
+
+   sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;
 
    /* Set bias for OpenGL rasterization rules:
     */
-   sf.sf6.dest_org_vbias = 0x8;
-   sf.sf6.dest_org_hbias = 0x8;
+   if (key->gl_rasterization_rules) {
+      sf.sf6.dest_org_vbias = 0x8;
+      sf.sf6.dest_org_hbias = 0x8;
+   }
+   else {
+      sf.sf6.dest_org_vbias = 0x0;
+      sf.sf6.dest_org_hbias = 0x0;
+   }
 
    bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
 			 key, sizeof(*key),
@@ -287,23 +283,23 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
     */
    /* Emit SF program relocation */
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0,
-		     sf.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_sf_unit_state, thread0),
-		     brw->sf.prog_bo);
+   brw->sws->bo_emit_reloc(bo,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   sf.thread0.grf_reg_count << 1,
+			   offsetof(struct brw_sf_unit_state, thread0),
+			   brw->sf.prog_bo);
 
    /* Emit SF viewport relocation */
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0,
-		     sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
-		     offsetof(struct brw_sf_unit_state, sf5),
-		     brw->sf.vp_bo);
+   brw->sws->bo_emit_reloc(bo,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+			   offsetof(struct brw_sf_unit_state, sf5),
+			   brw->sf.vp_bo);
 
    return bo;
 }
 
-static void upload_sf_unit( struct brw_context *brw )
+static int upload_sf_unit( struct brw_context *brw )
 {
    struct brw_sf_unit_key key;
    struct brw_winsys_buffer *reloc_bufs[2];
@@ -321,16 +317,12 @@ static void upload_sf_unit( struct brw_context *brw )
    if (brw->sf.state_bo == NULL) {
       brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
    }
+   return 0;
 }
 
 const struct brw_tracked_state brw_sf_unit = {
    .dirty = {
-      .mesa  = (_NEW_POLYGON | 
-		_NEW_LIGHT |
-		_NEW_LINE | 
-		_NEW_POINT | 
-		_NEW_SCISSOR |
-		_NEW_BUFFERS),
+      .mesa  = (PIPE_NEW_RAST),
       .brw   = BRW_NEW_URB_FENCE,
       .cache = (CACHE_NEW_SF_VP |
 		CACHE_NEW_SF_PROG)
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 663fc839df..2275e9ad69 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -168,9 +168,20 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 void brw_destroy_batch_cache( struct brw_context *brw );
 void brw_clear_batch_cache( struct brw_context *brw );
 
-/* brw_wm_surface_state.c */
+/***********************************************************************
+ * brw_wm_surface_state.c 
+ */
 struct brw_winsys_buffer *
 brw_create_constant_surface( struct brw_context *brw,
                              struct brw_surface_key *key );
 
+/***********************************************************************
+ * brw_state_debug.c
+ */
+void brw_update_dirty_counts( unsigned mesa,
+			      unsigned brw,
+			      unsigned cache );
+
+
+
 #endif
diff --git a/src/gallium/drivers/i965/brw_state_batch.c b/src/gallium/drivers/i965/brw_state_batch.c
index 324fce5163..7d212e5c24 100644
--- a/src/gallium/drivers/i965/brw_state_batch.c
+++ b/src/gallium/drivers/i965/brw_state_batch.c
@@ -46,7 +46,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
    struct brw_cached_batch_item *item = brw->cached_batch_items;
    struct header *newheader = (struct header *)data;
 
-   if (brw->emit_state_always) {
+   if (brw->flags.always_emit_state) {
       brw_batchbuffer_data(brw->batch, data, sz, IGNORE_CLIPRECTS);
       return GL_TRUE;
    }
@@ -56,8 +56,8 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 	 if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
 	    return GL_FALSE;
 	 if (item->sz != sz) {
-	    _mesa_free(item->header);
-	    item->header = _mesa_malloc(sz);
+	    FREE(item->header);
+	    item->header = MALLOC(sz);
 	    item->sz = sz;
 	 }
 	 goto emit;
@@ -67,7 +67,7 @@ GLboolean brw_cached_batch_struct( struct brw_context *brw,
 
    assert(!item);
    item = CALLOC_STRUCT(brw_cached_batch_item);
-   item->header = _mesa_malloc(sz);
+   item->header = MALLOC(sz);
    item->sz = sz;
    item->next = brw->cached_batch_items;
    brw->cached_batch_items = item;
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 97f88b3ab3..4310d01ba2 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -55,7 +55,9 @@
  * only one of the two buffers referenced gets put into the offset, and the
  * incorrect program is run for the other instance.
  */
+#include "util/u_memory.h"
 
+#include "brw_debug.h"
 #include "brw_state.h"
 #include "brw_batchbuffer.h"
 
@@ -107,9 +109,9 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
    if (bo == cache->last_bo[cache_id])
       return; /* no change */
 
-   brw->sws->bo_unreference(cache->last_bo[cache_id]);
+   cache->sws->bo_unreference(cache->last_bo[cache_id]);
    cache->last_bo[cache_id] = bo;
-   brw->sws->bo_reference(cache->last_bo[cache_id]);
+   cache->sws->bo_reference(cache->last_bo[cache_id]);
    cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
@@ -127,7 +129,7 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
    for (c = cache->items[hash % cache->size]; c; c = c->next)
       bucketcount++;
 
-   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+   debug_printf("bucket %d/%d = %d/%d items\n", hash % cache->size,
 	   cache->size, bucketcount, cache->n_items);
 #endif
 
@@ -154,7 +156,7 @@ rehash(struct brw_cache *cache)
    GLuint size, i;
 
    size = cache->size * 3;
-   items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items));
+   items = (struct brw_cache_item**) CALLOC(size, sizeof(*items));
 
    for (i = 0; i < cache->size; i++)
       for (c = cache->items[i]; c; c = next) {
@@ -194,7 +196,7 @@ brw_search_cache(struct brw_cache *cache,
 
    update_cache_last(cache, cache_id, item->bo);
 
-   brw->sws->bo_reference(item->bo);
+   cache->sws->bo_reference(item->bo);
    return item->bo;
 }
 
@@ -219,20 +221,25 @@ brw_upload_cache( struct brw_cache *cache,
    struct brw_winsys_buffer *bo;
    int i;
 
-   /* Create the buffer object to contain the data */
-   bo = brw->sws->bo_alloc(cache->sws,
-			   cache->buffer_type[cache_id], data_size, 1 << 6);
+   /* Create the buffer object to contain the data.  For now, use a
+    * single buffer type to describe all cached state atoms.  Later,
+    * may want to take advantage of hardware distinctions between
+    * these various entities.
+    */
+   bo = cache->sws->bo_alloc(cache->sws,
+			     BRW_BUFFER_TYPE_STATE_CACHE, 
+			     data_size, 1 << 6);
 
 
    /* Set up the memory containing the key, aux_data, and reloc_bufs */
-   tmp = _mesa_malloc(key_size + aux_size + relocs_size);
+   tmp = MALLOC(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
    memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
    memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
    for (i = 0; i < nr_reloc_bufs; i++) {
       if (reloc_bufs[i] != NULL)
-	 brw->sws->bo_reference(reloc_bufs[i]);
+	 cache->sws->bo_reference(reloc_bufs[i]);
    }
 
    item->cache_id = cache_id;
@@ -243,7 +250,7 @@ brw_upload_cache( struct brw_cache *cache,
    item->nr_reloc_bufs = nr_reloc_bufs;
 
    item->bo = bo;
-   brw->sws->bo_reference(bo);
+   cache->sws->bo_reference(bo);
    item->data_size = data_size;
 
    if (cache->n_items > cache->size * 1.5)
@@ -259,13 +266,13 @@ brw_upload_cache( struct brw_cache *cache,
       *(void **)aux_return = (void *)((char *)item->key + item->key_size);
    }
 
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("upload %s: %d bytes to cache id %d\n",
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("upload %s: %d bytes to cache id %d\n",
 		   cache->name[cache_id],
 		   data_size, cache_id);
 
    /* Copy data to the buffer */
-   dri_bo_subdata(bo, 0, data_size, data);
+   cache->sws->bo_subdata(bo, 0, data_size, data);
 
    update_cache_last(cache, cache_id, bo);
 
@@ -292,7 +299,7 @@ brw_cache_data_sz(struct brw_cache *cache,
 		       reloc_bufs, nr_reloc_bufs);
    if (item) {
       update_cache_last(cache, cache_id, item->bo);
-      brw->sws->bo_reference(item->bo);
+      cache->sws->bo_reference(item->bo);
       return item->bo;
    }
 
@@ -349,11 +356,12 @@ brw_init_non_surface_cache(struct brw_context *brw)
    struct brw_cache *cache = &brw->cache;
 
    cache->brw = brw;
+   cache->sws = brw->sws;
 
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
-      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+      CALLOC(cache->size, sizeof(struct brw_cache_item));
 
    brw_init_cache_id(cache,
 		     "CC_VP",
@@ -457,7 +465,7 @@ brw_init_surface_cache(struct brw_context *brw)
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
-      _mesa_calloc(cache->size * sizeof(struct brw_cache_item));
+      CALLOC(cache->size, sizeof(struct brw_cache_item));
 
    brw_init_cache_id(cache,
 		     "SS_SURFACE",
@@ -487,8 +495,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    struct brw_cache_item *c, *next;
    GLuint i;
 
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
 
    for (i = 0; i < cache->size; i++) {
       for (c = cache->items[i]; c; c = next) {
@@ -507,7 +515,7 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
    cache->n_items = 0;
 
    if (brw->curbe.last_buf) {
-      _mesa_free(brw->curbe.last_buf);
+      FREE(brw->curbe.last_buf);
       brw->curbe.last_buf = NULL;
    }
 
@@ -527,8 +535,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
    struct brw_cache_item **prev;
    GLuint i;
 
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
 
    for (i = 0; i < cache->size; i++) {
       for (prev = &cache->items[i]; *prev;) {
@@ -540,8 +548,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
 	    *prev = c->next;
 
 	    for (j = 0; j < c->nr_reloc_bufs; j++)
-	       brw->sws->bo_unreference(c->reloc_bufs[j]);
-	    brw->sws->bo_unreference(c->bo);
+	       cache->sws->bo_unreference(c->reloc_bufs[j]);
+	    cache->sws->bo_unreference(c->bo);
 	    free((void *)c->key);
 	    free(c);
 	    cache->n_items--;
@@ -555,8 +563,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
 void
 brw_state_cache_check_size(struct brw_context *brw)
 {
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items);
 
    /* un-tuned guess.  We've got around 20 state objects for a total of around
     * 32k, so 1000 of them is around 1.5MB.
@@ -574,8 +582,8 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 {
    GLuint i;
 
-   if (INTEL_DEBUG & DEBUG_STATE)
-      _mesa_printf("%s\n", __FUNCTION__);
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
 
    brw_clear_cache(brw, cache);
    for (i = 0; i < BRW_MAX_CACHE; i++) {
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
index 22cea4b7d8..cc4744dc16 100644
--- a/src/gallium/drivers/i965/brw_state_debug.c
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -109,8 +109,25 @@ brw_print_dirty_count(struct dirty_bit_map *bit_map, int32_t bits)
       if (bit_map[i].bit == 0)
 	 return;
 
-      fprintf(stderr, "0x%08x: %12d (%s)\n",
+      debug_printf("0x%08x: %12d (%s)\n",
 	      bit_map[i].bit, bit_map[i].count, bit_map[i].name);
    }
 }
 
+/* Pass each dirty mask to its counter table; print the tables on call 0, 1000, 2000, ... */
+void
+brw_update_dirty_counts( unsigned mesa, unsigned brw, unsigned cache )
+{
+   static int dirty_count = 0;
+
+   brw_update_dirty_count(mesa_bits, mesa);
+   brw_update_dirty_count(brw_bits, brw);
+   brw_update_dirty_count(cache_bits, cache);
+
+   if (dirty_count++ % 1000 != 0)
+      return;
+   brw_print_dirty_count(mesa_bits, mesa);
+   brw_print_dirty_count(brw_bits, brw);
+   brw_print_dirty_count(cache_bits, cache);
+   debug_printf("\n");
+}
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
index 1bc83fb9c1..72604304d4 100644
--- a/src/gallium/drivers/i965/brw_state_dump.c
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -28,6 +28,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_winsys.h"
 
 /**
  * Prints out a header, the contents, and the message associated with
@@ -44,28 +45,32 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index,
 {
     va_list va;
 
-    fprintf(stderr, "%8s: 0x%08x: 0x%08x: ",
-	    name, hw_offset + index * 4, ((uint32_t *)data)[index]);
+    debug_printf("%8s: 0x%08x: 0x%08x: ",
+		 name, hw_offset + index * 4, ((uint32_t *)data)[index]);
     va_start(va, fmt);
-    vfprintf(stderr, fmt, va);
+    debug_vprintf(fmt, va);
     va_end(va);
 }
 
 /** Generic, undecoded state buffer debug printout */
 static void
-state_struct_out(const char *name, struct brw_winsys_buffer *buffer, unsigned int state_size)
+state_struct_out(struct brw_winsys_screen *sws,
+		 const char *name,
+		 struct brw_winsys_buffer *buffer,
+		 unsigned int state_size)
 {
    int i;
+   void *data;
 
    if (buffer == NULL)
       return;
 
-   dri_bo_map(buffer, GL_FALSE);
+   data = sws->bo_map(buffer, GL_FALSE);
    for (i = 0; i < state_size / 4; i++) {
-      state_out(name, buffer->virtual, buffer->offset, i,
+      state_out(name, data, buffer->offset, i,
 		"dword %d\n", i);
    }
-   dri_bo_unmap(buffer);
+   sws->bo_unmap(buffer);
 }
 
 static const char *
@@ -106,12 +111,11 @@ static void dump_wm_surface_state(struct brw_context *brw)
       char name[20];
 
       if (surf_bo == NULL) {
-	 fprintf(stderr, "  WM SS%d: NULL\n", i);
+	 debug_printf("  WM SS%d: NULL\n", i);
 	 continue;
       }
-      dri_bo_map(surf_bo, GL_FALSE);
+      surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE);
       surfoff = surf_bo->offset;
-      surf = (struct brw_surface_state *)(surf_bo->virtual);
 
       sprintf(name, "WM SS%d", i);
       state_out(name, surf, surfoff, 0, "%s %s\n",
@@ -127,7 +131,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
       state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
 		surf->ss5.x_offset, surf->ss5.y_offset);
 
-      dri_bo_unmap(surf_bo);
+      brw->sws->bo_unmap(surf_bo);
    }
 }
 
@@ -140,9 +144,7 @@ static void dump_sf_viewport_state(struct brw_context *brw)
    if (brw->sf.vp_bo == NULL)
       return;
 
-   dri_bo_map(brw->sf.vp_bo, GL_FALSE);
-
-   vp = brw->sf.vp_bo->virtual;
+   vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE);
    vp_off = brw->sf.vp_bo->offset;
 
    state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
@@ -157,10 +159,12 @@ static void dump_sf_viewport_state(struct brw_context *brw)
    state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
 	     vp->scissor.xmax, vp->scissor.ymax);
 
-   dri_bo_unmap(brw->sf.vp_bo);
+   brw->sws->bo_unmap(brw->sf.vp_bo);
 }
 
-static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog)
+static void brw_debug_prog(struct brw_winsys_screen *sws,
+			   const char *name,
+			   struct brw_winsys_buffer *prog)
 {
    unsigned int i;
    uint32_t *data;
@@ -168,12 +172,10 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog)
    if (prog == NULL)
       return;
 
-   dri_bo_map(prog, GL_FALSE);
-
-   data = prog->virtual;
+   data = (uint32_t *)sws->bo_map(prog, GL_FALSE);
 
    for (i = 0; i < prog->size / 4 / 4; i++) {
-      fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+      debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
 	      name, (unsigned int)prog->offset + i * 4 * 4,
 	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
       /* Stop at the end of the program.  It'd be nice to keep track of the actual
@@ -186,7 +188,7 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog)
 	 break;
    }
 
-   dri_bo_unmap(prog);
+   sws->bo_unmap(prog);
 }
 
 
@@ -202,19 +204,21 @@ static void brw_debug_prog(const char *name, struct brw_winsys_buffer *prog)
  */
 void brw_debug_batch(struct brw_context *brw)
 {
-   state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
+   struct brw_winsys_screen *sws = brw->sws;
+
+   state_struct_out(sws, "WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
    dump_wm_surface_state(brw);
 
-   state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
-   brw_debug_prog("VS prog", brw->vs.prog_bo);
+   state_struct_out(sws, "VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
+   brw_debug_prog(sws, "VS prog", brw->vs.prog_bo);
 
-   state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
-   brw_debug_prog("GS prog", brw->gs.prog_bo);
+   state_struct_out(sws, "GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
+   brw_debug_prog(sws, "GS prog", brw->gs.prog_bo);
 
-   state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
+   state_struct_out(sws, "SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
    dump_sf_viewport_state(brw);
-   brw_debug_prog("SF prog", brw->sf.prog_bo);
+   brw_debug_prog(sws, "SF prog", brw->sf.prog_bo);
 
-   state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
-   brw_debug_prog("WM prog", brw->wm.prog_bo);
+   state_struct_out(sws, "WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
+   brw_debug_prog(sws, "WM prog", brw->wm.prog_bo);
 }
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index 8659e35289..eff3a40a46 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -34,6 +34,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_batchbuffer.h"
+#include "brw_debug.h"
 
 /* This is used to initialize brw->state.atoms[].  We could use this
  * list directly except for a single atom, brw_constant_buffer, which
@@ -83,12 +84,8 @@ const struct brw_tracked_state *atoms[] =
    &brw_blend_constant_color,
 
    &brw_depthbuffer,
-
    &brw_polygon_stipple,
-   &brw_polygon_stipple_offset,
-
    &brw_line_stipple,
-   &brw_aa_line_parameters,
 
    &brw_psp_urb_cbs,
 
@@ -163,11 +160,12 @@ enum pipe_error brw_validate_state( struct brw_context *brw )
 {
    struct brw_state_flags *state = &brw->state.dirty;
    GLuint i;
+   int ret;
 
    brw_clear_validated_bos(brw);
-   brw_add_validated_bo(brw, intel->batch->buf);
+   brw_add_validated_bo(brw, brw->batch->buf);
 
-   if (brw->emit_state_always) {
+   if (brw->flags.always_emit_state) {
       state->mesa |= ~0;
       state->brw |= ~0;
       state->cache |= ~0;
@@ -199,10 +197,10 @@ enum pipe_error brw_validate_state( struct brw_context *brw )
     * If this fails, we can experience GPU lock-ups.
     */
    {
-      const struct brw_fragment_program *fp = brw->fragment_program;
+      const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
       if (fp) {
-         assert(fp->info.max_sampler <= brw->nr_samplers &&
-		fp->info.max_texture <= brw->nr_textures);
+         assert(fp->info.file_max[TGSI_FILE_SAMPLER] < brw->curr.num_samplers &&
+		fp->info.texture_max < brw->curr.num_textures);
       }
    }
 
@@ -213,18 +211,18 @@ enum pipe_error brw_validate_state( struct brw_context *brw )
 enum pipe_error brw_upload_state(struct brw_context *brw)
 {
    struct brw_state_flags *state = &brw->state.dirty;
+   int ret;
    int i;
-   static int dirty_count = 0;
 
    brw_clear_validated_bos(brw);
 
-   if (INTEL_DEBUG) {
+   if (BRW_DEBUG) {
       /* Debug version which enforces various sanity checks on the
        * state flags which are generated and checked to help ensure
        * state atoms are ordered correctly in the list.
        */
       struct brw_state_flags examined, prev;      
-      _mesa_memset(&examined, 0, sizeof(examined));
+      memset(&examined, 0, sizeof(examined));
       prev = *state;
 
       for (i = 0; i < Elements(atoms); i++) {
@@ -268,19 +266,14 @@ enum pipe_error brw_upload_state(struct brw_context *brw)
       }
    }
 
-   if (INTEL_DEBUG & DEBUG_STATE) {
-      brw_update_dirty_count(mesa_bits, state->mesa);
-      brw_update_dirty_count(brw_bits, state->brw);
-      brw_update_dirty_count(cache_bits, state->cache);
-      if (dirty_count++ % 1000 == 0) {
-	 brw_print_dirty_count(mesa_bits, state->mesa);
-	 brw_print_dirty_count(brw_bits, state->brw);
-	 brw_print_dirty_count(cache_bits, state->cache);
-	 debug_printf("\n");
-      }
+   if (BRW_DEBUG & DEBUG_STATE) {
+      brw_update_dirty_counts( state->mesa, 
+			       state->brw,
+			       state->cache );
    }
    
    /* Clear dirty flags:
     */
    memset(state, 0, sizeof(*state));
+   return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_tex.c b/src/gallium/drivers/i965/brw_tex.c
deleted file mode 100644
index 6f7adb6393..0000000000
--- a/src/gallium/drivers/i965/brw_tex.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-        
-
-#include "brw_context.h"
-
-/**
- * Finalizes all textures, completing any rendering that needs to be done
- * to prepare them.
- */
-void brw_validate_textures( struct brw_context *brw )
-{
-   int i;
-
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
-
-      if (texUnit->_ReallyEnabled) {
-	 intel_finalize_mipmap_tree(intel, i);
-      }
-   }
-}
diff --git a/src/gallium/drivers/i965/brw_tex_layout.c b/src/gallium/drivers/i965/brw_tex_layout.c
deleted file mode 100644
index 50c30878c6..0000000000
--- a/src/gallium/drivers/i965/brw_tex_layout.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
- develop this 3D driver.
- 
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- 
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
- 
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- 
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  */
-
-/* Code to layout images in a mipmap tree for i965.
- */
-
-#include "brw_tex_layout.h"
-
-#define FILE_DEBUG_FLAG DEBUG_MIPTREE
-
-GLboolean brw_miptree_layout(struct brw_context *brw,
-			     struct intel_mipmap_tree *mt,
-			     uint32_t tiling)
-{
-   /* XXX: these vary depending on image format: */
-   /* GLint align_w = 4; */
-
-   switch (mt->target) {
-   case GL_TEXTURE_CUBE_MAP:
-      if (IS_IGDNG(brw->brw_screen->pci_id)) {
-          GLuint align_h = 2, align_w = 4;
-          GLuint level;
-          GLuint x = 0;
-          GLuint y = 0;
-          GLuint width = mt->width0;
-          GLuint height = mt->height0;
-          GLuint qpitch = 0;
-          GLuint y_pitch = 0;
-
-          mt->pitch = mt->width0;
-          intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
-          y_pitch = ALIGN(height, align_h);
-
-          if (mt->compressed) {
-              mt->pitch = ALIGN(mt->width0, align_w);
-          }
-
-          if (mt->last_level != 0) {
-              GLuint mip1_width;
-
-              if (mt->compressed) {
-                  mip1_width = ALIGN(minify(mt->width0), align_w)
-                      + ALIGN(minify(minify(mt->width0)), align_w);
-              } else {
-                  mip1_width = ALIGN(minify(mt->width0), align_w)
-                      + minify(minify(mt->width0));
-              }
-
-              if (mip1_width > mt->pitch) {
-                  mt->pitch = mip1_width;
-              }
-          }
-
-          mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
-
-          if (mt->compressed) {
-              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
-              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
-          } else {
-              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
-              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
-          }
-
-          for (level = 0; level <= mt->last_level; level++) {
-              GLuint img_height;
-              GLuint nr_images = 6;
-              GLuint q = 0;
-
-              intel_miptree_set_level_info(mt, level, nr_images, x, y, width, 
-                                           height, 1);
-
-              for (q = 0; q < nr_images; q++)
-                  intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
-
-              if (mt->compressed)
-                  img_height = MAX2(1, height/4);
-              else
-                  img_height = ALIGN(height, align_h);
-
-              if (level == 1) {
-                  x += ALIGN(width, align_w);
-              }
-              else {
-                  y += img_height;
-              }
-
-              width  = minify(width);
-              height = minify(height);
-          }
-
-          break;
-      }
-
-   case GL_TEXTURE_3D: {
-      GLuint width  = mt->width0;
-      GLuint height = mt->height0;
-      GLuint depth = mt->depth0;
-      GLuint pack_x_pitch, pack_x_nr;
-      GLuint pack_y_pitch;
-      GLuint level;
-      GLuint align_h = 2;
-      GLuint align_w = 4;
-
-      mt->total_height = 0;
-      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
-
-      if (mt->compressed) {
-          mt->pitch = ALIGN(width, align_w);
-          pack_y_pitch = (height + 3) / 4;
-      } else {
-	 mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
-	 pack_y_pitch = ALIGN(mt->height0, align_h);
-      }
-
-      pack_x_pitch = width;
-      pack_x_nr = 1;
-
-      for (level = 0 ; level <= mt->last_level ; level++) {
-	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
-	 GLint x = 0;
-	 GLint y = 0;
-	 GLint q, j;
-
-	 intel_miptree_set_level_info(mt, level, nr_images,
-				      0, mt->total_height,
-				      width, height, depth);
-
-	 for (q = 0; q < nr_images;) {
-	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
-	       intel_miptree_set_image_offset(mt, level, q, x, y);
-	       x += pack_x_pitch;
-	    }
-
-	    x = 0;
-	    y += pack_y_pitch;
-	 }
-
-
-	 mt->total_height += y;
-	 width  = minify(width);
-	 height = minify(height);
-	 depth  = minify(depth);
-
-	 if (mt->compressed) {
-	    pack_y_pitch = (height + 3) / 4;
-
-	    if (pack_x_pitch > ALIGN(width, align_w)) {
-	       pack_x_pitch = ALIGN(width, align_w);
-	       pack_x_nr <<= 1;
-	    }
-	 } else {
-	    if (pack_x_pitch > 4) {
-	       pack_x_pitch >>= 1;
-	       pack_x_nr <<= 1;
-	       assert(pack_x_pitch * pack_x_nr <= mt->pitch);
-	    }
-
-	    if (pack_y_pitch > 2) {
-	       pack_y_pitch >>= 1;
-	       pack_y_pitch = ALIGN(pack_y_pitch, align_h);
-	    }
-	 }
-
-      }
-      /* The 965's sampler lays cachelines out according to how accesses
-       * in the texture surfaces run, so they may be "vertical" through
-       * memory.  As a result, the docs say in Surface Padding Requirements:
-       * Sampling Engine Surfaces that two extra rows of padding are required.
-       * We don't know of similar requirements for pre-965, but given that
-       * those docs are silent on padding requirements in general, let's play
-       * it safe.
-       */
-      if (mt->target == GL_TEXTURE_CUBE_MAP)
-	 mt->total_height += 2;
-      break;
-   }
-
-   default:
-      i945_miptree_layout_2d(intel, mt, tiling);
-      break;
-   }
-   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
-		mt->pitch,
-		mt->total_height,
-		mt->cpp,
-		mt->pitch * mt->total_height * mt->cpp );
-
-   return GL_TRUE;
-}
-
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index a2277519ad..ff2466528d 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -184,17 +184,17 @@ static void recalculate_urb_fence( struct brw_context *brw )
 	     * entries and the values for minimum nr of entries
 	     * provided above.
 	     */
-	    _mesa_printf("couldn't calculate URB layout!\n");
+	    debug_printf("couldn't calculate URB layout!\n");
 	    exit(1);
 	 }
 	 
-	 if (INTEL_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
-	    _mesa_printf("URB CONSTRAINED\n");
+	 if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
+	    debug_printf("URB CONSTRAINED\n");
       }
 
 done:
-      if (INTEL_DEBUG & DEBUG_URB)
-	 _mesa_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+      if (BRW_DEBUG & DEBUG_URB)
+	 debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
 		      brw->urb.vs_start,
 		      brw->urb.gs_start,
 		      brw->urb.clip_start,
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 54f7d7d7c4..e33fa2f0aa 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -64,7 +64,7 @@ struct brw_vs_compile {
 
    struct brw_reg r0;
    struct brw_reg r1;
-   struct brw_reg regs[PROGRAM_ADDRESS+1][128];
+   struct brw_reg regs[TGSI_FILE_COUNT][128];
    struct brw_reg tmp;
    struct brw_reg stack;
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 086f54799e..04132a167b 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -242,10 +242,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 
    c->prog_data.total_grf = reg;
 
-   if (INTEL_DEBUG & DEBUG_VS) {
-      _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
-      _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
-      _mesa_printf("%s reg = %d\n", __FUNCTION__, reg);
+   if (BRW_DEBUG & DEBUG_VS) {
+      debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
+      debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      debug_printf("%s reg = %d\n", __FUNCTION__, reg);
    }
 }
 
@@ -1248,10 +1248,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
    GLuint index;
    GLuint file;
 
-   if (INTEL_DEBUG & DEBUG_VS) {
-      _mesa_printf("vs-mesa:\n");
+   if (BRW_DEBUG & DEBUG_VS) {
+      debug_printf("vs-mesa:\n");
       _mesa_print_program(&c->vp->program.Base); 
-      _mesa_printf("\n");
+      debug_printf("\n");
    }
 
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
@@ -1526,12 +1526,12 @@ void brw_vs_emit(struct brw_vs_compile *c )
 
    post_vs_emit(c, end_inst, last_inst);
 
-   if (INTEL_DEBUG & DEBUG_VS) {
+   if (BRW_DEBUG & DEBUG_VS) {
       int i;
 
-      _mesa_printf("vs-native:\n");
+      debug_printf("vs-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      debug_printf("\n");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 1717223e49..05a91f2de4 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -122,7 +122,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
 				  1, chipset_max_threads) - 1;
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       vs.thread4.max_threads = 0;
 
    /* No samplers for ARB_vp programs:
@@ -131,7 +131,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
     */
    vs.vs5.sampler_count = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
+   if (BRW_DEBUG & DEBUG_STATS)
       vs.thread4.stats_enable = 1;
 
    /* Vertex program always enabled:
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 51e23b9640..33032276bc 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -69,6 +69,7 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_SHADER_CONSTANTS,
    BRW_BUFFER_TYPE_WM_SCRATCH,
    BRW_BUFFER_TYPE_BATCH,
+   BRW_BUFFER_TYPE_STATE_CACHE,
 };
 
 
@@ -156,11 +157,15 @@ struct brw_winsys_screen {
 			  unsigned offset,
 			  struct brw_winsys_buffer *b2);
 
-   void (*bo_subdata)(struct brw_winsys_buffer *dst,
+   void (*bo_subdata)(struct brw_winsys_buffer *buffer,
 		      size_t offset,
 		      size_t size,
 		      const void *data);
 
+   boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
+   boolean (*bo_references)(struct brw_winsys_buffer *a,
+			    struct brw_winsys_buffer *b);
+
    /* XXX: couldn't this be handled by returning true/false on
     * bo_emit_reloc?
     */
@@ -171,18 +176,13 @@ struct brw_winsys_screen {
    /**
     * Map a buffer.
     */
-   void *(*buffer_map)(struct brw_winsys *iws,
-                       struct brw_winsys_buffer *buffer,
-                       boolean write);
+   void *(*bo_map)(struct brw_winsys_buffer *buffer,
+		   boolean write);
 
    /**
     * Unmap a buffer.
     */
-   void (*buffer_unmap)(struct brw_winsys *iws,
-                        struct brw_winsys_buffer *buffer);
-
-   void (*buffer_destroy)(struct brw_winsys *iws,
-                          struct brw_winsys_buffer *buffer);
+   void (*bo_unmap)(struct brw_winsys_buffer *buffer);
    /*@}*/
 
 
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 764708f7df..3d889699f8 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -178,8 +178,8 @@ static void do_wm_prog( struct brw_context *brw,
       brw_wm_non_glsl_emit(brw, c);
    }
 
-   if (INTEL_DEBUG & DEBUG_WM)
-      fprintf(stderr, "\n");
+   if (BRW_DEBUG & DEBUG_WM)
+      debug_printf("\n");
 
    /* get the program
     */
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index bf241f5fa4..5bc2a49c1f 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -33,6 +33,7 @@
 #ifndef BRW_WM_H
 #define BRW_WM_H
 
+#include "tgsi/tgsi_ureg.h"
 
 #include "brw_context.h"
 #include "brw_eu.h"
@@ -57,17 +58,18 @@
 #define AA_ALWAYS    2
 
 struct brw_wm_prog_key {
+   unsigned proj_attrib_mask; /**< one bit per fragment program attribute */
+   unsigned linear_attrib_mask:1;  /**< linear interpolation vs perspective interp */
+
    GLuint source_depth_reg:3;
    GLuint aa_dest_stencil_reg:3;
    GLuint dest_depth_reg:3;
    GLuint nr_depth_regs:3;
-   GLuint computes_depth:1;	/* could be derived from program string */
+   GLuint computes_depth:1;
    GLuint source_depth_to_render_target:1;
    GLuint flat_shade:1;
-   GLuint linear_color:1;  /**< linear interpolation vs perspective interp */
    GLuint runtime_check_aads_emit:1;
-   
-   GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */
+
    GLuint shadowtex_mask:16;
    GLuint yuvtex_mask:16;
    GLuint yuvtex_swap_mask:16;	/* UV swaped */
@@ -75,7 +77,7 @@ struct brw_wm_prog_key {
    GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
 
    GLuint program_string_id:32;
-   GLuint drawable_height;
+
    GLuint vp_nr_outputs_written;
 };
 
@@ -151,7 +153,7 @@ struct brw_wm_instruction {
 };
 
 
-#define BRW_WM_MAX_INSN  (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + FRAG_ATTRIB_MAX + 3)
+#define BRW_WM_MAX_INSN  2048
 #define BRW_WM_MAX_GRF   128		/* hardware limit */
 #define BRW_WM_MAX_VREG  (BRW_WM_MAX_INSN * 4)
 #define BRW_WM_MAX_REF   (BRW_WM_MAX_INSN * 12)
@@ -161,11 +163,19 @@ struct brw_wm_instruction {
 #define BRW_WM_MAX_SUBROUTINE 16
 
 
+struct ureg_instruction {
+   unsigned opcode:8;
+   unsigned tex_target:3;
+   struct ureg_dst dst;
+   struct ureg_src src[3];
+};
+
 
 /* New opcodes to track internal operations required for WM unit.
  * These are added early so that the registers used can be tracked,
  * freed and reused like those of other instructions.
  */
+#define MAX_OPCODE        TGSI_OPCODE_LAST
 #define WM_PIXELXY        (MAX_OPCODE)
 #define WM_DELTAXY        (MAX_OPCODE + 1)
 #define WM_PIXELW         (MAX_OPCODE + 2)
@@ -177,7 +187,7 @@ struct brw_wm_instruction {
 #define WM_FRONTFACING    (MAX_OPCODE + 8)
 #define MAX_WM_OPCODE     (MAX_OPCODE + 9)
 
-#define PROGRAM_PAYLOAD   (PROGRAM_FILE_MAX)
+#define PROGRAM_PAYLOAD   (TGSI_FILE_COUNT)
 #define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)
 
 struct brw_wm_compile {
@@ -198,15 +208,15 @@ struct brw_wm_compile {
     * simplifying and adding instructions for interpolation and
     * framebuffer writes.
     */
-   struct prog_instruction prog_instructions[BRW_WM_MAX_INSN];
+   struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN];
    GLuint nr_fp_insns;
    GLuint fp_temp;
    GLuint fp_interp_emitted;
    GLuint fp_fragcolor_emitted;
 
-   struct prog_src_register pixel_xy;
-   struct prog_src_register delta_xy;
-   struct prog_src_register pixel_w;
+   struct ureg_src pixel_xy;
+   struct ureg_src delta_xy;
+   struct ureg_src pixel_w;
 
 
    struct brw_wm_value vreg[BRW_WM_MAX_VREG];
@@ -217,7 +227,7 @@ struct brw_wm_compile {
 
    struct {
       struct brw_wm_value depth[4]; /* includes r0/r1 */
-      struct brw_wm_value input_interp[FRAG_ATTRIB_MAX];
+      struct brw_wm_value input_interp[PIPE_MAX_SHADER_INPUTS];
    } payload;
 
 
@@ -295,7 +305,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
 		       GLboolean ps_uses_depth,
 		       struct brw_wm_prog_key *key );
 
-GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
+//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 void emit_ddxy(struct brw_compile *p,
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
index c6659646f2..04dec5ba39 100644
--- a/src/gallium/drivers/i965/brw_wm_debug.c
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -41,21 +41,21 @@ void brw_wm_print_value( struct brw_wm_compile *c,
    if (c->state >= PASS2_DONE) 
       brw_print_reg(value->hw_reg);
    else if( value == &c->undef_value )
-      _mesa_printf("undef");
+      debug_printf("undef");
    else if( value - c->vreg >= 0 &&
 	    value - c->vreg < BRW_WM_MAX_VREG)
-      _mesa_printf("r%d", value - c->vreg);
+      debug_printf("r%d", value - c->vreg);
    else if (value - c->creg >= 0 &&
 	    value - c->creg < BRW_WM_MAX_PARAM)
-      _mesa_printf("c%d", value - c->creg);
+      debug_printf("c%d", value - c->creg);
    else if (value - c->payload.input_interp >= 0 &&
 	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
-      _mesa_printf("i%d", value - c->payload.input_interp);
+      debug_printf("i%d", value - c->payload.input_interp);
    else if (value - c->payload.depth >= 0 &&
 	    value - c->payload.depth < FRAG_ATTRIB_MAX)
-      _mesa_printf("d%d", value - c->payload.depth);
+      debug_printf("d%d", value - c->payload.depth);
    else 
-      _mesa_printf("?");
+      debug_printf("?");
 }
 
 void brw_wm_print_ref( struct brw_wm_compile *c,
@@ -64,16 +64,16 @@ void brw_wm_print_ref( struct brw_wm_compile *c,
    struct brw_reg hw_reg = ref->hw_reg;
 
    if (ref->unspill_reg)
-      _mesa_printf("UNSPILL(%x)/", ref->value->spill_slot);
+      debug_printf("UNSPILL(%x)/", ref->value->spill_slot);
 
    if (c->state >= PASS2_DONE)
       brw_print_reg(ref->hw_reg);
    else {
-      _mesa_printf("%s", hw_reg.negate ? "-" : "");
-      _mesa_printf("%s", hw_reg.abs ? "abs/" : "");
+      debug_printf("%s", hw_reg.negate ? "-" : "");
+      debug_printf("%s", hw_reg.abs ? "abs/" : "");
       brw_wm_print_value(c, ref->value);
       if ((hw_reg.nr&1) || hw_reg.subnr) {
-	 _mesa_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
+	 debug_printf("->%d.%d", (hw_reg.nr&1), hw_reg.subnr);
       }
    }
 }
@@ -84,22 +84,22 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
    GLuint i, arg;
    GLuint nr_args = brw_wm_nr_args(inst->opcode);
 
-   _mesa_printf("[");
+   debug_printf("[");
    for (i = 0; i < 4; i++) {
       if (inst->dst[i]) {
 	 brw_wm_print_value(c, inst->dst[i]);
 	 if (inst->dst[i]->spill_slot)
-	    _mesa_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
+	    debug_printf("/SPILL(%x)",inst->dst[i]->spill_slot);
       }
       else
-	 _mesa_printf("#");
+	 debug_printf("#");
       if (i < 3)      
-	 _mesa_printf(",");
+	 debug_printf(",");
    }
-   _mesa_printf("]");
+   debug_printf("]");
 
    if (inst->writemask != BRW_WRITEMASK_XYZW)
-      _mesa_printf(".%s%s%s%s", 
+      debug_printf(".%s%s%s%s", 
 		   GET_BIT(inst->writemask, 0) ? "x" : "",
 		   GET_BIT(inst->writemask, 1) ? "y" : "",
 		   GET_BIT(inst->writemask, 2) ? "z" : "",
@@ -107,58 +107,58 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 
    switch (inst->opcode) {
    case WM_PIXELXY:
-      _mesa_printf(" = PIXELXY");
+      debug_printf(" = PIXELXY");
       break;
    case WM_DELTAXY:
-      _mesa_printf(" = DELTAXY");
+      debug_printf(" = DELTAXY");
       break;
    case WM_PIXELW:
-      _mesa_printf(" = PIXELW");
+      debug_printf(" = PIXELW");
       break;
    case WM_WPOSXY:
-      _mesa_printf(" = WPOSXY");
+      debug_printf(" = WPOSXY");
       break;
    case WM_PINTERP:
-      _mesa_printf(" = PINTERP");
+      debug_printf(" = PINTERP");
       break;
    case WM_LINTERP:
-      _mesa_printf(" = LINTERP");
+      debug_printf(" = LINTERP");
       break;
    case WM_CINTERP:
-      _mesa_printf(" = CINTERP");
+      debug_printf(" = CINTERP");
       break;
    case WM_FB_WRITE:
-      _mesa_printf(" = FB_WRITE");
+      debug_printf(" = FB_WRITE");
       break;
    case WM_FRONTFACING:
-      _mesa_printf(" = FRONTFACING");
+      debug_printf(" = FRONTFACING");
       break;
    default:
-      _mesa_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      debug_printf(" = %s", _mesa_opcode_string(inst->opcode));
       break;
    }
 
    if (inst->saturate)
-      _mesa_printf("_SAT");
+      debug_printf("_SAT");
 
    for (arg = 0; arg < nr_args; arg++) {
 
-      _mesa_printf(" [");
+      debug_printf(" [");
 
       for (i = 0; i < 4; i++) {
 	 if (inst->src[arg][i]) {
 	    brw_wm_print_ref(c, inst->src[arg][i]);
 	 }
 	 else
-	    _mesa_printf("%%");
+	    debug_printf("%%");
 
 	 if (i < 3) 
-	    _mesa_printf(",");
+	    debug_printf(",");
 	 else
-	    _mesa_printf("]");
+	    debug_printf("]");
       }
    }
-   _mesa_printf("\n");
+   debug_printf("\n");
 }
 
 void brw_wm_print_program( struct brw_wm_compile *c,
@@ -166,9 +166,9 @@ void brw_wm_print_program( struct brw_wm_compile *c,
 {
    GLuint insn;
 
-   _mesa_printf("%s:\n", stage);
+   debug_printf("%s:\n", stage);
    for (insn = 0; insn < c->nr_insns; insn++)
       brw_wm_print_insn(c, &c->instruction[insn]);
-   _mesa_printf("\n");
+   debug_printf("\n");
 }
 
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 7df9b79d7a..5f7ae6592c 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -1481,7 +1481,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 break;
 
       default:
-	 _mesa_printf("Unsupported opcode %i (%s) in fragment shader\n",
+	 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
 		      inst->opcode, inst->opcode < MAX_OPCODE ?
 				    _mesa_opcode_string(inst->opcode) :
 				    "unknown");
@@ -1494,12 +1494,12 @@ void brw_wm_emit( struct brw_wm_compile *c )
 		      inst->dst[i]->spill_slot);
    }
 
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (BRW_DEBUG & DEBUG_WM) {
       int i;
 
-      _mesa_printf("wm-native:\n");
+      debug_printf("wm-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      debug_printf("\n");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index be240031c7..d594730730 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -142,7 +142,7 @@ static struct prog_dst_register get_temp( struct brw_wm_compile *c )
    int bit = _mesa_ffs( ~c->fp_temp );
 
    if (!bit) {
-      _mesa_printf("%s: out of temporaries\n", __FILE__);
+      debug_printf("%s: out of temporaries\n", __FILE__);
       exit(1);
    }
 
@@ -977,7 +977,7 @@ static void print_insns( const struct prog_instruction *insn,
 {
    GLuint i;
    for (i = 0; i < nr; i++, insn++) {
-      _mesa_printf("%3d: ", i);
+      debug_printf("%3d: ", i);
       if (insn->Opcode < MAX_OPCODE)
 	 _mesa_print_instruction(insn);
       else if (insn->Opcode < MAX_WM_OPCODE) {
@@ -988,7 +988,7 @@ static void print_insns( const struct prog_instruction *insn,
 				     3);
       }
       else 
-	 _mesa_printf("965 Opcode %d\n", insn->Opcode);
+	 debug_printf("965 Opcode %d\n", insn->Opcode);
    }
 }
 
@@ -1002,10 +1002,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
    struct brw_fragment_program *fp = c->fp;
    GLuint insn;
 
-   if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pre-fp:\n");
+   if (BRW_DEBUG & DEBUG_WM) {
+      debug_printf("pre-fp:\n");
       _mesa_print_program(&fp->program.Base); 
-      _mesa_printf("\n");
+      debug_printf("\n");
    }
 
    c->pixel_xy = src_undef();
@@ -1103,10 +1103,10 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
       }
    }
 
-   if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("pass_fp:\n");
+   if (BRW_DEBUG & DEBUG_WM) {
+      debug_printf("pass_fp:\n");
       print_insns( c->prog_instructions, c->nr_fp_insns );
-      _mesa_printf("\n");
+      debug_printf("\n");
    }
 }
 
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index a8de5fdd0b..3118e615f9 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1694,7 +1694,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
         c->cur_inst = i;
 
 #if 0
-        _mesa_printf("Inst %d: ", i);
+        debug_printf("Inst %d: ", i);
         _mesa_print_instruction(inst);
 #endif
 
@@ -1920,7 +1920,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                }
                break;
 	    default:
-		_mesa_printf("unsupported IR in fragment shader %d\n",
+		debug_printf("unsupported IR in fragment shader %d\n",
 			inst->Opcode);
 	}
 
@@ -1931,11 +1931,11 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
     }
     post_wm_emit(c);
 
-    if (INTEL_DEBUG & DEBUG_WM) {
-      _mesa_printf("wm-native:\n");
+    if (BRW_DEBUG & DEBUG_WM) {
+      debug_printf("wm-native:\n");
       for (i = 0; i < p->nr_insn; i++)
 	 brw_disasm(stderr, &p->store[i]);
-      _mesa_printf("\n");
+      debug_printf("\n");
     }
 }
 
@@ -1945,8 +1945,8 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
  */
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
-    if (INTEL_DEBUG & DEBUG_WM) {
-        _mesa_printf("brw_wm_glsl_emit:\n");
+    if (BRW_DEBUG & DEBUG_WM) {
+        debug_printf("brw_wm_glsl_emit:\n");
     }
 
     /* initial instruction translation/simplification */
@@ -1955,7 +1955,7 @@ void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
     /* actual code generation */
     brw_wm_emit_glsl(brw, c);
 
-    if (INTEL_DEBUG & DEBUG_WM) {
+    if (BRW_DEBUG & DEBUG_WM) {
         brw_wm_print_program(c, "brw_wm_glsl_emit done");
     }
 
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 31b0270e84..71e4c56835 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -101,7 +101,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
    GLuint i = c->prog_data.nr_params++;
    
    if (i >= BRW_WM_MAX_PARAM) {
-      _mesa_printf("%s: out of params\n", __FUNCTION__);
+      debug_printf("%s: out of params\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
@@ -150,7 +150,7 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
       return c->imm_ref[i].ref;
    }
    else {
-      _mesa_printf("%s: out of imm_refs\n", __FUNCTION__);
+      debug_printf("%s: out of imm_refs\n", __FUNCTION__);
       c->prog_data.error = 1;
       return NULL;
    }
@@ -434,7 +434,7 @@ void brw_wm_pass0( struct brw_wm_compile *c )
       }
    }
  
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (BRW_DEBUG & DEBUG_WM) {
       brw_wm_print_program(c, "pass0");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index f2ae3a958f..85a3a55ca4 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -284,7 +284,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       track_arg(c, inst, 2, read2);
    }
 
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (BRW_DEBUG & DEBUG_WM) {
       brw_wm_print_program(c, "pass1");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index 6faea018fb..a19ca62328 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -331,13 +331,13 @@ void brw_wm_pass2( struct brw_wm_compile *c )
       }
    }
 
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (BRW_DEBUG & DEBUG_WM) {
       brw_wm_print_program(c, "pass2");
    }
 
    c->state = PASS2_DONE;
 
-   if (INTEL_DEBUG & DEBUG_WM) {
+   if (BRW_DEBUG & DEBUG_WM) {
        brw_wm_print_program(c, "pass2/done");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index a8993f9312..32692d533c 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -76,8 +76,9 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits)
 }
 
 
-static struct brw_winsys_buffer *upload_default_color( struct brw_context *brw,
-				     const GLfloat *color )
+static struct brw_winsys_buffer *
+upload_default_color( struct brw_context *brw,
+		      const GLfloat *color )
 {
    struct brw_sampler_default_color sdc;
 
@@ -117,63 +118,6 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
 {
    _mesa_memset(sampler, 0, sizeof(*sampler));
 
-   switch (key->minfilter) {
-   case GL_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
-      break;
-   case GL_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
-      break;
-   case GL_NEAREST_MIPMAP_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
-      break;
-   case GL_LINEAR_MIPMAP_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
-      break;
-   case GL_NEAREST_MIPMAP_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
-      break;
-   case GL_LINEAR_MIPMAP_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
-      break;
-   default:
-      break;
-   }
-
-   /* Set Anisotropy: 
-    */
-   if (key->max_aniso > 1.0) {
-      sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
-      sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
-
-      if (key->max_aniso > 2.0) {
-	 sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
-				       BRW_ANISORATIO_16);
-      }
-   }
-   else {
-      switch (key->magfilter) {
-      case GL_NEAREST:
-	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
-	 break;
-      case GL_LINEAR:
-	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
-	 break;
-      default:
-	 break;
-      }  
-   }
-
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
-
    /* Cube-maps on 965 and later must use the same wrap mode for all 3
     * coordinate dimensions.  Futher, only CUBE and CLAMP are valid.
     */
@@ -198,36 +142,7 @@ static void brw_update_sampler_state(struct wm_sampler_entry *key,
    }
 
 
-   /* Set shadow function: 
-    */
-   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
-      /* Shadowing is "enabled" by emitting a particular sampler
-       * message (sample_c).  So need to recompile WM program when
-       * shadow comparison is enabled on each/any texture unit.
-       */
-      sampler->ss0.shadow_function =
-	 intel_translate_shadow_compare_func(key->comparefunc);
-   }
-
-   /* Set LOD bias: 
-    */
-   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
-
-   sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
-   sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
-
-   /* Set BaseMipLevel, MaxLOD, MinLOD: 
-    *
-    * XXX: I don't think that using firstLevel, lastLevel works,
-    * because we always setup the surface state as if firstLevel ==
-    * level zero.  Probably have to subtract firstLevel from each of
-    * these:
-    */
-   sampler->ss0.base_level = U_FIXED(0, 1);
 
-   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
-   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
-   
    sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
 }
 
@@ -237,57 +152,42 @@ static void
 brw_wm_sampler_populate_key(struct brw_context *brw,
 			    struct wm_sampler_key *key)
 {
-   int unit;
+   int nr = MIN2(brw->curr.number_textures,
+		 brw->curr.number_samplers);
+   int i;
 
    memset(key, 0, sizeof(*key));
 
-   for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) {
-      if (ctx->Texture.Unit[unit]._ReallyEnabled) {
-	 struct wm_sampler_entry *entry = &key->sampler[unit];
-	 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
-	 struct gl_texture_object *texObj = texUnit->_Current;
-	 struct intel_texture_object *intelObj = intel_texture_object(texObj);
-	 struct gl_texture_image *firstImage =
-	    texObj->Image[0][intelObj->firstLevel];
-
-         entry->tex_target = texObj->Target;
-
-	 entry->seamless_cube_map = (texObj->Target == GL_TEXTURE_CUBE_MAP)
-	    ? ctx->Texture.CubeMapSeamless : GL_FALSE;
-
-	 entry->wrap_r = texObj->WrapR;
-	 entry->wrap_s = texObj->WrapS;
-	 entry->wrap_t = texObj->WrapT;
-
-	 entry->maxlod = texObj->MaxLod;
-	 entry->minlod = texObj->MinLod;
-	 entry->lod_bias = texUnit->LodBias + texObj->LodBias;
-	 entry->max_aniso = texObj->MaxAnisotropy;
-	 entry->minfilter = texObj->MinFilter;
-	 entry->magfilter = texObj->MagFilter;
-	 entry->comparemode = texObj->CompareMode;
-         entry->comparefunc = texObj->CompareFunc;
-
-	 brw->sws->bo_unreference(brw->wm.sdc_bo[unit]);
-	 if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
-	    float bordercolor[4] = {
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0],
-	       texObj->BorderColor[0]
-	    };
-	    /* GL specs that border color for depth textures is taken from the
-	     * R channel, while the hardware uses A.  Spam R into all the
-	     * channels for safety.
-	     */
-	    brw->wm.sdc_bo[unit] = upload_default_color(brw, bordercolor);
-	 } else {
-	    brw->wm.sdc_bo[unit] = upload_default_color(brw,
-							texObj->BorderColor);
-	 }
-	 key->sampler_count = unit + 1;
+   for (i = 0; i < nr; i++) {
+      const struct brw_texture *tex = brw->curr.texture[i];
+      const struct brw_sampler *sampler = brw->curr.sampler[i];
+      struct wm_sampler_entry *entry = &key->sampler[i];
+
+      entry->tex_target = texObj->Target;
+      entry->seamless_cube_map = FALSE; /* XXX: add this to gallium */
+      entry->ss0 = sampler->ss0;
+      entry->ss1 = sampler->ss1;
+      entry->ss3 = sampler->ss3;
+
+      brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
+      if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+	 float bordercolor[4] = {
+	    texObj->BorderColor[0],
+	    texObj->BorderColor[0],
+	    texObj->BorderColor[0],
+	    texObj->BorderColor[0]
+	 };
+	 /* GL specs that border color for depth textures is taken from the
+	  * R channel, while the hardware uses A.  Spam R into all the
+	  * channels for safety.
+	  */
+	 brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor);
+      } else {
+	 brw->wm.sdc_bo[i] = upload_default_color(brw, texObj->BorderColor);
       }
    }
+
+   key->sampler_count = nr;
 }
 
 /* All samplers must be uploaded in a single contiguous array, which
@@ -354,7 +254,7 @@ static void upload_wm_samplers( struct brw_context *brw )
 
 const struct brw_tracked_state brw_wm_samplers = {
    .dirty = {
-      .mesa = _NEW_TEXTURE,
+      .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLER,
       .brw = 0,
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 4989aae830..edabf6ceb6 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -65,7 +65,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 
    memset(key, 0, sizeof(*key));
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       key->max_threads = 1;
    else {
       /* WM maximum threads is number of EUs times number of threads per EU. */
@@ -120,7 +120,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
 
    /* _NEW_QUERY */
-   key->stats_wm = intel->stats_wm;
+   key->stats_wm = (brw->query.stats_wm != 0);
 
    /* _NEW_LINE */
    key->line_stipple = ctx->Line.StippleFlag;
@@ -215,7 +215,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    wm.wm5.line_stipple = key->line_stipple;
 
-   if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+   if (BRW_DEBUG & DEBUG_STATS || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
    bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
-- 
cgit v1.2.3


From 827002f5ff990f8676385583275d6b8090abfb7a Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 26 Oct 2009 01:46:21 +0100
Subject: r300g: add cubemap support

Also, set a pitch for rectangles only.
---
 src/gallium/drivers/r300/r300_screen.c  |  6 +---
 src/gallium/drivers/r300/r300_texture.c | 55 +++++++++++++++++++++------------
 2 files changed, 37 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index f581f0ca09..6eaf35bd4b 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -277,14 +277,10 @@ static boolean r300_is_format_supported(struct pipe_screen* pscreen,
         case PIPE_TEXTURE_1D:   /* handle 1D textures as 2D ones */
         case PIPE_TEXTURE_2D:
         case PIPE_TEXTURE_3D:
+        case PIPE_TEXTURE_CUBE:
             return check_tex_format(format, tex_usage,
                 r300_screen(pscreen)->caps->is_r500);
 
-        case PIPE_TEXTURE_CUBE:
-            debug_printf("r300: Implementation error: Unsupported format "
-                    "target: %d\n", target);
-            break;
-
         default:
             debug_printf("r300: Fatal: This is not a format target: %d\n",
                 target);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 762806822c..2a33393a8c 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -27,18 +27,31 @@
 
 #include "r300_context.h"
 #include "r300_texture.h"
+#include "r300_screen.h"
 
-static void r300_setup_texture_state(struct r300_texture* tex)
+static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
 {
     struct r300_texture_state* state = &tex->state;
     struct pipe_texture *pt = &tex->tex;
+    unsigned stride;
 
     state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
-        R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff) |
-        R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
-        R300_TX_NUM_LEVELS(pt->last_level & 0xf);/* |
-        R300_TX_PITCH_EN;*/
-    /* XXX TX_PITCH_EN breaks rendering mipmap levels > 0, weard */
+                     R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff);
+
+    if (!util_is_power_of_two(pt->width[0]) ||
+        !util_is_power_of_two(pt->height[0])) {
+
+        /* rectangles love this */
+        state->format0 |= R300_TX_PITCH_EN;
+
+        stride = r300_texture_get_stride(tex, 0) / pt->block.size;
+        state->format2 = (stride - 1) & 0x1fff;
+    }
+    else {
+        /* power of two textures (3D, mipmaps, and no pitch) */
+        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
+                          R300_TX_NUM_LEVELS(pt->last_level & 0xf);
+    }
 
     /* XXX */
     state->format1 = r300_translate_texformat(pt->format);
@@ -49,17 +62,17 @@ static void r300_setup_texture_state(struct r300_texture* tex)
         state->format1 |= R300_TX_FORMAT_3D;
     }
 
-    state->format2 = ((r300_texture_get_stride(tex, 0) / pt->block.size) - 1)
-                     & 0x1fff;
-
-    /* Don't worry about accidentally setting this bit on non-r500;
-     * the kernel should catch it. */
-    if (pt->width[0] > 2048) {
-        state->format2 |= R500_TXWIDTH_BIT11;
-    }
-    if (pt->height[0] > 2048) {
-        state->format2 |= R500_TXHEIGHT_BIT11;
+    /* large textures on r500 */
+    if (is_r500)
+    {
+        if (pt->width[0] > 2048) {
+            state->format2 |= R500_TXWIDTH_BIT11;
+        }
+        if (pt->height[0] > 2048) {
+            state->format2 |= R500_TXHEIGHT_BIT11;
+        }
     }
+    assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048));
 
     debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
 		 pt->width[0], pt->height[0], pt->last_level);
@@ -121,7 +134,11 @@ static void r300_setup_miptree(struct r300_texture* tex)
 
         stride = r300_texture_get_stride(tex, i);
         layer_size = stride * base->nblocksy[i];
-        size = layer_size * base->depth[i];
+
+        if (base->target == PIPE_TEXTURE_CUBE)
+            size = layer_size * 6;
+        else
+            size = layer_size * base->depth[i];
 
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
@@ -151,7 +168,7 @@ static struct pipe_texture*
 
     r300_setup_miptree(tex);
 
-    r300_setup_texture_state(tex);
+    r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
 
     tex->buffer = screen->buffer_create(screen, 1024,
                                         PIPE_BUFFER_USAGE_PIXEL,
@@ -229,7 +246,7 @@ static struct pipe_texture*
 
     tex->stride_override = *stride;
 
-    r300_setup_texture_state(tex);
+    r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
 
     pipe_buffer_reference(&tex->buffer, buffer);
 
-- 
cgit v1.2.3


From 5241b9568c1f97eb9aca8be5eb7a3ef659d9917f Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 26 Oct 2009 01:47:55 +0100
Subject: r300g: read occlusion query results from both Z pipes on RV530

---
 src/gallium/drivers/r300/r300_query.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
index 007f11efae..ca00b043c5 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -113,7 +113,7 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
     unsigned flags = PIPE_BUFFER_USAGE_CPU_READ;
     uint32_t* map;
     uint32_t temp = 0;
-    unsigned i;
+    unsigned i, num_results;
 
     if (q->flushed == FALSE)
         pipe->flush(pipe, 0, NULL);
@@ -125,7 +125,13 @@ static boolean r300_get_query_result(struct pipe_context* pipe,
     if (!map)
         return FALSE;
     map += q->offset / 4;
-    for (i = 0; i < r300screen->caps->num_frag_pipes; i++) {
+
+    if (r300screen->caps->family == CHIP_FAMILY_RV530)
+        num_results = r300screen->caps->num_z_pipes;
+    else
+        num_results = r300screen->caps->num_frag_pipes;
+
+    for (i = 0; i < num_results; i++) {
         if (*map == ~0U) {
             /* Looks like our results aren't ready yet. */
             if (wait) {
-- 
cgit v1.2.3


From 5a304995e09d8dbfd40a2dfab32eacb7e85798e3 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 26 Oct 2009 01:11:36 +0000
Subject: i965g: still working on compilation

---
 src/gallium/drivers/i965/brw_context.h          |  15 +-
 src/gallium/drivers/i965/brw_gs.c               |   2 +-
 src/gallium/drivers/i965/brw_swtnl.c            | 144 ++---
 src/gallium/drivers/i965/brw_urb.c              |   5 +-
 src/gallium/drivers/i965/brw_vs.c               |  31 +-
 src/gallium/drivers/i965/brw_vs.h               |  14 +-
 src/gallium/drivers/i965/brw_vs_emit.c          | 733 ++++++++++++------------
 src/gallium/drivers/i965/brw_wm_glsl.c          |   4 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c |   7 +-
 9 files changed, 485 insertions(+), 470 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 10c1cf6f33..8aaf895d20 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -154,6 +154,7 @@ struct brw_vertex_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
+   unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
 };
@@ -165,6 +166,7 @@ struct brw_fragment_shader {
 
    GLboolean isGLSL;
 
+   unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
 };
@@ -280,10 +282,13 @@ struct brw_vs_prog_data {
    GLuint curb_read_length;
    GLuint urb_read_length;
    GLuint total_grf;
-   GLuint nr_outputs_written;
-   GLuint nr_params;       /**< number of float params/constants */
 
-   GLuint inputs_read;
+   GLuint nr_outputs;
+   GLuint nr_inputs;
+
+   GLuint nr_params;       /**< number of TGSI_FILE_CONSTANT's */
+
+   GLboolean copy_edgeflag;
 
    /* Used for calculating urb partitions:
     */
@@ -475,8 +480,8 @@ struct brw_context
    /* Active state from the state tracker: 
     */
    struct {
-      const struct brw_vertex_shader *vertex_shader;
-      const struct brw_fragment_shader *fragment_shader;
+      struct brw_vertex_shader *vertex_shader;
+      struct brw_fragment_shader *fragment_shader;
       const struct brw_blend_state *blend;
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 3ecaa74e4f..693d8bfdf8 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -148,7 +148,7 @@ static void populate_key( struct brw_context *brw,
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_VS_PROG */
-   key->nr_attrs = brw->vs.prog_data->nr_outputs_written;
+   key->nr_attrs = brw->vs.prog_data->nr_outputs;
 
    /* BRW_NEW_PRIMITIVE */
    key->primitive = gs_prim[brw->primitive];
diff --git a/src/gallium/drivers/i965/brw_swtnl.c b/src/gallium/drivers/i965/brw_swtnl.c
index d2df8af9f4..464013e7c4 100644
--- a/src/gallium/drivers/i965/brw_swtnl.c
+++ b/src/gallium/drivers/i965/brw_swtnl.c
@@ -1,111 +1,93 @@
 
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
- */
-static GLboolean check_fallbacks( struct brw_context *brw,
-				  const struct _mesa_prim *prim,
-				  GLuint nr_prims )
+#include "brw_context.h"
+#include "brw_pipe_rast.h"
+
+
+static GLboolean need_swtnl( struct brw_context *brw )
 {
-   GLuint i;
+   const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
 
    /* If we don't require strict OpenGL conformance, never 
     * use fallbacks.  If we're forcing fallbacks, always
     * use fallfacks.
     */
    if (brw->flags.no_swtnl)
-      return GL_FALSE;
+      return FALSE;
 
    if (brw->flags.force_swtnl)
-      return GL_TRUE;
+      return TRUE;
 
-   if (brw->curr.rast->tmpl.smooth_polys) {
-      for (i = 0; i < nr_prims; i++)
-	 if (reduced_prim[prim[i].mode] == GL_TRIANGLES) 
-	    return GL_TRUE;
+   /* Exceeding hw limits on number of VS inputs?
+    */
+   if (brw->curr.num_vertex_elements == 0 ||
+       brw->curr.num_vertex_elements >= BRW_VEP_MAX) {
+      return TRUE;
    }
 
-   /* BRW hardware will do AA lines, but they are non-conformant it
-    * seems.  TBD whether we keep this fallback:
+   /* Position array with zero stride?
+    *
+    * XXX: position isn't always at zero...
+    * XXX: eliminate zero-stride arrays
     */
-   if (ctx->Line.SmoothFlag) {
-      for (i = 0; i < nr_prims; i++)
-	 if (reduced_prim[prim[i].mode] == GL_LINES) 
-	    return GL_TRUE;
+   {
+      int ve0_vb = brw->curr.vertex_element[0].vertex_buffer_index;
+      
+      if (brw->curr.vertex_buffer[ve0_vb].stride == 0)
+	 return TRUE;
    }
 
-   /* Stipple -- these fallbacks could be resolved with a little
-    * bit of work?
+   /* XXX: short-circuit
     */
-   if (ctx->Line.StippleFlag) {
-      for (i = 0; i < nr_prims; i++) {
-	 /* GS doesn't get enough information to know when to reset
-	  * the stipple counter?!?
-	  */
-	 if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP) 
-	    return GL_TRUE;
-	    
-	 if (prim[i].mode == GL_POLYGON &&
-	     (ctx->Polygon.FrontMode == GL_LINE ||
-	      ctx->Polygon.BackMode == GL_LINE))
-	    return GL_TRUE;
-      }
-   }
+   return FALSE;
 
-   if (ctx->Point.SmoothFlag) {
-      for (i = 0; i < nr_prims; i++)
-	 if (prim[i].mode == GL_POINTS) 
-	    return GL_TRUE;
-   }
+   if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) {
+      if (rast->poly_smooth)
+	 return TRUE;
 
-   /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
-    * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
-    * as GL_CLAMP_TO_EDGE instead.  If we're using GL_CLAMP, and
-    * we want strict conformance, force the fallback.
-    * Right now, we only do this for 2D textures.
-    */
+   }
+   
+   if (brw->reduced_primitive == PIPE_PRIM_LINES ||
+       (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+	(rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+	 rast->fill_ccw == PIPE_POLYGON_MODE_LINE)))
    {
-      int u;
-      for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
-         struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
-         if (texUnit->Enabled) {
-            if (texUnit->Enabled & TEXTURE_1D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_2D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-            if (texUnit->Enabled & TEXTURE_3D_BIT) {
-               if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
-                   texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
-                   return GL_TRUE;
-               }
-            }
-         }
-      }
+      /* BRW hardware will do AA lines, but they are non-conformant it
+       * seems.  TBD whether we keep this fallback:
+       */
+      if (rast->line_smooth)
+	 return TRUE;
+
+      /* XXX: was a fallback in mesa (gs doesn't get enough
+       * information to know when to reset stipple counter), but there
+       * must be a way around it.
+       */
+      if (rast->line_stipple_enable &&
+	  (brw->reduced_primitive == PIPE_PRIM_TRIANGLES ||
+	   brw->primitive == PIPE_PRIM_LINE_LOOP || 
+	   brw->primitive == PIPE_PRIM_LINE_STRIP))
+	 return TRUE;
    }
 
-   /* Exceeding hw limits on number of VS inputs?
-    */
-   if (brw->nr_ve == 0 ||
-       brw->nr_ve >= BRW_VEP_MAX) {
-      return TRUE;
+   
+   if (brw->reduced_primitive == PIPE_PRIM_POINTS ||
+       (brw->reduced_primitive == PIPE_PRIM_TRIANGLES &&
+	(rast->fill_cw == PIPE_POLYGON_MODE_POINT ||
+	 rast->fill_ccw == PIPE_POLYGON_MODE_POINT)))
+   {
+      if (rast->point_smooth)
+	 return TRUE;
    }
 
-   /* Position array with zero stride?
+   /* BRW hardware doesn't handle CLAMP texturing correctly;
+    * brw_wm_sampler_state:translate_wrap_mode() treats CLAMP
+    * as CLAMP_TO_EDGE instead.  If we're using CLAMP, and
+    * we want strict conformance, force the fallback.
+    *
+    * XXX: need a workaround for this.
     */
-   if (brw->vs[brw->ve[0]]->stride == 0)
-      return TRUE;
-
-
       
    /* Nothing stopping us from the fast path now */
-   return GL_FALSE;
+   return FALSE;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index ff2466528d..57fd8f20b2 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -35,6 +35,7 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_debug.h"
 
 #define VS 0
 #define GS 1
@@ -111,7 +112,7 @@ static GLboolean check_urb_layout( struct brw_context *brw )
 /* Most minimal update, forces re-emit of URB fence packet after GS
  * unit turned on/off.
  */
-static void recalculate_urb_fence( struct brw_context *brw )
+static int recalculate_urb_fence( struct brw_context *brw )
 {
    GLuint csize = brw->curbe.total_size;
    GLuint vsize = brw->vs.prog_data->urb_entry_size;
@@ -204,6 +205,8 @@ done:
       
       brw->state.dirty.brw |= BRW_NEW_URB_FENCE;
    }
+
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 010ac115d3..3965ca6c54 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -28,17 +28,19 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-           
+
+#include "tgsi/tgsi_dump.h"           
 
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_util.h"
 #include "brw_state.h"
+#include "brw_pipe_rast.h"
 
 
 static void do_vs_prog( struct brw_context *brw, 
-			struct brw_vertex_program *vp,
+			struct brw_vertex_shader *vp,
 			struct brw_vs_prog_key *key )
 {
    GLuint program_size;
@@ -51,16 +53,12 @@ static void do_vs_prog( struct brw_context *brw,
    brw_init_compile(brw, &c.func);
    c.vp = vp;
 
-   c.prog_data.nr_outputs_written = vp->program.Base.OutputsWritten;
-   c.prog_data.inputs_read = vp->program.Base.InputsRead;
-
-   if (c.key.copy_edgeflag) {
-      c.prog_data.nr_outputs_written |= 1<<VERT_RESULT_EDGE;
-      c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG;
-   }
+   c.prog_data.nr_outputs = vp->info.num_outputs;
+   c.prog_data.nr_inputs = vp->info.num_inputs;
+   c.prog_data.copy_edgeflag = c.key.copy_edgeflag;
 
    if (0)
-      tgsi_dump(&c.vp->tokens, 0);
+      tgsi_dump(c.vp->tokens, 0);
 
    /* Emit GEN4 code.
     */
@@ -80,11 +78,10 @@ static void do_vs_prog( struct brw_context *brw,
 }
 
 
-static void brw_upload_vs_prog(struct brw_context *brw)
+static int brw_upload_vs_prog(struct brw_context *brw)
 {
    struct brw_vs_prog_key key;
-   struct brw_vertex_program *vp = 
-      (struct brw_vertex_program *)brw->vertex_program;
+   struct brw_vertex_shader *vp = brw->curr.vertex_shader;
 
    memset(&key, 0, sizeof(key));
 
@@ -92,9 +89,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
     * the inputs it asks for, whether they are varying or not.
     */
    key.program_string_id = vp->id;
-   key.nr_userclip = brw->nr_userclip;
-   key.copy_edgeflag = (brw->rast->fill_ccw != PIPE_POLYGON_MODE_FILL ||
-			brw->rast->fill_cw != PIPE_POLYGON_MODE_FILL);
+   key.nr_userclip = brw->curr.ucp.nr;
+   key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
+			brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
 
    /* Make an early check for the key.
     */
@@ -105,6 +102,8 @@ static void brw_upload_vs_prog(struct brw_context *brw)
 				      &brw->vs.prog_data);
    if (brw->vs.prog_bo == NULL)
       do_vs_prog(brw, vp, &key);
+
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index e33fa2f0aa..58119567dc 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -46,17 +46,22 @@ struct brw_vs_prog_key {
 };
 
 
+
+#define MAX_IF_DEPTH 32
+#define MAX_LOOP_DEPTH 32
+
 struct brw_vs_compile {
    struct brw_compile func;
    struct brw_vs_prog_key key;
    struct brw_vs_prog_data prog_data;
 
-   struct brw_vertex_program *vp;
+   struct brw_vertex_shader *vp;
 
    GLuint nr_inputs;
+   GLuint nr_outputs;
+   GLboolean copy_edgeflag;
 
    GLuint first_output;
-   GLuint nr_outputs;
    GLuint first_overflow_output; /**< VERT_ATTRIB_x */
 
    GLuint first_tmp;
@@ -80,8 +85,13 @@ struct brw_vs_compile {
       GLint index;
       struct brw_reg reg;
    } current_const[3];
+
+   struct brw_instruction *if_inst[MAX_IF_DEPTH];
+   struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+
 };
 
+
 void brw_vs_emit( struct brw_vs_compile *c );
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 04132a167b..4daa98b29e 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -28,11 +28,25 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-            
 
 #include "pipe/p_shader_tokens.h"
+            
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "tgsi/tgsi_ureg.h"
+
 #include "brw_context.h"
 #include "brw_vs.h"
+#include "brw_debug.h"
+
+
+struct ureg_instruction {
+   unsigned opcode:8;
+   unsigned tex_target:3;
+   struct ureg_dst dst;
+   struct ureg_src src[3];
+};
 
 
 static struct brw_reg get_tmp( struct brw_vs_compile *c )
@@ -72,8 +86,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     * works if everything fits in the GRF.
     * XXX this heuristic/check may need some fine tuning...
     */
-   if (c->vp->program.Base.Parameters->NumParameters +
-       c->vp->program.Base.NumTemporaries + 20 > BRW_MAX_GRF)
+   if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
+       c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
       c->vp->use_const_buffer = GL_TRUE;
    else
       c->vp->use_const_buffer = GL_FALSE;
@@ -106,25 +120,21 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    }
    else {
       /* use a section of the GRF for constants */
-      GLuint nr_params = c->vp->program.Base.Parameters->NumParameters;
+      GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
       for (i = 0; i < nr_params; i++) {
-         c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+         c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
       }
       reg += (nr_params + 1) / 2;
       c->prog_data.curb_read_length = reg - 1;
-
       c->prog_data.nr_params = nr_params * 4;
    }
 
    /* Allocate input regs:  
     */
-   c->nr_inputs = 0;
-   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
-      if (c->prog_data.inputs_read & (1 << i)) {
-	 c->nr_inputs++;
-	 c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
-	 reg++;
-      }
+   c->nr_inputs = c->vp->info.num_inputs;
+   for (i = 0; i < c->nr_inputs; i++) {
+      c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
+      reg++;
    }
 
    /* If there are no inputs, we'll still be reading one attribute's worth
@@ -144,45 +154,51 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    else
       mrf = 4;
 
-   for (i = 0; i < c->prog_data.nr_outputs_written; i++) {
-      c->nr_outputs++;
-      assert(i < Elements(c->regs[PROGRAM_OUTPUT]));
-      if (i == VERT_RESULT_HPOS) {
-	 c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+   /* XXX: need to access vertex output semantics here:
+    */
+   c->nr_outputs = c->prog_data.nr_outputs;
+   for (i = 0; i < c->prog_data.nr_outputs; i++) {
+      assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+
+      /* XXX: Hardwire position to zero:
+       */
+      if (i == 0) {
+	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
 	 reg++;
       }
-      else if (i == VERT_RESULT_PSIZ) {
-	 c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
+      /* XXX: disable psiz:
+       */
+      else if (0) {
+	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
 	 reg++;
 	 mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
       }
+      else if (mrf < 16) {
+	 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
+	 mrf++;
+      }
       else {
-	 if (mrf < 16) {
-	    c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
-	    mrf++;
-	 }
-	 else {
-	    /* too many vertex results to fit in MRF, use GRF for overflow */
-	    if (!c->first_overflow_output)
-	       c->first_overflow_output = i;
-	    c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
-	    reg++;
-	 }
+	 /* too many vertex results to fit in MRF, use GRF for overflow */
+	 if (!c->first_overflow_output)
+	    c->first_overflow_output = i;
+	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+	 reg++;
       }
    }     
 
    /* Allocate program temporaries:
     */
-   for (i = 0; i < c->vp->program.Base.NumTemporaries; i++) {
-      c->regs[PROGRAM_TEMPORARY][i] = brw_vec8_grf(reg, 0);
+   
+   for (i = 0; i < c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1; i++) {
+      c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
       reg++;
    }
 
    /* Address reg(s).  Don't try to use the internal address reg until
     * deref time.
     */
-   for (i = 0; i < c->vp->program.Base.NumAddressRegs; i++) {
-      c->regs[PROGRAM_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
+   for (i = 0; i < c->vp->info.file_max[TGSI_FILE_ADDRESS]+1; i++) {
+      c->regs[TGSI_FILE_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
 					     reg,
 					     0,
 					     BRW_REGISTER_TYPE_D,
@@ -243,8 +259,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->prog_data.total_grf = reg;
 
    if (BRW_DEBUG & DEBUG_VS) {
-      debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs);
-      debug_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries);
+      debug_printf("%s NumAddrRegs %d\n", __FUNCTION__, 
+		   c->vp->info.file_max[TGSI_FILE_ADDRESS]+1);
+      debug_printf("%s NumTemps %d\n", __FUNCTION__,
+		   c->vp->info.file_max[TGSI_FILE_TEMPORARY]+1);
       debug_printf("%s reg = %d\n", __FUNCTION__, reg);
    }
 }
@@ -740,25 +758,25 @@ static void emit_nrm( struct brw_vs_compile *c,
 
 static struct brw_reg
 get_constant(struct brw_vs_compile *c,
-             const struct prog_instruction *inst,
+             const struct ureg_instruction *inst,
              GLuint argIndex)
 {
-   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   const struct ureg_src src = inst->src[argIndex];
    struct brw_compile *p = &c->func;
    struct brw_reg const_reg;
    struct brw_reg const2_reg;
-   const GLboolean relAddr = src->RelAddr;
+   const GLboolean relAddr = src.Indirect;
 
    assert(argIndex < 3);
 
-   if (c->current_const[argIndex].index != src->Index || relAddr) {
-      struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0];
+   if (c->current_const[argIndex].index != src.Index || relAddr) {
+      struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
 
-      c->current_const[argIndex].index = src->Index;
+      c->current_const[argIndex].index = src.Index;
 
 #if 0
       printf("  fetch const[%d] for arg %d into reg %d\n",
-             src->Index, argIndex, c->current_const[argIndex].reg.nr);
+             src.Index, argIndex, c->current_const[argIndex].reg.nr);
 #endif
       /* need to fetch the constant now */
       brw_dp_READ_4_vs(p,
@@ -766,7 +784,7 @@ get_constant(struct brw_vs_compile *c,
                        0,                             /* oword */
                        relAddr,                       /* relative indexing? */
                        addrReg,                       /* address register */
-                       16 * src->Index,               /* byte offset */
+                       16 * src.Index,               /* byte offset */
                        SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
                        );
 
@@ -783,7 +801,7 @@ get_constant(struct brw_vs_compile *c,
                           1,                       /* oword */
                           relAddr,                 /* relative indexing? */
                           addrReg,                 /* address register */
-                          16 * src->Index,         /* byte offset */
+                          16 * src.Index,         /* byte offset */
                           SURF_INDEX_VERT_CONST_BUFFER
                           );
       }
@@ -813,30 +831,24 @@ get_constant(struct brw_vs_compile *c,
 /* TODO: relative addressing!
  */
 static struct brw_reg get_reg( struct brw_vs_compile *c,
-			       gl_register_file file,
+			       enum tgsi_file_type file,
 			       GLuint index )
 {
    switch (file) {
-   case PROGRAM_TEMPORARY:
-   case PROGRAM_INPUT:
-   case PROGRAM_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_OUTPUT:
+   case TGSI_FILE_CONSTANT:
       assert(c->regs[file][index].nr != 0);
       return c->regs[file][index];
-   case PROGRAM_STATE_VAR:
-   case PROGRAM_CONSTANT:
-   case PROGRAM_UNIFORM:
-      assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
-      return c->regs[PROGRAM_STATE_VAR][index];
-   case PROGRAM_ADDRESS:
+
+   case TGSI_FILE_ADDRESS:
       assert(index == 0);
       return c->regs[file][index];
 
-   case PROGRAM_UNDEFINED:			/* undef values */
+   case TGSI_FILE_NULL:			/* undef values */
       return brw_null_reg();
 
-   case PROGRAM_LOCAL_PARAM: 
-   case PROGRAM_ENV_PARAM: 
-   case PROGRAM_WRITE_ONLY:
    default:
       assert(0);
       return brw_null_reg();
@@ -853,7 +865,7 @@ static struct brw_reg deref( struct brw_vs_compile *c,
 {
    struct brw_compile *p = &c->func;
    struct brw_reg tmp = vec4(get_tmp(c));
-   struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
+   struct brw_reg addr_reg = c->regs[TGSI_FILE_ADDRESS][0];
    struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW);
    GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
    struct brw_reg indirect = brw_vec4_indirect(0,0);
@@ -886,17 +898,17 @@ static struct brw_reg deref( struct brw_vs_compile *c,
  */
 static struct brw_reg
 get_src_reg( struct brw_vs_compile *c,
-             const struct prog_instruction *inst,
+             const struct ureg_instruction *inst,
              GLuint argIndex )
 {
-   const GLuint file = inst->SrcReg[argIndex].File;
-   const GLint index = inst->SrcReg[argIndex].Index;
-   const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr;
+   const GLuint file = inst->src[argIndex].File;
+   const GLint index = inst->src[argIndex].Index;
+   const GLboolean relAddr = inst->src[argIndex].Indirect;
 
    switch (file) {
-   case PROGRAM_TEMPORARY:
-   case PROGRAM_INPUT:
-   case PROGRAM_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_OUTPUT:
       if (relAddr) {
          return deref(c, c->regs[file][0], index);
       }
@@ -905,30 +917,25 @@ get_src_reg( struct brw_vs_compile *c,
          return c->regs[file][index];
       }
 
-   case PROGRAM_STATE_VAR:
-   case PROGRAM_CONSTANT:
-   case PROGRAM_UNIFORM:
-   case PROGRAM_ENV_PARAM:
+   case TGSI_FILE_CONSTANT:
       if (c->vp->use_const_buffer) {
          return get_constant(c, inst, argIndex);
       }
       else if (relAddr) {
-         return deref(c, c->regs[PROGRAM_STATE_VAR][0], index);
+         return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
       }
       else {
-         assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0);
-         return c->regs[PROGRAM_STATE_VAR][index];
+         assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
+         return c->regs[TGSI_FILE_CONSTANT][index];
       }
-   case PROGRAM_ADDRESS:
+   case TGSI_FILE_ADDRESS:
       assert(index == 0);
       return c->regs[file][index];
 
-   case PROGRAM_UNDEFINED:
+   case TGSI_FILE_NULL:
       /* this is a normal case since we loop over all three src args */
       return brw_null_reg();
 
-   case PROGRAM_LOCAL_PARAM: 
-   case PROGRAM_WRITE_ONLY:
    default:
       assert(0);
       return brw_null_reg();
@@ -959,27 +966,27 @@ static void emit_arl( struct brw_vs_compile *c,
  * Return the brw reg for the given instruction's src argument.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
-                               const struct prog_instruction *inst,
+                               const struct ureg_instruction *inst,
                                GLuint argIndex )
 {
-   const struct prog_src_register *src = &inst->SrcReg[argIndex];
+   const struct ureg_src src = inst->src[argIndex];
    struct brw_reg reg;
 
-   if (src->File == PROGRAM_UNDEFINED)
+   if (src.File == TGSI_FILE_NULL)
       return brw_null_reg();
 
    reg = get_src_reg(c, inst, argIndex);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
-				       GET_SWZ(src->Swizzle, 1),
-				       GET_SWZ(src->Swizzle, 2),
-				       GET_SWZ(src->Swizzle, 3));
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX,
+				       src.SwizzleY,
+				       src.SwizzleZ,
+				       src.SwizzleW);
 
    /* Note this is ok for non-swizzle instructions: 
     */
-   reg.negate = src->Negate ? 1 : 0;   
+   reg.negate = src.Negate ? 1 : 0;   
 
    return reg;
 }
@@ -989,21 +996,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
  * Get brw register for the given program dest register.
  */
 static struct brw_reg get_dst( struct brw_vs_compile *c,
-			       struct prog_dst_register dst )
+			       struct ureg_dst dst )
 {
    struct brw_reg reg;
 
    switch (dst.File) {
-   case PROGRAM_TEMPORARY:
-   case PROGRAM_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_OUTPUT:
       assert(c->regs[dst.File][dst.Index].nr != 0);
       reg = c->regs[dst.File][dst.Index];
       break;
-   case PROGRAM_ADDRESS:
+   case TGSI_FILE_ADDRESS:
       assert(dst.Index == 0);
       reg = c->regs[dst.File][dst.Index];
       break;
-   case PROGRAM_UNDEFINED:
+   case TGSI_FILE_NULL:
       /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
       reg = brw_null_reg();
       break;
@@ -1027,15 +1034,16 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 {
    struct brw_compile *p = &c->func;
    struct brw_reg m0 = brw_message_reg(0);
-   struct brw_reg pos = c->regs[PROGRAM_OUTPUT][VERT_RESULT_HPOS];
+   struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
    struct brw_reg ndc;
    int eot;
    GLuint len_vertext_header = 2;
 
    if (c->key.copy_edgeflag) {
+      assert(0);
       brw_MOV(p, 
-	      get_reg(c, PROGRAM_OUTPUT, VERT_RESULT_EDGE),
-	      get_reg(c, PROGRAM_INPUT, VERT_ATTRIB_EDGEFLAG));
+	      get_reg(c, TGSI_FILE_OUTPUT, 0),
+	      get_reg(c, TGSI_FILE_INPUT, 0));
    }
 
    /* Build ndc coords */
@@ -1060,7 +1068,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
       brw_set_access_mode(p, BRW_ALIGN_16);	
 
       if (c->prog_data.writes_psiz) {
-	 struct brw_reg psiz = c->regs[PROGRAM_OUTPUT][VERT_RESULT_PSIZ];
+	 struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_PSIZ];
 	 brw_MUL(p, brw_writemask(header1, BRW_WRITEMASK_W), brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
 	 brw_AND(p, brw_writemask(header1, BRW_WRITEMASK_W), header1, brw_imm_ud(0x7ff<<8));
       }
@@ -1138,7 +1146,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 eot, 		/* writes complete */
 		 0, 		/* urb destination offset */
 		 BRW_URB_SWIZZLE_INTERLEAVE);
-!
+
    if (c->first_overflow_output > 0) {
       /* Not all of the vertex outputs/results fit into the MRF.
        * Move the overflowed attributes from the GRF to the MRF and
@@ -1148,9 +1156,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * at mrf[4] atm...
        */
       GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs_written; i++) {
+      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
 	 /* move from GRF to MRF */
-	 brw_MOV(p, brw_message_reg(4+mrf), c->regs[PROGRAM_OUTPUT][i]);
+	 brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
 	 mrf++;
       }
 
@@ -1195,9 +1203,9 @@ post_vs_emit( struct brw_vs_compile *c,
 }
 
 static uint32_t
-get_predicate(const struct prog_instruction *inst)
+get_predicate(const struct ureg_instruction *inst)
 {
-   if (inst->DstReg.CondMask == COND_TR)
+   if (inst->dst.CondMask == COND_TR)
       return BRW_PREDICATE_NONE;
 
    /* All of GLSL only produces predicates for COND_NE and one channel per
@@ -1213,9 +1221,9 @@ get_predicate(const struct prog_instruction *inst)
     * predicate on that.  We can probably support this, but it won't
     * necessarily be easy.
     */
-   assert(inst->DstReg.CondMask == COND_NE);
+/*   assert(inst->dst.CondMask == COND_NE); */
 
-   switch (inst->DstReg.CondSwizzle) {
+   switch (inst->dst.CondSwizzle) {
    case SWIZZLE_XXXX:
       return BRW_PREDICATE_ALIGN16_REPLICATE_X;
    case SWIZZLE_YYYY:
@@ -1225,26 +1233,281 @@ get_predicate(const struct prog_instruction *inst)
    case SWIZZLE_WWWW:
       return BRW_PREDICATE_ALIGN16_REPLICATE_W;
    default:
-      _mesa_problem(NULL, "Unexpected predicate: 0x%08x\n",
-		    inst->DstReg.CondMask);
+      debug_printf("Unexpected predicate: 0x%08x\n",
+		    inst->dst.CondMask);
       return BRW_PREDICATE_NORMAL;
    }
 }
 
+static void emit_insn(struct brw_vs_compile *c,
+		      const struct tgsi_full_instruction *insn)
+{
+   struct brw_reg args[3], dst;
+   GLuint i;
+
+#if 0
+   printf("%d: ", insn);
+   _mesa_print_instruction(inst);
+#endif
+
+   /* Get argument regs.
+    */
+   for (i = 0; i < 3; i++) {
+      const struct ureg_src src = inst->src[i];
+      index = src.Index;
+      file = src.File;	
+      args[i] = get_arg(c, inst, i);
+   }
+
+   /* Get dest regs.  Note that it is possible for a reg to be both
+    * dst and arg, given the static allocation of registers.  So
+    * care needs to be taken emitting multi-operation instructions.
+    */ 
+   index = inst->dst.Index;
+   file = inst->dst.File;
+   dst = get_dst(c, inst->dst);
+
+   if (inst->SaturateMode != SATURATE_OFF) {
+      debug_printf("Unsupported saturate %d in vertex shader",
+		   inst->SaturateMode);
+   }
+
+   switch (inst->Opcode) {
+   case TGSI_OPCODE_ABS:
+      brw_MOV(p, dst, brw_abs(args[0]));
+      break;
+   case TGSI_OPCODE_ADD:
+      brw_ADD(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_COS:
+      emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_DP3:
+      brw_DP3(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_DP4:
+      brw_DP4(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_DPH:
+      brw_DPH(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_NRM3:
+      emit_nrm(c, dst, args[0], 3);
+      break;
+   case TGSI_OPCODE_NRM4:
+      emit_nrm(c, dst, args[0], 4);
+      break;
+   case TGSI_OPCODE_DST:
+      unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
+      break;
+   case TGSI_OPCODE_EXP:
+      unalias1(c, dst, args[0], emit_exp_noalias);
+      break;
+   case TGSI_OPCODE_EX2:
+      emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_ARL:
+      emit_arl(c, dst, args[0]);
+      break;
+   case TGSI_OPCODE_FLR:
+      brw_RNDD(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_FRC:
+      brw_FRC(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_LOG:
+      unalias1(c, dst, args[0], emit_log_noalias);
+      break;
+   case TGSI_OPCODE_LG2:
+      emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_LIT:
+      unalias1(c, dst, args[0], emit_lit_noalias);
+      break;
+   case TGSI_OPCODE_LRP:
+      unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
+      break;
+   case TGSI_OPCODE_MAD:
+      brw_MOV(p, brw_acc_reg(), args[2]);
+      brw_MAC(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MAX:
+      emit_max(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MIN:
+      emit_min(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_MOV:
+      brw_MOV(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_MUL:
+      brw_MUL(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_POW:
+      emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); 
+      break;
+   case TGSI_OPCODE_RCP:
+      emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_RSQ:
+      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_SEQ:
+      emit_seq(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SIN:
+      emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
+      break;
+   case TGSI_OPCODE_SNE:
+      emit_sne(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGE:
+      emit_sge(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SGT:
+      emit_sgt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLT:
+      emit_slt(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SLE:
+      emit_sle(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_SUB:
+      brw_ADD(p, dst, args[0], negate(args[1]));
+      break;
+   case TGSI_OPCODE_TRUNC:
+      /* round toward zero */
+      brw_RNDZ(p, dst, args[0]);
+      break;
+   case TGSI_OPCODE_XPD:
+      emit_xpd(p, dst, args[0], args[1]);
+      break;
+   case TGSI_OPCODE_IF:
+      assert(if_depth < MAX_IF_DEPTH);
+      if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+      /* Note that brw_IF smashes the predicate_control field. */
+      if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+      if_depth++;
+      break;
+   case TGSI_OPCODE_ELSE:
+      if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+      break;
+   case TGSI_OPCODE_ENDIF:
+      assert(if_depth > 0);
+      brw_ENDIF(p, if_inst[--if_depth]);
+      break;			
+   case TGSI_OPCODE_BGNLOOP:
+      loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+      break;
+   case TGSI_OPCODE_BRK:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_BREAK(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_CONT:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_CONT(p);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_ENDLOOP: 
+   {
+      struct brw_instruction *inst0, *inst1;
+      GLuint br = 1;
+
+      loop_depth--;
+
+      if (BRW_IS_IGDNG(brw))
+	 br = 2;
+
+      inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+      /* patch all the BREAK/CONT instructions from last BEGINLOOP */
+      while (inst0 > loop_inst[loop_depth]) {
+	 inst0--;
+	 if (inst0->header.opcode == TGSI_OPCODE_BRK) {
+	    inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+	    inst0->bits3.if_else.pop_count = 0;
+	 }
+	 else if (inst0->header.opcode == TGSI_OPCODE_CONT) {
+	    inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+	    inst0->bits3.if_else.pop_count = 0;
+	 }
+      }
+   }
+   break;
+   case TGSI_OPCODE_BRA:
+      brw_set_predicate_control(p, get_predicate(inst));
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      break;
+   case TGSI_OPCODE_CAL:
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_ADD(p, get_addr_reg(stack_index),
+	      get_addr_reg(stack_index), brw_imm_d(4));
+      brw_save_call(p, inst->Comment, p->nr_insn);
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+   case TGSI_OPCODE_RET:
+      brw_ADD(p, get_addr_reg(stack_index),
+	      get_addr_reg(stack_index), brw_imm_d(-4));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+      brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      break;
+   case TGSI_OPCODE_END:	
+      end_offset = p->nr_insn;
+      /* this instruction will get patched later to jump past subroutine
+       * code, etc.
+       */
+      brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
+      break;
+   case TGSI_OPCODE_PRINT:
+      /* no-op */
+      break;
+   case TGSI_OPCODE_BGNSUB:
+      brw_save_label(p, inst->Comment, p->nr_insn);
+      break;
+   case TGSI_OPCODE_ENDSUB:
+      /* no-op */
+      break;
+   default:
+      debug_printf("Unsupported opcode %i (%s) in vertex shader",
+		   inst->Opcode, inst->Opcode < MAX_OPCODE ?
+		   _mesa_opcode_string(inst->Opcode) :
+		   "unknown");
+   }
+
+   /* Set the predication update on the last instruction of the native
+    * instruction sequence.
+    *
+    * This would be problematic if it was set on a math instruction,
+    * but that shouldn't be the case with the current GLSL compiler.
+    */
+   if (inst->CondUpdate) {
+      struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
+
+      assert(hw_insn->header.destreg__conditionalmod == 0);
+      hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
+   }
+
+   release_tmps(c);
+}
+
+
 /* Emit the vertex program instructions here.
  */
 void brw_vs_emit(struct brw_vs_compile *c )
 {
-#define MAX_IF_DEPTH 32
-#define MAX_LOOP_DEPTH 32
    struct brw_compile *p = &c->func;
    struct brw_context *brw = p->brw;
-   const GLuint nr_insns = c->vp->program.Base.NumInstructions;
    GLuint insn, if_depth = 0, loop_depth = 0;
    GLuint end_offset = 0;
    struct brw_instruction *end_inst, *last_inst;
-   struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
    const struct brw_indirect stack_index = brw_indirect(0, 0);   
+   struct tgsi_parse_context parse;
+   struct tgsi_full_declaration *decl;
    GLuint index;
    GLuint file;
 
@@ -1264,258 +1527,8 @@ void brw_vs_emit(struct brw_vs_compile *c )
 
    for (insn = 0; insn < nr_insns; insn++) {
 
-      const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
-      struct brw_reg args[3], dst;
-      GLuint i;
+      const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn];
       
-#if 0
-      printf("%d: ", insn);
-      _mesa_print_instruction(inst);
-#endif
-
-      /* Get argument regs.
-       */
-      for (i = 0; i < 3; i++) {
-	 const struct prog_src_register *src = &inst->SrcReg[i];
-	 index = src->Index;
-	 file = src->File;	
-	 args[i] = get_arg(c, inst, i);
-      }
-
-      /* Get dest regs.  Note that it is possible for a reg to be both
-       * dst and arg, given the static allocation of registers.  So
-       * care needs to be taken emitting multi-operation instructions.
-       */ 
-      index = inst->DstReg.Index;
-      file = inst->DstReg.File;
-      dst = get_dst(c, inst->DstReg);
-
-      if (inst->SaturateMode != SATURATE_OFF) {
-	 _mesa_problem(NULL, "Unsupported saturate %d in vertex shader",
-                       inst->SaturateMode);
-      }
-
-      switch (inst->Opcode) {
-      case TGSI_OPCODE_ABS:
-	 brw_MOV(p, dst, brw_abs(args[0]));
-	 break;
-      case TGSI_OPCODE_ADD:
-	 brw_ADD(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_COS:
-	 emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_DP3:
-	 brw_DP3(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_DP4:
-	 brw_DP4(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_DPH:
-	 brw_DPH(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_NRM3:
-	 emit_nrm(c, dst, args[0], 3);
-	 break;
-      case TGSI_OPCODE_NRM4:
-	 emit_nrm(c, dst, args[0], 4);
-	 break;
-      case TGSI_OPCODE_DST:
-	 unalias2(c, dst, args[0], args[1], emit_dst_noalias); 
-	 break;
-      case TGSI_OPCODE_EXP:
-	 unalias1(c, dst, args[0], emit_exp_noalias);
-	 break;
-      case TGSI_OPCODE_EX2:
-	 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_ARL:
-	 emit_arl(c, dst, args[0]);
-	 break;
-      case TGSI_OPCODE_FLR:
-	 brw_RNDD(p, dst, args[0]);
-	 break;
-      case TGSI_OPCODE_FRC:
-	 brw_FRC(p, dst, args[0]);
-	 break;
-      case TGSI_OPCODE_LOG:
-	 unalias1(c, dst, args[0], emit_log_noalias);
-	 break;
-      case TGSI_OPCODE_LG2:
-	 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_LIT:
-	 unalias1(c, dst, args[0], emit_lit_noalias);
-	 break;
-      case TGSI_OPCODE_LRP:
-	 unalias3(c, dst, args[0], args[1], args[2], emit_lrp_noalias);
-	 break;
-      case TGSI_OPCODE_MAD:
-	 brw_MOV(p, brw_acc_reg(), args[2]);
-	 brw_MAC(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_MAX:
-	 emit_max(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_MIN:
-	 emit_min(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_MOV:
-	 brw_MOV(p, dst, args[0]);
-	 break;
-      case TGSI_OPCODE_MUL:
-	 brw_MUL(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_POW:
-	 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); 
-	 break;
-      case TGSI_OPCODE_RCP:
-	 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_RSQ:
-	 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_SEQ:
-         emit_seq(p, dst, args[0], args[1]);
-         break;
-      case TGSI_OPCODE_SIN:
-	 emit_math1(c, BRW_MATH_FUNCTION_SIN, dst, args[0], BRW_MATH_PRECISION_FULL);
-	 break;
-      case TGSI_OPCODE_SNE:
-         emit_sne(p, dst, args[0], args[1]);
-         break;
-      case TGSI_OPCODE_SGE:
-	 emit_sge(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_SGT:
-         emit_sgt(p, dst, args[0], args[1]);
-         break;
-      case TGSI_OPCODE_SLT:
-	 emit_slt(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_SLE:
-         emit_sle(p, dst, args[0], args[1]);
-         break;
-      case TGSI_OPCODE_SUB:
-	 brw_ADD(p, dst, args[0], negate(args[1]));
-	 break;
-      case TGSI_OPCODE_TRUNC:
-         /* round toward zero */
-	 brw_RNDZ(p, dst, args[0]);
-	 break;
-      case TGSI_OPCODE_XPD:
-	 emit_xpd(p, dst, args[0], args[1]);
-	 break;
-      case TGSI_OPCODE_IF:
-	 assert(if_depth < MAX_IF_DEPTH);
-	 if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
-	 /* Note that brw_IF smashes the predicate_control field. */
-	 if_inst[if_depth]->header.predicate_control = get_predicate(inst);
-	 if_depth++;
-	 break;
-      case TGSI_OPCODE_ELSE:
-	 if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
-	 break;
-      case TGSI_OPCODE_ENDIF:
-         assert(if_depth > 0);
-	 brw_ENDIF(p, if_inst[--if_depth]);
-	 break;			
-      case TGSI_OPCODE_BGNLOOP:
-         loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
-         break;
-      case TGSI_OPCODE_BRK:
-	 brw_set_predicate_control(p, get_predicate(inst));
-         brw_BREAK(p);
-	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-         break;
-      case TGSI_OPCODE_CONT:
-	 brw_set_predicate_control(p, get_predicate(inst));
-         brw_CONT(p);
-         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-         break;
-      case TGSI_OPCODE_ENDLOOP: 
-         {
-            struct brw_instruction *inst0, *inst1;
-	    GLuint br = 1;
-
-            loop_depth--;
-
-	    if (BRW_IS_IGDNG(brw))
-	       br = 2;
-
-            inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
-            /* patch all the BREAK/CONT instructions from last BEGINLOOP */
-            while (inst0 > loop_inst[loop_depth]) {
-               inst0--;
-               if (inst0->header.opcode == BRW_TGSI_OPCODE_BREAK) {
-                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
-                  inst0->bits3.if_else.pop_count = 0;
-               }
-               else if (inst0->header.opcode == BRW_TGSI_OPCODE_CONTINUE) {
-                  inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
-                  inst0->bits3.if_else.pop_count = 0;
-               }
-            }
-         }
-         break;
-      case TGSI_OPCODE_BRA:
-	 brw_set_predicate_control(p, get_predicate(inst));
-         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
-	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-         break;
-      case TGSI_OPCODE_CAL:
-	 brw_set_access_mode(p, BRW_ALIGN_1);
-	 brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
-	 brw_set_access_mode(p, BRW_ALIGN_16);
-	 brw_ADD(p, get_addr_reg(stack_index),
-			 get_addr_reg(stack_index), brw_imm_d(4));
-         brw_save_call(p, inst->Comment, p->nr_insn);
-	 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
-         break;
-      case TGSI_OPCODE_RET:
-	 brw_ADD(p, get_addr_reg(stack_index),
-			 get_addr_reg(stack_index), brw_imm_d(-4));
-	 brw_set_access_mode(p, BRW_ALIGN_1);
-         brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
-	 brw_set_access_mode(p, BRW_ALIGN_16);
-	 break;
-      case TGSI_OPCODE_END:	
-         end_offset = p->nr_insn;
-         /* this instruction will get patched later to jump past subroutine
-          * code, etc.
-          */
-         brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
-         break;
-      case TGSI_OPCODE_PRINT:
-         /* no-op */
-         break;
-      case TGSI_OPCODE_BGNSUB:
-         brw_save_label(p, inst->Comment, p->nr_insn);
-         break;
-      case TGSI_OPCODE_ENDSUB:
-         /* no-op */
-         break;
-      default:
-	 _mesa_problem(NULL, "Unsupported opcode %i (%s) in vertex shader",
-                       inst->Opcode, inst->Opcode < MAX_OPCODE ?
-				    _mesa_opcode_string(inst->Opcode) :
-				    "unknown");
-      }
-
-      /* Set the predication update on the last instruction of the native
-       * instruction sequence.
-       *
-       * This would be problematic if it was set on a math instruction,
-       * but that shouldn't be the case with the current GLSL compiler.
-       */
-      if (inst->CondUpdate) {
-	 struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
-
-	 assert(hw_insn->header.destreg__conditionalmod == 0);
-	 hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
-      }
-
-      release_tmps(c);
    }
 
    end_inst = &p->store[end_offset];
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 3118e615f9..23f7ba16fd 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -167,8 +167,8 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 	case PROGRAM_PAYLOAD:
 	    break;
 	default:
-	    _mesa_problem(NULL, "Unexpected file in get_reg()");
-	    return brw_null_reg();
+	   debug_printf("Unexpected file in get_reg()");
+	   return brw_null_reg();
     }
 
     assert(index < 256);
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index e1ed6438dc..7157feb6f3 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -516,8 +516,11 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
 	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
 	 break;
       default:
-	 _mesa_problem(ctx, "Bad renderbuffer format: %d\n",
-		       irb->texformat->MesaFormat);
+	 debug_printf("Bad renderbuffer format: %d\n",
+		      irb->texformat->MesaFormat);
+	 assert(0);
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 return;
       }
       key.tiling = region->tiling;
       if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
-- 
cgit v1.2.3


From 7ba2fe40fa092551f1c493d754c80ca93564d32b Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 27 Oct 2009 00:29:21 +0000
Subject: i965g: still working on compilation

---
 src/gallium/drivers/i965/brw_context.h |   1 +
 src/gallium/drivers/i965/brw_eu.c      |  18 ++---
 src/gallium/drivers/i965/brw_eu.h      |   4 +-
 src/gallium/drivers/i965/brw_vs.h      |   6 ++
 src/gallium/drivers/i965/brw_vs_emit.c | 131 ++++++++++++++++-----------------
 src/gallium/drivers/i965/brw_wm.h      |   9 +--
 src/gallium/drivers/i965/brw_wm_glsl.c |   2 +-
 7 files changed, 83 insertions(+), 88 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 8aaf895d20..7b85363e9f 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -289,6 +289,7 @@ struct brw_vs_prog_data {
    GLuint nr_params;       /**< number of TGSI_FILE_CONSTANT's */
 
    GLboolean copy_edgeflag;
+   GLboolean writes_psiz;
 
    /* Used for calculating urb partitions:
     */
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
index df49d4b72f..1189a35b6f 100644
--- a/src/gallium/drivers/i965/brw_eu.c
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -152,7 +152,7 @@ const GLuint *brw_get_program( struct brw_compile *p,
  */
 struct brw_glsl_label
 {
-   const char *name; /**< the label string */
+   GLuint label;     /**< the label number */
    GLuint position;  /**< the position of the brw instruction for this label */
    struct brw_glsl_label *next;  /**< next in linked list */
 };
@@ -164,7 +164,7 @@ struct brw_glsl_label
 struct brw_glsl_call
 {
    GLuint call_inst_pos;  /**< location of the CAL instruction */
-   const char *sub_name;  /**< name of subroutine to call */
+   GLuint label;
    struct brw_glsl_call *next;  /**< next in linked list */
 };
 
@@ -173,10 +173,10 @@ struct brw_glsl_call
  * Called for each OPCODE_BGNSUB.
  */
 void
-brw_save_label(struct brw_compile *c, const char *name, GLuint position)
+brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
 {
    struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
-   label->name = name;
+   label->label = l;
    label->position = position;
    label->next = c->first_label;
    c->first_label = label;
@@ -187,11 +187,11 @@ brw_save_label(struct brw_compile *c, const char *name, GLuint position)
  * Called for each OPCODE_CAL.
  */
 void
-brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
+brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
 {
    struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
    call->call_inst_pos = call_pos;
-   call->sub_name = name;
+   call->label = label;
    call->next = c->first_call;
    c->first_call = call;
 }
@@ -201,11 +201,11 @@ brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos)
  * Lookup a label, return label's position/offset.
  */
 static GLuint
-brw_lookup_label(struct brw_compile *c, const char *name)
+brw_lookup_label(struct brw_compile *c, unsigned l)
 {
    const struct brw_glsl_label *label;
    for (label = c->first_label; label; label = label->next) {
-      if (strcmp(name, label->name) == 0) {
+      if (l == label->label) {
          return label->position;
       }
    }
@@ -224,7 +224,7 @@ brw_resolve_cals(struct brw_compile *c)
     const struct brw_glsl_call *call;
 
     for (call = c->first_call; call; call = call->next) {
-        const GLuint sub_loc = brw_lookup_label(c, call->sub_name);
+        const GLuint sub_loc = brw_lookup_label(c, call->label);
 	struct brw_instruction *brw_call_inst = &c->store[call->call_inst_pos];
 	struct brw_instruction *brw_sub_inst = &c->store[sub_loc];
 	GLint offset = brw_sub_inst - brw_call_inst;
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index ac5a623cac..3379522104 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -136,10 +136,10 @@ struct brw_compile {
 
 
 void
-brw_save_label(struct brw_compile *c, const char *name, GLuint position);
+brw_save_label(struct brw_compile *c, unsigned label, GLuint position);
 
 void
-brw_save_call(struct brw_compile *c, const char *name, GLuint call_pos);
+brw_save_call(struct brw_compile *c, unsigned label, GLuint call_pos);
 
 void
 brw_resolve_cals(struct brw_compile *c);
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 58119567dc..2a2dbb3457 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -54,6 +54,7 @@ struct brw_vs_compile {
    struct brw_compile func;
    struct brw_vs_prog_key key;
    struct brw_vs_prog_data prog_data;
+   struct brw_chipset chipset;
 
    struct brw_vertex_shader *vp;
 
@@ -88,7 +89,12 @@ struct brw_vs_compile {
 
    struct brw_instruction *if_inst[MAX_IF_DEPTH];
    struct brw_instruction *loop_inst[MAX_LOOP_DEPTH];
+   GLuint insn;
+   GLuint if_depth;
+   GLuint loop_depth;
+   GLuint end_offset;
 
+   struct brw_indirect stack_index;
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 4daa98b29e..5366ab8514 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -35,19 +35,15 @@
 #include "util/u_math.h"
 
 #include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_ureg_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_debug.h"
 
 
-struct ureg_instruction {
-   unsigned opcode:8;
-   unsigned tex_target:3;
-   struct ureg_dst dst;
-   struct ureg_src src[3];
-};
-
 
 static struct brw_reg get_tmp( struct brw_vs_compile *c )
 {
@@ -149,7 +145,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    c->first_output = reg;
    c->first_overflow_output = 0;
 
-   if (BRW_IS_IGDNG(c->func.brw))
+   if (c->chipset.is_igdng)
       mrf = 8;
    else
       mrf = 4;
@@ -251,7 +247,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    attributes_in_vue = MAX2(c->nr_outputs, c->nr_inputs);
 
-   if (BRW_IS_IGDNG(c->func.brw))
+   if (c->chipset.is_igdng)
       c->prog_data.urb_entry_size = (attributes_in_vue + 6 + 3) / 4;
    else
       c->prog_data.urb_entry_size = (attributes_in_vue + 2 + 3) / 4;
@@ -1058,7 +1054,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
     */
    if (c->prog_data.writes_psiz ||
        c->key.nr_userclip || 
-       BRW_IS_965(p->brw))
+       c->chipset.is_965)
    {
       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
       GLuint i;
@@ -1089,7 +1085,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * Later, clipping will detect ucp[6] and ensure the primitive is
        * clipped against all fixed planes.
        */
-      if (BRW_IS_965(p->brw)) {
+      if (c->chipset.is_965) {
 	 brw_CMP(p,
 		 vec8(brw_null_reg()),
 		 BRW_CONDITIONAL_L,
@@ -1117,7 +1113,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    brw_set_access_mode(p, BRW_ALIGN_1);
    brw_MOV(p, offset(m0, 2), ndc);
 
-   if (BRW_IS_IGDNG(p->brw)) {
+   if (c->chipset.is_igdng) {
        /* There are 20 DWs (D0-D19) in VUE vertex header on IGDNG */
        brw_MOV(p, offset(m0, 3), pos); /* a portion of vertex header */
        /* m4, m5 contain the distances from vertex to the user clip planeXXX. 
@@ -1205,6 +1201,9 @@ post_vs_emit( struct brw_vs_compile *c,
 static uint32_t
 get_predicate(const struct ureg_instruction *inst)
 {
+   /* XXX: disabling for now
+    */
+#if 0
    if (inst->dst.CondMask == COND_TR)
       return BRW_PREDICATE_NONE;
 
@@ -1237,11 +1236,15 @@ get_predicate(const struct ureg_instruction *inst)
 		    inst->dst.CondMask);
       return BRW_PREDICATE_NORMAL;
    }
+#else
+   return BRW_PREDICATE_NORMAL;
+#endif
 }
 
 static void emit_insn(struct brw_vs_compile *c,
-		      const struct tgsi_full_instruction *insn)
+		      const struct ureg_instruction *inst)
 {
+   struct brw_compile *p = &c->func;
    struct brw_reg args[3], dst;
    GLuint i;
 
@@ -1253,9 +1256,6 @@ static void emit_insn(struct brw_vs_compile *c,
    /* Get argument regs.
     */
    for (i = 0; i < 3; i++) {
-      const struct ureg_src src = inst->src[i];
-      index = src.Index;
-      file = src.File;	
       args[i] = get_arg(c, inst, i);
    }
 
@@ -1263,16 +1263,13 @@ static void emit_insn(struct brw_vs_compile *c,
     * dst and arg, given the static allocation of registers.  So
     * care needs to be taken emitting multi-operation instructions.
     */ 
-   index = inst->dst.Index;
-   file = inst->dst.File;
    dst = get_dst(c, inst->dst);
 
-   if (inst->SaturateMode != SATURATE_OFF) {
-      debug_printf("Unsupported saturate %d in vertex shader",
-		   inst->SaturateMode);
+   if (inst->dst.Saturate) {
+      debug_printf("Unsupported saturate in vertex shader");
    }
 
-   switch (inst->Opcode) {
+   switch (inst->opcode) {
    case TGSI_OPCODE_ABS:
       brw_MOV(p, dst, brw_abs(args[0]));
       break;
@@ -1291,7 +1288,7 @@ static void emit_insn(struct brw_vs_compile *c,
    case TGSI_OPCODE_DPH:
       brw_DPH(p, dst, args[0], args[1]);
       break;
-   case TGSI_OPCODE_NRM3:
+   case TGSI_OPCODE_NRM:
       emit_nrm(c, dst, args[0], 3);
       break;
    case TGSI_OPCODE_NRM4:
@@ -1384,21 +1381,21 @@ static void emit_insn(struct brw_vs_compile *c,
       emit_xpd(p, dst, args[0], args[1]);
       break;
    case TGSI_OPCODE_IF:
-      assert(if_depth < MAX_IF_DEPTH);
-      if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
+      assert(c->if_depth < MAX_IF_DEPTH);
+      c->if_inst[c->if_depth] = brw_IF(p, BRW_EXECUTE_8);
       /* Note that brw_IF smashes the predicate_control field. */
-      if_inst[if_depth]->header.predicate_control = get_predicate(inst);
-      if_depth++;
+      c->if_inst[c->if_depth]->header.predicate_control = get_predicate(inst);
+      c->if_depth++;
       break;
    case TGSI_OPCODE_ELSE:
-      if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
+      c->if_inst[c->if_depth-1] = brw_ELSE(p, c->if_inst[c->if_depth-1]);
       break;
    case TGSI_OPCODE_ENDIF:
-      assert(if_depth > 0);
-      brw_ENDIF(p, if_inst[--if_depth]);
+      assert(c->if_depth > 0);
+      brw_ENDIF(p, c->if_inst[--c->if_depth]);
       break;			
    case TGSI_OPCODE_BGNLOOP:
-      loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+      c->loop_inst[c->loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
       break;
    case TGSI_OPCODE_BRK:
       brw_set_predicate_control(p, get_predicate(inst));
@@ -1415,14 +1412,14 @@ static void emit_insn(struct brw_vs_compile *c,
       struct brw_instruction *inst0, *inst1;
       GLuint br = 1;
 
-      loop_depth--;
+      c->loop_depth--;
 
-      if (BRW_IS_IGDNG(brw))
+      if (c->chipset.is_igdng)
 	 br = 2;
 
-      inst0 = inst1 = brw_WHILE(p, loop_inst[loop_depth]);
+      inst0 = inst1 = brw_WHILE(p, c->loop_inst[c->loop_depth]);
       /* patch all the BREAK/CONT instructions from last BEGINLOOP */
-      while (inst0 > loop_inst[loop_depth]) {
+      while (inst0 > c->loop_inst[c->loop_depth]) {
 	 inst0--;
 	 if (inst0->header.opcode == TGSI_OPCODE_BRK) {
 	    inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
@@ -1442,41 +1439,37 @@ static void emit_insn(struct brw_vs_compile *c,
       break;
    case TGSI_OPCODE_CAL:
       brw_set_access_mode(p, BRW_ALIGN_1);
-      brw_ADD(p, deref_1d(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
+      brw_ADD(p, deref_1d(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
       brw_set_access_mode(p, BRW_ALIGN_16);
-      brw_ADD(p, get_addr_reg(stack_index),
-	      get_addr_reg(stack_index), brw_imm_d(4));
-      brw_save_call(p, inst->Comment, p->nr_insn);
+      brw_ADD(p, get_addr_reg(c->stack_index),
+	      get_addr_reg(c->stack_index), brw_imm_d(4));
+      brw_save_call(p, inst->label, p->nr_insn);
       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
       break;
    case TGSI_OPCODE_RET:
-      brw_ADD(p, get_addr_reg(stack_index),
-	      get_addr_reg(stack_index), brw_imm_d(-4));
+      brw_ADD(p, get_addr_reg(c->stack_index),
+	      get_addr_reg(c->stack_index), brw_imm_d(-4));
       brw_set_access_mode(p, BRW_ALIGN_1);
-      brw_MOV(p, brw_ip_reg(), deref_1d(stack_index, 0));
+      brw_MOV(p, brw_ip_reg(), deref_1d(c->stack_index, 0));
       brw_set_access_mode(p, BRW_ALIGN_16);
       break;
    case TGSI_OPCODE_END:	
-      end_offset = p->nr_insn;
+      c->end_offset = p->nr_insn;
       /* this instruction will get patched later to jump past subroutine
        * code, etc.
        */
       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
       break;
-   case TGSI_OPCODE_PRINT:
-      /* no-op */
-      break;
    case TGSI_OPCODE_BGNSUB:
-      brw_save_label(p, inst->Comment, p->nr_insn);
+      brw_save_label(p, p->nr_insn, p->nr_insn);
       break;
    case TGSI_OPCODE_ENDSUB:
       /* no-op */
       break;
    default:
       debug_printf("Unsupported opcode %i (%s) in vertex shader",
-		   inst->Opcode, inst->Opcode < MAX_OPCODE ?
-		   _mesa_opcode_string(inst->Opcode) :
-		   "unknown");
+		   inst->opcode, 
+		   tgsi_get_opcode_name(inst->opcode));
    }
 
    /* Set the predication update on the last instruction of the native
@@ -1485,12 +1478,16 @@ static void emit_insn(struct brw_vs_compile *c,
     * This would be problematic if it was set on a math instruction,
     * but that shouldn't be the case with the current GLSL compiler.
     */
+#if 0
+   /* XXX: disabled
+    */
    if (inst->CondUpdate) {
       struct brw_instruction *hw_insn = &p->store[p->nr_insn - 1];
 
       assert(hw_insn->header.destreg__conditionalmod == 0);
       hw_insn->header.destreg__conditionalmod = BRW_CONDITIONAL_NZ;
    }
+#endif
 
    release_tmps(c);
 }
@@ -1498,24 +1495,19 @@ static void emit_insn(struct brw_vs_compile *c,
 
 /* Emit the vertex program instructions here.
  */
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_vs_emit(struct brw_vs_compile *c)
 {
    struct brw_compile *p = &c->func;
-   struct brw_context *brw = p->brw;
-   GLuint insn, if_depth = 0, loop_depth = 0;
-   GLuint end_offset = 0;
    struct brw_instruction *end_inst, *last_inst;
-   const struct brw_indirect stack_index = brw_indirect(0, 0);   
-   struct tgsi_parse_context parse;
-   struct tgsi_full_declaration *decl;
-   GLuint index;
-   GLuint file;
+   struct ureg_parse_context parse;
+   struct ureg_declaration *decl;
+   struct ureg_declaration *imm;
+   struct ureg_declaration *insn;
 
-   if (BRW_DEBUG & DEBUG_VS) {
-      debug_printf("vs-mesa:\n");
-      _mesa_print_program(&c->vp->program.Base); 
-      debug_printf("\n");
-   }
+   if (BRW_DEBUG & DEBUG_VS)
+      tgsi_dump(c->vp->tokens, 0); 
+
+   c->stack_index = brw_indirect(0, 0);
 
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
@@ -1523,12 +1515,15 @@ void brw_vs_emit(struct brw_vs_compile *c )
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
-   brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
+   brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
 
-   for (insn = 0; insn < nr_insns; insn++) {
+   while (ureg_next_decl(&parse, &decl)) {
+   }
 
-      const struct ureg_instruction *inst = &c->vp->program.Base.Instructions[insn];
-      
+   while (ureg_next_immediate(&parse, &imm)) {
+   }
+
+   while (ureg_next_instruction(&parse, &insn)) {
    }
 
    end_inst = &p->store[end_offset];
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 5bc2a49c1f..084430cf28 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -34,6 +34,7 @@
 #define BRW_WM_H
 
 #include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_ureg_parse.h"
 
 #include "brw_context.h"
 #include "brw_eu.h"
@@ -163,14 +164,6 @@ struct brw_wm_instruction {
 #define BRW_WM_MAX_SUBROUTINE 16
 
 
-struct ureg_instruction {
-   unsigned opcode:8;
-   unsigned tex_target:3;
-   struct ureg_dst dst;
-   struct ureg_src src[3];
-};
-
-
 /* New opcodes to track internal operations required for WM unit.
  * These are added early so that the registers used can be tracked,
  * freed and reused like those of other instructions.
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 23f7ba16fd..59bc4ef701 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1867,7 +1867,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
                 brw_set_access_mode(p, BRW_ALIGN_16);
                 brw_ADD(p, get_addr_reg(stack_index),
                          get_addr_reg(stack_index), brw_imm_d(4));
-		brw_save_call(&c->func, inst->Comment, p->nr_insn);
+		brw_save_call(&c->func, inst->label, p->nr_insn);
                 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
                 brw_pop_insn_state(p);
 		break;
-- 
cgit v1.2.3


From 7d967b9b7c08aea2a471c5bf6aced8bfafdae874 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 28 Oct 2009 00:30:45 +0100
Subject: nv50: activate more lanes in a warp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some cards have crippling defaults set and use only 4
of 32 lanes. This should activate 16 on these.
Those that allow 32 by default should still do so.

Found out by Marcin Kościelnicki.
---
 src/gallium/drivers/nv50/nv50_screen.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index c672ea471a..c8d0f1e4d8 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -309,6 +309,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, 0x121c, 1);
 	so_data  (so, 1);
 
+	/* try to activate all/more lanes (threads) in a warp */
+	so_method(so, screen->tesla, 0x1400, 1);
+	so_data  (so, 0xf);
+
 	so_method(so, screen->tesla, 0x13bc, 1);
 	so_data  (so, 0x54);
 	/* origin is top left (set to 1 for bottom left) */
-- 
cgit v1.2.3


From 095e66f695ce1d869a824d9e22f63b54c95ca0ac Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 27 Oct 2009 20:09:53 +0000
Subject: llvmpipe: Implement round() for MSVC.

---
 src/gallium/drivers/llvmpipe/lp_test_main.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index d4767ff52b..82fada5a35 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -40,6 +40,18 @@
 #include "lp_test.h"
 
 
+#ifdef PIPE_CC_MSVC
+static INLINE double
+round(double x)
+{
+   if (x >= 0.0)
+      return floor(x + 0.5);
+   else
+      return ceil(x - 0.5);
+}
+#endif
+
+
 void
 dump_type(FILE *fp,
           struct lp_type type)
-- 
cgit v1.2.3


From 182ff3e47a2d18917cdf3344c2ce95bd0a460784 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 28 Oct 2009 11:05:32 +0000
Subject: llvmpipe: Make sure the JIT engine and X86 target are linked on MSVC
 build.

Basically mimic the llvm 2.6 way of linking execution engines and
targets.
---
 src/gallium/drivers/llvmpipe/Makefile        |  3 ++
 src/gallium/drivers/llvmpipe/SConscript      |  1 +
 src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 62 ++++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_bld_misc.h   | 50 ++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_jit.c        |  3 +-
 src/gallium/drivers/llvmpipe/lp_test_main.c  |  3 +-
 6 files changed, 118 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
 create mode 100644 src/gallium/drivers/llvmpipe/lp_bld_misc.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index 96c014e592..cdf318844c 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -57,6 +57,9 @@ C_SOURCES = \
 	lp_tile_cache.c \
 	lp_tile_soa.c
 
+CPP_SOURCES = \
+	lp_bld_misc.cpp
+
 include ../../Makefile.template
 
 lp_tile_soa.c: lp_tile_soa.py ../../auxiliary/util/u_format_parse.py ../../auxiliary/util/u_format_access.py ../../auxiliary/util/u_format.csv
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 52983039fd..f4410f8201 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -34,6 +34,7 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_bld_format_soa.c',
 		'lp_bld_interp.c',
 		'lp_bld_intr.c',
+		'lp_bld_misc.cpp',
         'lp_bld_pack.c',
         'lp_bld_sample.c',
 		'lp_bld_sample_soa.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
new file mode 100644
index 0000000000..c9acaf1f16
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "llvm/Config/config.h"
+
+#include "pipe/p_config.h"
+
+#include "lp_bld_misc.h"
+
+
+#ifndef LLVM_NATIVE_ARCH
+
+namespace llvm {
+   extern void LinkInJIT();
+}
+
+
+void
+LLVMLinkInJIT(void)
+{
+   llvm::LinkInJIT();
+}
+
+
+extern "C" int X86TargetMachineModule;
+
+
+void
+LLVMInitializeNativeTarget(void)
+{
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   X86TargetMachineModule = 1;			
+#endif
+}
+
+
+#endif
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
new file mode 100644
index 0000000000..51a84c5e25
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -0,0 +1,50 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef LP_BLD_MISC_H
+#define LP_BLD_MISC_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void
+LLVMLinkInJIT(void);
+
+void
+LLVMInitializeNativeTarget(void);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* !LP_BLD_MISC_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 1126bf90b9..13535dd638 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -39,6 +39,7 @@
 #include "util/u_cpu_detect.h"
 #include "lp_screen.h"
 #include "lp_bld_intr.h"
+#include "lp_bld_misc.h"
 #include "lp_jit.h"
 
 
@@ -156,10 +157,8 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
    util_cpu_caps.has_sse4_1 = 0;
 #endif
 
-#ifdef LLVM_NATIVE_ARCH
    LLVMLinkInJIT();
    LLVMInitializeNativeTarget();
-#endif
 
    screen->module = LLVMModuleCreateWithName("llvmpipe");
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_main.c b/src/gallium/drivers/llvmpipe/lp_test_main.c
index 82fada5a35..314544aa9a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_main.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_main.c
@@ -37,6 +37,7 @@
 #include "util/u_cpu_detect.h"
 
 #include "lp_bld_const.h"
+#include "lp_bld_misc.h"
 #include "lp_test.h"
 
 
@@ -379,10 +380,8 @@ int main(int argc, char **argv)
          n = atoi(argv[i]);
    }
 
-#ifdef LLVM_NATIVE_ARCH
    LLVMLinkInJIT();
    LLVMInitializeNativeTarget();
-#endif
 
    util_cpu_detect();
 
-- 
cgit v1.2.3


From 0e44884aada4e4bd6384245d9ae065da5aca7f3d Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 28 Oct 2009 02:19:52 +0100
Subject: r300g: fix blending and do some optimizations

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_emit.c  |   1 +
 src/gallium/drivers/r300/r300_state.c | 101 ++++++++++++++++++----------------
 2 files changed, 54 insertions(+), 48 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 258c38fefd..096165a292 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -31,6 +31,7 @@
 #include "r300_screen.h"
 #include "r300_state_derived.h"
 #include "r300_state_inlines.h"
+#include "r300_texture.h"
 #include "r300_vs.h"
 
 void r300_emit_blend_state(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index a3e1bc621a..5d28837ef7 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -49,46 +49,47 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
 {
     struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
 
+    if (state->blend_enable)
     {
-	unsigned eqRGB = state->rgb_func;
-	unsigned srcRGB = state->rgb_src_factor;
-	unsigned dstRGB = state->rgb_dst_factor;
-
-	unsigned eqA = state->alpha_func;
-	unsigned srcA = state->alpha_src_factor;
-	unsigned dstA = state->alpha_dst_factor;
-
-	if (srcA != srcRGB ||
-	    dstA != dstRGB ||
-	    eqA != eqRGB) {
-	    blend->alpha_blend_control =
-		r300_translate_blend_function(eqA) |
-		(r300_translate_blend_factor(srcA) <<
-                    R300_SRC_BLEND_SHIFT) |
-                (r300_translate_blend_factor(dstA) <<
-		 R300_DST_BLEND_SHIFT);
-	    blend->blend_control |= R300_ALPHA_BLEND_ENABLE |
-		R300_SEPARATE_ALPHA_ENABLE;
-	} else {
-	    blend->alpha_blend_control = R300_COMB_FCN_ADD_CLAMP |
-		(R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
-		(R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
-	}
-    }
-    if (state->blend_enable) {
-        /* XXX for now, always do separate alpha...
-         * is it faster to do it with one reg? */
-        blend->blend_control |= R300_READ_ENABLE |
-                r300_translate_blend_function(state->rgb_func) |
-                (r300_translate_blend_factor(state->rgb_src_factor) <<
-                    R300_SRC_BLEND_SHIFT) |
-                (r300_translate_blend_factor(state->rgb_dst_factor) <<
-                    R300_DST_BLEND_SHIFT);
-    } else {
-	blend->blend_control = 
-	    R300_COMB_FCN_ADD_CLAMP |
-	    (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
-	    (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+        unsigned eqRGB = state->rgb_func;
+        unsigned srcRGB = state->rgb_src_factor;
+        unsigned dstRGB = state->rgb_dst_factor;
+
+        unsigned eqA = state->alpha_func;
+        unsigned srcA = state->alpha_src_factor;
+        unsigned dstA = state->alpha_dst_factor;
+
+        /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
+         * this is just the crappy D3D naming */
+        blend->blend_control = R300_ALPHA_BLEND_ENABLE |
+            r300_translate_blend_function(eqRGB) |
+            ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
+            ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
+
+        /* optimization: some operations do not require the destination color */
+        if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
+            eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
+            dstRGB != PIPE_BLENDFACTOR_ZERO ||
+            dstA != PIPE_BLENDFACTOR_ZERO ||
+            srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA)
+            blend->blend_control |= R300_READ_ENABLE;
+
+        /* XXX implement the optimization with DISCARD_SRC_PIXELS*/
+        /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */
+
+        /* separate alpha */
+        if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+            blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE;
+            blend->alpha_blend_control =
+                r300_translate_blend_function(eqA) |
+                (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
+                (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
+        }
     }
 
     /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
@@ -122,25 +123,29 @@ static void r300_delete_blend_state(struct pipe_context* pipe,
     FREE(state);
 }
 
+/* Convert float to 10bit integer */
+static unsigned float_to_fixed10(float f)
+{
+    return CLAMP((unsigned)(f * 1023.9f), 0, 1023);
+}
+
 /* Set blend color.
  * Setup both R300 and R500 registers, figure out later which one to write. */
 static void r300_set_blend_color(struct pipe_context* pipe,
                                  const struct pipe_blend_color* color)
 {
     struct r300_context* r300 = r300_context(pipe);
-    ubyte ur, ug, ub, ua;
-
-    ur = float_to_ubyte(color->color[0]);
-    ug = float_to_ubyte(color->color[1]);
-    ub = float_to_ubyte(color->color[2]);
-    ua = float_to_ubyte(color->color[3]);
 
     util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM,
             &r300->blend_color_state->blend_color);
 
-    /* XXX this is wrong */
-    r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16);
-    r300->blend_color_state->blend_color_green_blue = ub | (ug << 16);
+    /* XXX if FP16 blending is enabled, we should use the FP16 format */
+    r300->blend_color_state->blend_color_red_alpha =
+        float_to_fixed10(color->color[0]) |
+        (float_to_fixed10(color->color[3]) << 16);
+    r300->blend_color_state->blend_color_green_blue =
+        float_to_fixed10(color->color[2]) |
+        (float_to_fixed10(color->color[1]) << 16);
 
     r300->dirty_state |= R300_NEW_BLEND_COLOR;
 }
-- 
cgit v1.2.3


From a1d726aae8fcacfa1eb1d76ce9c46adaafeaf4a4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 28 Oct 2009 02:21:49 +0100
Subject: r300g: fix the WRAP_T mode when using 1D textures

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_emit.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 096165a292..8bfa2932c9 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -532,10 +532,17 @@ void r300_emit_texture(struct r300_context* r300,
                        struct r300_texture* tex,
                        unsigned offset)
 {
+    uint32_t filter0 = sampler->filter0;
     CS_LOCALS(r300);
 
+    /* to emulate 1D textures through 2D ones correctly */
+    if (tex->tex.height[0] == 1) {
+        filter0 &= ~R300_TX_WRAP_T_MASK;
+        filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
+    }
+
     BEGIN_CS(16);
-    OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), sampler->filter0 |
+    OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 |
         (offset << 28));
     OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1);
     OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color);
-- 
cgit v1.2.3


From bcfde429139476c2d04baddaf671651cfc860145 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 28 Oct 2009 02:43:51 +0100
Subject: r300g: fix emitting the stencil-ref and alpha-ref values

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>

DSA really needs its head examined someday. ~ C.
---
 src/gallium/drivers/r300/r300_emit.c  | 16 ++++++++++------
 src/gallium/drivers/r300/r300_reg.h   |  2 ++
 src/gallium/drivers/r300/r300_state.c | 24 +++++++++++++++++++-----
 3 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 8bfa2932c9..2a8e4a9f41 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -102,19 +102,23 @@ void r300_emit_dsa_state(struct r300_context* r300,
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     CS_LOCALS(r300);
 
-    BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8);
+    BEGIN_CS(r300screen->caps->is_r500 ? 10 : 8);
     OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
-    /* XXX figure out the r300 counterpart for this */
-    if (r300screen->caps->is_r500) {
-        /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */
-    }
+
+    /* not needed since we use the 8bit alpha ref */
+    /*if (r300screen->caps->is_r500) {
+        OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference);
+    }*/
+
     OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
     OUT_CS(dsa->z_buffer_control);
     OUT_CS(dsa->z_stencil_control);
     OUT_CS(dsa->stencil_ref_mask);
     OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top);
+
+    /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
     if (r300screen->caps->is_r500) {
-        /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */
+        OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
     }
     END_CS;
 }
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index e920b2a5e7..babc3c709e 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2416,6 +2416,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_Z_WRITE_ENABLE		 (1 << 2)
 #	define R300_Z_SIGNED_COMPARE		 (1 << 3)
 #	define R300_STENCIL_FRONT_BACK		 (1 << 4)
+#   define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5)
+#   define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
 
 #define R300_ZB_ZSTENCILCNTL                   0x4f04
 	/* functions */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5d28837ef7..5db8c69dec 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -198,6 +198,8 @@ static void*
         r300_create_dsa_state(struct pipe_context* pipe,
                               const struct pipe_depth_stencil_alpha_state* state)
 {
+    struct r300_capabilities *caps =
+        r300_screen(r300_context(pipe)->context.screen)->caps;
     struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
 
     /* Depth test setup. */
@@ -242,9 +244,16 @@ static void*
             (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
                 R300_S_BACK_ZFAIL_OP_SHIFT);
 
-            dsa->stencil_ref_bf = (state->stencil[1].ref_value) |
-                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
-                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+            /* XXX it seems r3xx doesn't support STENCILREFMASK_BF */
+            if (caps->is_r500)
+            {
+                dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
+                dsa->stencil_ref_bf = (state->stencil[1].ref_value) |
+                    (state->stencil[1].valuemask <<
+                    R300_STENCILMASK_SHIFT) |
+                    (state->stencil[1].writemask <<
+                    R300_STENCILWRITEMASK_SHIFT);
+            }
         }
     }
 
@@ -253,8 +262,13 @@ static void*
         dsa->alpha_function =
             r300_translate_alpha_function(state->alpha.func) |
             R300_FG_ALPHA_FUNC_ENABLE;
-        dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f,
-                                     0, 1023);
+
+        /* XXX figure out why emitting 10bit alpha ref causes CS to dump */
+        /* always use 8bit alpha ref */
+        dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
+
+        if (caps->is_r500)
+            dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
     }
 
     return (void*)dsa;
-- 
cgit v1.2.3


From 81c51bb67f97c60e21a5e7cf87e154bb46ee481b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 28 Oct 2009 10:02:23 -0700
Subject: r300g: Fix XXX.

Nothing strange here.
---
 src/gallium/drivers/r300/r300_texture.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 2a33393a8c..3e90fea6c8 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -53,7 +53,6 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
                           R300_TX_NUM_LEVELS(pt->last_level & 0xf);
     }
 
-    /* XXX */
     state->format1 = r300_translate_texformat(pt->format);
     if (pt->target == PIPE_TEXTURE_CUBE) {
         state->format1 |= R300_TX_FORMAT_CUBIC_MAP;
-- 
cgit v1.2.3


From 6007e2e0085d9131b22dc8a98d7500a66a0e4c97 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 28 Oct 2009 11:47:24 -0700
Subject: r300g: Clear up a bit of the buffer reference stuff.

Still need to actually get reference info from winsys somehow. Doing added
buffers is easy, but knowing whether a flush has happened is a bit tricky.
---
 src/gallium/drivers/r300/r300_context.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index c34fbb1123..e45564b54e 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -76,26 +76,23 @@ static void r300_destroy_context(struct pipe_context* context)
 }
 
 static unsigned int
-r300_is_texture_referenced( struct pipe_context *pipe,
-			    struct pipe_texture *texture,
-			    unsigned face, unsigned level)
+r300_is_texture_referenced(struct pipe_context *pipe,
+                           struct pipe_texture *texture,
+                           unsigned face, unsigned level)
 {
-   /**
-    * FIXME: Optimize.
-    */
+    struct pipe_buffer* buf;
 
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+    r300_get_texture_buffer(texture, &buf, NULL);
+
+    return pipe->is_buffer_referenced(pipe, buf);
 }
 
 static unsigned int
-r300_is_buffer_referenced( struct pipe_context *pipe,
-			   struct pipe_buffer *buf)
+r300_is_buffer_referenced(struct pipe_context *pipe,
+                          struct pipe_buffer *buf)
 {
-   /**
-    * FIXME: Optimize.
-    */
-
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+    /* XXX */
+    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
 }
 
 static void r300_flush_cb(void *data)
-- 
cgit v1.2.3


From e7c8a2763855c35af1d141b67551b364e6579051 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 18 Oct 2009 18:06:51 +0200
Subject: r300g: add some texture formats

---
 src/gallium/drivers/r300/r300_screen.c  | 2 ++
 src/gallium/drivers/r300/r300_texture.h | 7 +++++++
 2 files changed, 9 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 6eaf35bd4b..6efa17cbaf 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -197,6 +197,8 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
         case PIPE_FORMAT_DXT3_RGBA:
         case PIPE_FORMAT_DXT5_RGBA:
         case PIPE_FORMAT_YCBCR:
+        case PIPE_FORMAT_L8_UNORM:
+        case PIPE_FORMAT_A8L8_UNORM:
             retval = usage & PIPE_TEXTURE_USAGE_SAMPLER;
             break;
 
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index 35e06a9acb..a18e0cbe1a 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -85,6 +85,13 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
         case PIPE_FORMAT_Z24S8_UNORM:
         case PIPE_FORMAT_Z24X8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
+
+        case PIPE_FORMAT_A8L8_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
+
+        case PIPE_FORMAT_L8_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
+
         default:
             debug_printf("r300: Implementation error: "
                 "Got unsupported texture format %s in %s\n",
-- 
cgit v1.2.3


From 23d8d15bedb7178bedde9b994be3925a160c193d Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 28 Oct 2009 11:58:13 -0700
Subject: r300g: Keep texture formats organized.

---
 src/gallium/drivers/r300/r300_texture.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
index a18e0cbe1a..55ceb1a513 100644
--- a/src/gallium/drivers/r300/r300_texture.h
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -43,6 +43,8 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
         /* X8 */
         case PIPE_FORMAT_I8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+        case PIPE_FORMAT_L8_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
         /* X16 */
         case PIPE_FORMAT_R16_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, X16);
@@ -51,6 +53,9 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
                 R300_TX_FORMAT_SIGNED;
         case PIPE_FORMAT_Z16_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+        /* Y8X8 */
+        case PIPE_FORMAT_A8L8_UNORM:
+            return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
         /* W8Z8Y8X8 */
         case PIPE_FORMAT_A8R8G8B8_UNORM:
             return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
@@ -86,12 +91,6 @@ static INLINE uint32_t r300_translate_texformat(enum pipe_format format)
         case PIPE_FORMAT_Z24X8_UNORM:
             return R300_EASY_TX_FORMAT(X, X, X, X, W24_FP);
 
-        case PIPE_FORMAT_A8L8_UNORM:
-            return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
-
-        case PIPE_FORMAT_L8_UNORM:
-            return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
-
         default:
             debug_printf("r300: Implementation error: "
                 "Got unsupported texture format %s in %s\n",
-- 
cgit v1.2.3


From f3d8d534e6f1d102d71338d58fbaa98c382f1858 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 28 Oct 2009 12:11:52 -0700
Subject: r300g: Use u_trim_pipe_prim to prevent lockups from incorrect vert
 counts.

Adapted from osiris' version on his tree.
---
 src/gallium/drivers/r300/r300_render.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 6f392402bd..c36350d29e 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -26,6 +26,7 @@
 #include "pipe/p_inlines.h"
 
 #include "util/u_memory.h"
+#include "util/u_prim.h"
 
 #include "r300_cs.h"
 #include "r300_context.h"
@@ -86,6 +87,10 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
     
     CS_LOCALS(r300);
 
+    if (!u_trim_pipe_prim(mode, &count)) {
+        return FALSE;
+    }
+
 validate:
     for (i = 0; i < aos_count; i++) {
         if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer,
@@ -191,6 +196,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
     int i;
 
+    if (!u_trim_pipe_prim(mode, &count)) {
+        return FALSE;
+    }
+
     for (i = 0; i < r300->vertex_buffer_count; i++) {
         void* buf = pipe_buffer_map(pipe->screen,
                                     r300->vertex_buffers[i].buffer,
-- 
cgit v1.2.3


From 81b8589f064204d9ddcd7d1f9d43d2dcf5676235 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 28 Oct 2009 21:24:03 +0000
Subject: i965g: still working on compilation

---
 src/gallium/drivers/i965/brw_vs.c               |   2 +-
 src/gallium/drivers/i965/brw_vs.h               |   3 +
 src/gallium/drivers/i965/brw_vs_emit.c          | 199 +++++++++++++++++-------
 src/gallium/drivers/i965/brw_vs_state.c         |  25 +--
 src/gallium/drivers/i965/brw_vs_surface_state.c |  33 +++-
 src/gallium/drivers/i965/brw_wm.c               |  36 +++--
 6 files changed, 208 insertions(+), 90 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 3965ca6c54..26a28114d9 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -57,7 +57,7 @@ static void do_vs_prog( struct brw_context *brw,
    c.prog_data.nr_inputs = vp->info.num_inputs;
    c.prog_data.copy_edgeflag = c.key.copy_edgeflag;
 
-   if (0)
+   if (1)
       tgsi_dump(c.vp->tokens, 0);
 
    /* Emit GEN4 code.
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 2a2dbb3457..b4e450d89b 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -60,6 +60,9 @@ struct brw_vs_compile {
 
    GLuint nr_inputs;
    GLuint nr_outputs;
+   GLuint nr_immediates;
+   GLfloat immediate[128][4];
+
    GLboolean copy_edgeflag;
 
    GLuint first_output;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 5366ab8514..6809bccdec 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -34,8 +34,7 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_ureg_parse.h"
+#include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_info.h"
 
@@ -67,6 +66,7 @@ static void release_tmps( struct brw_vs_compile *c )
 }
 
 
+
 /**
  * Preallocate GRF register before code emit.
  * Do things as simply as possible.  Allocate and populate all regs
@@ -83,10 +83,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     * XXX this heuristic/check may need some fine tuning...
     */
    if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
+       c->vp->info.file_max[TGSI_FILE_IMMEDIATE] +
        c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
       c->vp->use_const_buffer = GL_TRUE;
-   else
+   else {
+      /* XXX: immediates can go elsewhere if necessary:
+       */
+      assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] +
+	     c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF);
+
       c->vp->use_const_buffer = GL_FALSE;
+   }
 
    /*printf("use_const_buffer = %d\n", c->vp->use_const_buffer);*/
 
@@ -139,6 +146,29 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    if (c->nr_inputs == 0)
       reg++;
 
+   /* Allocate a GRF and load immediate values by hand with 4 MOVs!!!
+    *
+    * XXX: Try to encode float immediates as brw immediates
+    * XXX: Put immediates into the CURBE.
+    * XXX: Make sure ureg sets minimal immediate size and respect it
+    * here.
+    */
+   for (i = 0; i < c->nr_immediates; i++) {
+      struct brw_reg r;
+      int j;
+
+      r = brw_vec8_grf(reg, 0);
+
+      for (j = 0; j < 4; j++) {
+	 brw_MOV(&c->func, 
+		 brw_writemask(r, (1<<j)), 
+		 brw_imm_f(c->immediate[i][j]));
+      }
+
+      reg++;
+   }
+
+
    /* Allocate outputs.  The non-position outputs go straight into message regs.
     */
    c->nr_outputs = 0;
@@ -754,21 +784,20 @@ static void emit_nrm( struct brw_vs_compile *c,
 
 static struct brw_reg
 get_constant(struct brw_vs_compile *c,
-             const struct ureg_instruction *inst,
-             GLuint argIndex)
+	     GLuint argIndex,
+	     GLuint index,
+	     GLboolean relAddr)
 {
-   const struct ureg_src src = inst->src[argIndex];
    struct brw_compile *p = &c->func;
    struct brw_reg const_reg;
    struct brw_reg const2_reg;
-   const GLboolean relAddr = src.Indirect;
 
    assert(argIndex < 3);
 
-   if (c->current_const[argIndex].index != src.Index || relAddr) {
+   if (c->current_const[argIndex].index != index || relAddr) {
       struct brw_reg addrReg = c->regs[TGSI_FILE_ADDRESS][0];
 
-      c->current_const[argIndex].index = src.Index;
+      c->current_const[argIndex].index = index;
 
 #if 0
       printf("  fetch const[%d] for arg %d into reg %d\n",
@@ -780,7 +809,7 @@ get_constant(struct brw_vs_compile *c,
                        0,                             /* oword */
                        relAddr,                       /* relative indexing? */
                        addrReg,                       /* address register */
-                       16 * src.Index,               /* byte offset */
+                       16 * index,               /* byte offset */
                        SURF_INDEX_VERT_CONST_BUFFER   /* binding table index */
                        );
 
@@ -797,7 +826,7 @@ get_constant(struct brw_vs_compile *c,
                           1,                       /* oword */
                           relAddr,                 /* relative indexing? */
                           addrReg,                 /* address register */
-                          16 * src.Index,         /* byte offset */
+                          16 * index,         /* byte offset */
                           SURF_INDEX_VERT_CONST_BUFFER
                           );
       }
@@ -894,12 +923,11 @@ static struct brw_reg deref( struct brw_vs_compile *c,
  */
 static struct brw_reg
 get_src_reg( struct brw_vs_compile *c,
-             const struct ureg_instruction *inst,
-             GLuint argIndex )
+	     GLuint argIndex,
+	     GLuint file,
+	     GLint index,
+	     GLboolean relAddr )
 {
-   const GLuint file = inst->src[argIndex].File;
-   const GLint index = inst->src[argIndex].Index;
-   const GLboolean relAddr = inst->src[argIndex].Indirect;
 
    switch (file) {
    case TGSI_FILE_TEMPORARY:
@@ -913,9 +941,12 @@ get_src_reg( struct brw_vs_compile *c,
          return c->regs[file][index];
       }
 
+   case TGSI_FILE_IMMEDIATE:
+      return c->regs[file][index];
+
    case TGSI_FILE_CONSTANT:
       if (c->vp->use_const_buffer) {
-         return get_constant(c, inst, argIndex);
+         return get_constant(c, argIndex, index, relAddr);
       }
       else if (relAddr) {
          return deref(c, c->regs[TGSI_FILE_CONSTANT][0], index);
@@ -962,27 +993,32 @@ static void emit_arl( struct brw_vs_compile *c,
  * Return the brw reg for the given instruction's src argument.
  */
 static struct brw_reg get_arg( struct brw_vs_compile *c,
-                               const struct ureg_instruction *inst,
+                               const struct tgsi_full_src_register *src,
                                GLuint argIndex )
 {
-   const struct ureg_src src = inst->src[argIndex];
    struct brw_reg reg;
 
-   if (src.File == TGSI_FILE_NULL)
+   if (src->SrcRegister.File == TGSI_FILE_NULL)
       return brw_null_reg();
 
-   reg = get_src_reg(c, inst, argIndex);
+   reg = get_src_reg(c, argIndex,
+		     src->SrcRegister.File,
+		     src->SrcRegister.Index,
+		     src->SrcRegister.Indirect);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src.SwizzleX,
-				       src.SwizzleY,
-				       src.SwizzleZ,
-				       src.SwizzleW);
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX,
+				       src->SrcRegister.SwizzleY,
+				       src->SrcRegister.SwizzleZ,
+				       src->SrcRegister.SwizzleW);
 
    /* Note this is ok for non-swizzle instructions: 
     */
-   reg.negate = src.Negate ? 1 : 0;   
+   reg.negate = src->SrcRegister.Negate ? 1 : 0;   
+
+   /* XXX: abs, absneg
+    */
 
    return reg;
 }
@@ -992,19 +1028,21 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
  * Get brw register for the given program dest register.
  */
 static struct brw_reg get_dst( struct brw_vs_compile *c,
-			       struct ureg_dst dst )
+			       unsigned file,
+			       unsigned index,
+			       unsigned writemask )
 {
    struct brw_reg reg;
 
-   switch (dst.File) {
+   switch (file) {
    case TGSI_FILE_TEMPORARY:
    case TGSI_FILE_OUTPUT:
-      assert(c->regs[dst.File][dst.Index].nr != 0);
-      reg = c->regs[dst.File][dst.Index];
+      assert(c->regs[file][index].nr != 0);
+      reg = c->regs[file][index];
       break;
    case TGSI_FILE_ADDRESS:
-      assert(dst.Index == 0);
-      reg = c->regs[dst.File][dst.Index];
+      assert(index == 0);
+      reg = c->regs[file][index];
       break;
    case TGSI_FILE_NULL:
       /* we may hit this for OPCODE_END, OPCODE_KIL, etc */
@@ -1015,7 +1053,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
       reg = brw_null_reg();
    }
 
-   reg.dw1.bits.writemask = dst.WriteMask;
+   reg.dw1.bits.writemask = writemask;
 
    return reg;
 }
@@ -1199,7 +1237,7 @@ post_vs_emit( struct brw_vs_compile *c,
 }
 
 static uint32_t
-get_predicate(const struct ureg_instruction *inst)
+get_predicate(const struct tgsi_full_instruction *inst)
 {
    /* XXX: disabling for now
     */
@@ -1242,8 +1280,10 @@ get_predicate(const struct ureg_instruction *inst)
 }
 
 static void emit_insn(struct brw_vs_compile *c,
-		      const struct ureg_instruction *inst)
+		      const struct tgsi_full_instruction *inst)
 {
+   unsigned opcode = inst->Instruction.Opcode;
+   unsigned label = inst->InstructionExtLabel.Label;
    struct brw_compile *p = &c->func;
    struct brw_reg args[3], dst;
    GLuint i;
@@ -1256,20 +1296,25 @@ static void emit_insn(struct brw_vs_compile *c,
    /* Get argument regs.
     */
    for (i = 0; i < 3; i++) {
-      args[i] = get_arg(c, inst, i);
+      args[i] = get_arg(c, &inst->FullSrcRegisters[i], i);
    }
 
    /* Get dest regs.  Note that it is possible for a reg to be both
     * dst and arg, given the static allocation of registers.  So
     * care needs to be taken emitting multi-operation instructions.
     */ 
-   dst = get_dst(c, inst->dst);
+   dst = get_dst(c, 
+		 inst->FullDstRegisters[0].DstRegister.File,
+		 inst->FullDstRegisters[0].DstRegister.Index,
+		 inst->FullDstRegisters[0].DstRegister.WriteMask);
 
-   if (inst->dst.Saturate) {
+   /* XXX: saturate
+    */
+   if (inst->Instruction.Saturate != TGSI_SAT_NONE) {
       debug_printf("Unsupported saturate in vertex shader");
    }
 
-   switch (inst->opcode) {
+   switch (opcode) {
    case TGSI_OPCODE_ABS:
       brw_MOV(p, dst, brw_abs(args[0]));
       break;
@@ -1443,7 +1488,7 @@ static void emit_insn(struct brw_vs_compile *c,
       brw_set_access_mode(p, BRW_ALIGN_16);
       brw_ADD(p, get_addr_reg(c->stack_index),
 	      get_addr_reg(c->stack_index), brw_imm_d(4));
-      brw_save_call(p, inst->label, p->nr_insn);
+      brw_save_call(p, label, p->nr_insn);
       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
       break;
    case TGSI_OPCODE_RET:
@@ -1468,8 +1513,8 @@ static void emit_insn(struct brw_vs_compile *c,
       break;
    default:
       debug_printf("Unsupported opcode %i (%s) in vertex shader",
-		   inst->opcode, 
-		   tgsi_get_opcode_name(inst->opcode));
+		   opcode, 
+		   tgsi_get_opcode_name(opcode));
    }
 
    /* Set the predication update on the last instruction of the native
@@ -1498,11 +1543,12 @@ static void emit_insn(struct brw_vs_compile *c,
 void brw_vs_emit(struct brw_vs_compile *c)
 {
    struct brw_compile *p = &c->func;
+   const struct tgsi_token *tokens = c->vp->tokens;
    struct brw_instruction *end_inst, *last_inst;
-   struct ureg_parse_context parse;
-   struct ureg_declaration *decl;
-   struct ureg_declaration *imm;
-   struct ureg_declaration *insn;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
+   boolean done = FALSE;
+   int i;
 
    if (BRW_DEBUG & DEBUG_VS)
       tgsi_dump(c->vp->tokens, 0); 
@@ -1512,21 +1558,66 @@ void brw_vs_emit(struct brw_vs_compile *c)
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
+   /* Inputs */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+	 /* Nothing to do -- using info from tgsi_scan().
+	  */
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE: {
+	 static const float id[4] = {0,0,0,1};
+	 const float *imm = &parse.FullToken.FullImmediate.u[i].Float;
+	 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+	 for (i = 0; i < size; i++)
+	    c->immediate[c->nr_immediates][i] = imm[i];
+
+	 for ( ; i < 4; i++)
+	    c->immediate[c->nr_immediates][i] = id[i];
+
+	 c->nr_immediates++;
+	 break;
+      }
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+	 done = 1;
+	 break;
+      }
+   }
+
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
    brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
 
-   while (ureg_next_decl(&parse, &decl)) {
-   }
-
-   while (ureg_next_immediate(&parse, &imm)) {
-   }
-
-   while (ureg_next_instruction(&parse, &insn)) {
+   /* Instructions
+    */
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+	 break;
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+	 emit_insn( c, inst );
+         break;
+
+      default:
+         assert( 0 );
+      }
    }
+   tgsi_parse_free( &parse );
 
-   end_inst = &p->store[end_offset];
+   end_inst = &p->store[c->end_offset];
    last_inst = &p->store[p->nr_insn];
 
    /* The END instruction will be patched to jump to this code */
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 05a91f2de4..549696f7ae 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -29,8 +29,10 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
             
+#include "util/u_math.h"
 
 
+#include "brw_debug.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
@@ -64,8 +66,8 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    /* BRW_NEW_NR_VS_SURFACES */
    key->nr_surfaces = brw->vs.nr_surfaces;
 
-   /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */
-   if (ctx->Transform.ClipPlanesEnabled) {
+   /* PIPE_NEW_CLIP */
+   if (brw->curr.ucp.nr) {
       /* Note that we read in the userclip planes as well, hence
        * clip_start:
        */
@@ -86,7 +88,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    memset(&vs, 0, sizeof(vs));
 
    vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
-   vs.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    /* Choosing multiple program flow means that we may get 2-vertex threads,
     * which will have the channel mask for dwords 4-7 enabled in the thread,
@@ -119,6 +121,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
       chipset_max_threads = 32;
    else
       chipset_max_threads = 16;
+
    vs.thread4.max_threads = CLAMP(key->nr_urb_entries / 2,
 				  1, chipset_max_threads) - 1;
 
@@ -145,16 +148,16 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 			 NULL, NULL);
 
    /* Emit VS program relocation */
-   dri_bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0,
-		     vs.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_vs_unit_state, thread0),
-		     brw->vs.prog_bo);
+   brw->sws->bo_emit_reloc(bo,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   vs.thread0.grf_reg_count << 1,
+			   offsetof(struct brw_vs_unit_state, thread0),
+			   brw->vs.prog_bo);
 
    return bo;
 }
 
-static void prepare_vs_unit(struct brw_context *brw)
+static int prepare_vs_unit(struct brw_context *brw)
 {
    struct brw_vs_unit_key key;
 
@@ -168,11 +171,13 @@ static void prepare_vs_unit(struct brw_context *brw)
    if (brw->vs.state_bo == NULL) {
       brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_vs_unit = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM,
+      .mesa  = (PIPE_NEW_CLIP),
       .brw   = (BRW_NEW_CURBE_OFFSETS |
                 BRW_NEW_NR_VS_SURFACES |
 		BRW_NEW_URB_FENCE),
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index 319e29bfcb..9a9d47a8a3 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -32,6 +32,11 @@
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_winsys.h"
+
+/* XXX: disabled true constant buffer functionality
+ */
+
 
 /* Creates a new VS constant buffer reflecting the current VS program's
  * constants, if needed by the VS program.
@@ -39,9 +44,12 @@
  * Otherwise, constants go through the CURBEs using the brw_constant_buffer
  * state atom.
  */
-static drm_intel_bo *
+#if 0
+static struct brw_winsys_buffer *
 brw_vs_update_constant_buffer(struct brw_context *brw)
 {
+   /* XXX: true constant buffers
+    */
    struct brw_vertex_program *vp =
       (struct brw_vertex_program *) brw->vertex_program;
    const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
@@ -61,21 +69,20 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
 
    return const_buffer;
 }
+#endif
 
 /**
  * Update the surface state for a VS constant buffer.
  *
  * Sets brw->vs.surf_bo[surf] and brw->vp->const_buffer.
  */
+#if 0
 static void
 brw_update_vs_constant_surface( struct brw_context *brw,
                                 GLuint surf)
 {
-   struct brw_context *brw = brw_context(ctx);
    struct brw_surface_key key;
-   struct brw_vertex_program *vp =
-      (struct brw_vertex_program *) brw->vertex_program;
-   const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
+   struct pipe_buffer *cb = brw->curr.vs_constants;
 
    assert(surf == 0);
 
@@ -121,6 +128,7 @@ brw_update_vs_constant_surface( struct brw_context *brw,
       brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
    }
 }
+#endif
 
 
 /**
@@ -129,6 +137,7 @@ brw_update_vs_constant_surface( struct brw_context *brw,
 static struct brw_winsys_buffer *
 brw_vs_get_binding_table(struct brw_context *brw)
 {
+#if 0
    struct brw_winsys_buffer *bind_bo;
 
    bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
@@ -169,6 +178,9 @@ brw_vs_get_binding_table(struct brw_context *brw)
    }
 
    return bind_bo;
+#else
+   return NULL;
+#endif
 }
 
 /**
@@ -178,8 +190,9 @@ brw_vs_get_binding_table(struct brw_context *brw)
  * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
  * CACHE_NEW_SURF_BIND for the binding table upload.
  */
-static void prepare_vs_surfaces(struct brw_context *brw )
+static int prepare_vs_surfaces(struct brw_context *brw )
 {
+#if 0
    int i;
    int nr_surfaces = 0;
 
@@ -195,6 +208,7 @@ static void prepare_vs_surfaces(struct brw_context *brw )
       brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
       brw->vs.nr_surfaces = nr_surfaces;
    }
+#endif
 
    /* Note that we don't end up updating the bind_bo if we don't have a
     * surface to be pointing at.  This should be relatively harmless, as it
@@ -204,12 +218,15 @@ static void prepare_vs_surfaces(struct brw_context *brw )
       brw->sws->bo_unreference(brw->vs.bind_bo);
       brw->vs.bind_bo = brw_vs_get_binding_table(brw);
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
    .dirty = {
-      .mesa = (_NEW_PROGRAM_CONSTANTS),
-      .brw = (BRW_NEW_VERTEX_PROGRAM),
+      .mesa = (PIPE_NEW_VERTEX_CONSTANTS |
+	       PIPE_NEW_VERTEX_SHADER),
+      .brw = 0,
       .cache = 0
    },
    .prepare = prepare_vs_surfaces,
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 3d889699f8..f0dabfcfd0 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -28,11 +28,14 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-             
+
+#include "tgsi/tgsi_info.h"
+
 #include "brw_context.h"
 #include "brw_util.h"
 #include "brw_wm.h"
 #include "brw_state.h"
+#include "brw_debug.h"
 
 
 /** Return number of src args for given instruction */
@@ -54,7 +57,7 @@ GLuint brw_wm_nr_args( GLuint opcode )
       return 3;
    default:
       assert(opcode < MAX_OPCODE);
-      return _mesa_num_inst_src_regs(opcode);
+      return tgsi_get_opcode_info(opcode)->num_src;
    }
 }
 
@@ -62,17 +65,17 @@ GLuint brw_wm_nr_args( GLuint opcode )
 GLuint brw_wm_is_scalar_result( GLuint opcode )
 {
    switch (opcode) {
-   case OPCODE_COS:
-   case OPCODE_EX2:
-   case OPCODE_LG2:
-   case OPCODE_POW:
-   case OPCODE_RCP:
-   case OPCODE_RSQ:
-   case OPCODE_SIN:
-   case OPCODE_DP3:
-   case OPCODE_DP4:
-   case OPCODE_DPH:
-   case OPCODE_DST:
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_POW:
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_SIN:
+   case TGSI_OPCODE_DP3:
+   case TGSI_OPCODE_DP4:
+   case TGSI_OPCODE_DPH:
+   case TGSI_OPCODE_DST:
       return 1;
       
    default:
@@ -134,7 +137,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  * we'll use one of two code generators.
  */
 static void do_wm_prog( struct brw_context *brw,
-			struct brw_fragment_program *fp, 
+			struct brw_fragment_shader *fp, 
 			struct brw_wm_prog_key *key)
 {
    struct brw_wm_compile *c;
@@ -163,7 +166,7 @@ static void do_wm_prog( struct brw_context *brw,
    brw_init_compile(brw, &c->func);
 
    /* temporary sanity check assertion */
-   ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+   assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
 
    /*
     * Shader which use GLSL features such as flow control are handled
@@ -200,8 +203,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
 				 struct brw_wm_prog_key *key )
 {
    /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct brw_fragment_program *fp = 
-      (struct brw_fragment_program *)brw->fragment_program;
+   const struct brw_fragment_program *fp = brw->curr.fragment_shader;
    GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
    GLuint lookup = 0;
    GLuint line_aa;
-- 
cgit v1.2.3


From 99cc0fd67597cbcd6106afcf437a0d5e2431c9df Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 29 Oct 2009 20:18:01 +0000
Subject: i965g: work in progress on fragment shaders

---
 src/gallium/drivers/i965/brw_context.h     |   10 +-
 src/gallium/drivers/i965/brw_eu.c          |   20 +-
 src/gallium/drivers/i965/brw_eu.h          |    8 +-
 src/gallium/drivers/i965/brw_pipe_depth.c  |   42 +-
 src/gallium/drivers/i965/brw_pipe_rast.c   |   18 +
 src/gallium/drivers/i965/brw_pipe_rast.h   |    1 +
 src/gallium/drivers/i965/brw_pipe_shader.c |    4 +-
 src/gallium/drivers/i965/brw_screen.h      |    7 +
 src/gallium/drivers/i965/brw_vs_emit.c     |    2 -
 src/gallium/drivers/i965/brw_wm.c          |  167 ++---
 src/gallium/drivers/i965/brw_wm.h          |   41 +-
 src/gallium/drivers/i965/brw_wm_debug.c    |   17 +-
 src/gallium/drivers/i965/brw_wm_emit.c     |  195 +++---
 src/gallium/drivers/i965/brw_wm_fp.c       | 1031 ++++++++++------------------
 src/gallium/drivers/i965/brw_wm_glsl.c     |   12 +-
 src/gallium/drivers/i965/brw_wm_pass0.c    |   73 +-
 src/gallium/drivers/i965/brw_wm_pass1.c    |   26 +-
 src/gallium/drivers/i965/brw_wm_state.c    |    8 +-
 18 files changed, 682 insertions(+), 1000 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 7b85363e9f..e6c3161066 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -132,6 +132,8 @@ struct brw_depth_stencil_state {
    struct brw_cc2 cc2;
    struct brw_cc3 cc3;
    struct brw_cc7 cc7;
+
+   unsigned iz_lookup;
 };
 
 
@@ -164,7 +166,10 @@ struct brw_fragment_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
-   GLboolean isGLSL;
+   unsigned iz_lookup;
+   
+   boolean  uses_depth:1;
+   boolean  has_flow_control:1;
 
    unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
@@ -194,6 +199,7 @@ struct brw_fragment_shader {
 #define PIPE_NEW_COLOR_BUFFERS          0x40000
 #define PIPE_NEW_QUERY                  0x80000
 #define PIPE_NEW_SCISSOR                0x100000
+#define PIPE_NEW_BOUND_TEXTURES         0x200000
 
 
@@ -487,7 +493,7 @@ struct brw_context
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
 
-      const struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+      const struct brw_texture *texture[PIPE_MAX_SAMPLERS];
       const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_textures;
       unsigned num_samplers;
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
index 1189a35b6f..de43b14512 100644
--- a/src/gallium/drivers/i965/brw_eu.c
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -150,22 +150,22 @@ const GLuint *brw_get_program( struct brw_compile *p,
 /**
  * For each OPCODE_BGNSUB we create one of these.
  */
-struct brw_glsl_label
+struct brw_eu_label
 {
    GLuint label;     /**< the label number */
    GLuint position;  /**< the position of the brw instruction for this label */
-   struct brw_glsl_label *next;  /**< next in linked list */
+   struct brw_eu_label *next;  /**< next in linked list */
 };
 
 
 /**
  * For each OPCODE_CAL we create one of these.
  */
-struct brw_glsl_call
+struct brw_eu_call
 {
    GLuint call_inst_pos;  /**< location of the CAL instruction */
    GLuint label;
-   struct brw_glsl_call *next;  /**< next in linked list */
+   struct brw_eu_call *next;  /**< next in linked list */
 };
 
 
@@ -175,7 +175,7 @@ struct brw_glsl_call
 void
 brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
 {
-   struct brw_glsl_label *label = CALLOC_STRUCT(brw_glsl_label);
+   struct brw_eu_label *label = CALLOC_STRUCT(brw_eu_label);
    label->label = l;
    label->position = position;
    label->next = c->first_label;
@@ -189,7 +189,7 @@ brw_save_label(struct brw_compile *c, unsigned l, GLuint position)
 void
 brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
 {
-   struct brw_glsl_call *call = CALLOC_STRUCT(brw_glsl_call);
+   struct brw_eu_call *call = CALLOC_STRUCT(brw_eu_call);
    call->call_inst_pos = call_pos;
    call->label = label;
    call->next = c->first_call;
@@ -203,7 +203,7 @@ brw_save_call(struct brw_compile *c, GLuint label, GLuint call_pos)
 static GLuint
 brw_lookup_label(struct brw_compile *c, unsigned l)
 {
-   const struct brw_glsl_label *label;
+   const struct brw_eu_label *label;
    for (label = c->first_label; label; label = label->next) {
       if (l == label->label) {
          return label->position;
@@ -221,7 +221,7 @@ brw_lookup_label(struct brw_compile *c, unsigned l)
 void
 brw_resolve_cals(struct brw_compile *c)
 {
-    const struct brw_glsl_call *call;
+    const struct brw_eu_call *call;
 
     for (call = c->first_call; call; call = call->next) {
         const GLuint sub_loc = brw_lookup_label(c, call->label);
@@ -235,7 +235,7 @@ brw_resolve_cals(struct brw_compile *c)
 
     /* free linked list of calls */
     {
-        struct brw_glsl_call *call, *next;
+        struct brw_eu_call *call, *next;
         for (call = c->first_call; call; call = next) {
 	    next = call->next;
 	    FREE(call);
@@ -245,7 +245,7 @@ brw_resolve_cals(struct brw_compile *c)
 
     /* free linked list of labels */
     {
-        struct brw_glsl_label *label, *next;
+        struct brw_eu_label *label, *next;
 	for (label = c->first_label; label; label = next) {
 	    next = label->next;
 	    FREE(label);
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index 3379522104..7bddc3859c 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -109,8 +109,8 @@ struct brw_indirect {
 };
 
 
-struct brw_glsl_label;
-struct brw_glsl_call;
+struct brw_eu_label;
+struct brw_eu_call;
 
 
@@ -130,8 +130,8 @@ struct brw_compile {
    GLboolean single_program_flow;
    struct brw_context *brw;
 
-   struct brw_glsl_label *first_label;  /**< linked list of labels */
-   struct brw_glsl_call *first_call;    /**< linked list of CALs */
+   struct brw_eu_label *first_label;  /**< linked list of labels */
+   struct brw_eu_call *first_call;    /**< linked list of CALs */
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_pipe_depth.c b/src/gallium/drivers/i965/brw_pipe_depth.c
index 33fe517e0b..e010d76e0d 100644
--- a/src/gallium/drivers/i965/brw_pipe_depth.c
+++ b/src/gallium/drivers/i965/brw_pipe_depth.c
@@ -5,6 +5,10 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 
+/* XXX: Fixme - include this to get IZ_ defines
+ */
+#include "brw_wm.h"
+
 static unsigned brw_translate_compare_func(unsigned func)
 {
    switch (func) {
@@ -55,13 +59,9 @@ static unsigned translate_stencil_op(unsigned op)
    }
 }
 
-
-static void *
-brw_create_depth_stencil_state( struct pipe_context *pipe,
-				const struct pipe_depth_stencil_alpha_state *templ )
+static void create_bcc_state( struct brw_depth_stencil_state *zstencil,
+			      const struct pipe_depth_stencil_alpha_state *templ )
 {
-   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
-
    if (templ->stencil[0].enabled) {
       zstencil->cc0.stencil_enable = 1;
       zstencil->cc0.stencil_func =
@@ -108,6 +108,43 @@ brw_create_depth_stencil_state( struct pipe_context *pipe,
       zstencil->cc2.depth_test_function = brw_translate_compare_func(templ->depth.func);
       zstencil->cc2.depth_write_enable = templ->depth.writemask;
    }
+}
+
+/* Fold the alpha/depth/stencil enables already stored in cc0, cc2 and
+ * cc3 into zstencil->iz_lookup as IZ_*_BIT flags.
+ */
+static void create_wm_iz_state( struct brw_depth_stencil_state *zstencil )
+{
+   if (zstencil->cc3.alpha_test)
+      zstencil->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (zstencil->cc2.depth_test)
+      zstencil->iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+   if (zstencil->cc2.depth_write_enable)
+      zstencil->iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+   if (zstencil->cc0.stencil_enable)
+      zstencil->iz_lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+   if (zstencil->cc0.stencil_write_enable)
+      zstencil->iz_lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+
+}
+
+
+static void *
+brw_create_depth_stencil_state( struct pipe_context *pipe,
+				const struct pipe_depth_stencil_alpha_state *templ )
+{
+   struct brw_depth_stencil_state *zstencil = CALLOC_STRUCT(brw_depth_stencil_state);
+
+   /* create_wm_iz_state() reads through zstencil unconditionally, so
+    * fail cleanly if the allocation did not succeed. */
+   if (zstencil == NULL)
+      return NULL;
+
+   create_bcc_state( zstencil, templ );
+   create_wm_iz_state( zstencil );
 
    return (void *)zstencil;
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 86822d478a..51159bf147 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -64,3 +64,21 @@ calculate_line_stipple_rast()
    bls.bits1.inverse_repeat_count = tmpi;
 
 }
+
+
+
+static void
+calculate_wm_lookup()
+{
+   if (rast->fill_cw == PIPE_POLYGON_MODE_LINE &&
+       rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+      line_aa = AA_ALWAYS;
+   }
+   else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
+	    rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+      line_aa = AA_SOMETIMES;
+   }
+   else {
+      line_aa = AA_NEVER;
+   }
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.h b/src/gallium/drivers/i965/brw_pipe_rast.h
index 800a9208a7..9354f01e18 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.h
+++ b/src/gallium/drivers/i965/brw_pipe_rast.h
@@ -10,6 +10,7 @@ struct brw_rasterizer_state {
     */
    struct brw_clip_prog_key clip_key;
    struct brw_line_stipple bls;
+   unsigned unfilled_aa_line;
 };
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 8b61da763c..6e37eac634 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -39,7 +39,7 @@
  * as flow conditionals, loops, subroutines.
  * Some GLSL shaders may use these features, others might not.
  */
-GLboolean brw_wm_is_glsl(const struct brw_fragment_shader *fp)
+GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp)
 {
     return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 ||
 	    fp->info.insn_count[TGSI_OPCODE_IF] > 0 ||
@@ -144,7 +144,7 @@ static void brwProgramStringNotify( struct brw_context *brw,
       if (newFP == curFP)
 	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
       newFP->id = brw->program_id++;      
-      newFP->isGLSL = brw_wm_is_glsl(fprog);
+      newFP->has_flow_control = brw_wm_has_flow_control(fprog);
    }
    else if (target == GL_VERTEX_PROGRAM_ARB) {
       struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index eafd8ddf77..efa27db1e0 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -64,6 +64,13 @@ struct brw_buffer
    boolean is_user_buffer;
 };
 
+struct brw_texture
+{
+   struct pipe_texture base;
+
+   ubyte shader_swizzle;
+};
+
 
 /*
  * Cast wrappers
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 6809bccdec..bcc5c5f713 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1013,8 +1013,6 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
 				       src->SrcRegister.SwizzleZ,
 				       src->SrcRegister.SwizzleW);
 
-   /* Note this is ok for non-swizzle instructions: 
-    */
    reg.negate = src->SrcRegister.Negate ? 1 : 0;   
 
    /* XXX: abs, absneg
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index f0dabfcfd0..33602b59c1 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -28,14 +28,17 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
+#include "pipe/p_error.h"
 
 #include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
+#include "brw_screen.h"
 #include "brw_util.h"
 #include "brw_wm.h"
 #include "brw_state.h"
 #include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
 
 /** Return number of src args for given instruction */
@@ -85,12 +88,12 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
 
 
 /**
- * Do GPU code generation for non-GLSL shader.  non-GLSL shaders have
- * no flow control instructions so we can more readily do SSA-style
- * optimizations.
+ * Do GPU code generation for shaders without flow control.  Shaders
+ * without flow control instructions can more readily be analysed for
+ * SSA-style optimizations.
  */
 static void
-brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
    /* Augment fragment program.  Add instructions for pre- and
     * post-fragment-program tasks such as interpolation and fogging.
@@ -136,7 +139,7 @@ brw_wm_non_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
  * Depending on the instructions used (i.e. flow control instructions)
  * we'll use one of two code generators.
  */
-static void do_wm_prog( struct brw_context *brw,
+static int do_wm_prog( struct brw_context *brw,
 			struct brw_fragment_shader *fp, 
 			struct brw_wm_prog_key *key)
 {
@@ -153,7 +156,7 @@ static void do_wm_prog( struct brw_context *brw,
           * without triggering a segfault, no way to signal,
           * so just return.
           */
-         return;
+         return PIPE_ERROR_OUT_OF_MEMORY;
       }
    } else {
       memset(c, 0, sizeof(*brw->wm.compile_data));
@@ -166,19 +169,19 @@ static void do_wm_prog( struct brw_context *brw,
    brw_init_compile(brw, &c->func);
 
    /* temporary sanity check assertion */
-   assert(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
+   assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp));
 
    /*
     * Shader which use GLSL features such as flow control are handled
     * differently from "simple" shaders.
     */
-   if (fp->isGLSL) {
+   if (fp->has_flow_control) {
       c->dispatch_width = 8;
-      brw_wm_glsl_emit(brw, c);
+      brw_wm_branching_shader_emit(brw, c);
    }
    else {
       c->dispatch_width = 16;
-      brw_wm_non_glsl_emit(brw, c);
+      brw_wm_linear_shader_emit(brw, c);
    }
 
    if (BRW_DEBUG & DEBUG_WM)
@@ -195,6 +198,8 @@ static void do_wm_prog( struct brw_context *brw,
 				       program, program_size,
 				       &c->prog_data,
 				       &brw->wm.prog_data );
+
+   return 0;
 }
 
 
@@ -202,71 +207,36 @@ static void do_wm_prog( struct brw_context *brw,
 static void brw_wm_populate_key( struct brw_context *brw,
 				 struct brw_wm_prog_key *key )
 {
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   const struct brw_fragment_program *fp = brw->curr.fragment_shader;
-   GLboolean uses_depth = (fp->program.Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
-   GLuint lookup = 0;
-   GLuint line_aa;
-   GLuint i;
+   unsigned lookup, line_aa;
+   unsigned i;
 
    memset(key, 0, sizeof(*key));
 
-   /* Build the index for table lookup
+   /* PIPE_NEW_FRAGMENT_SHADER
+    * PIPE_NEW_DEPTH_STENCIL_ALPHA
     */
-   /* _NEW_COLOR */
-   if (fp->program.UsesKill ||
-       ctx->Color.AlphaEnabled)
-      lookup |= IZ_PS_KILL_ALPHATEST_BIT;
-
-   if (fp->program.Base.OutputsWritten & (1<<FRAG_RESULT_DEPTH))
-      lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
-
-   /* _NEW_DEPTH */
-   if (ctx->Depth.Test)
-      lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
-
-   if (ctx->Depth.Test &&  
-       ctx->Depth.Mask) /* ?? */
-      lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+   lookup = (brw->curr.zstencil->iz_lookup |
+	     brw->curr.fragment_shader->iz_lookup);
 
-   /* _NEW_STENCIL */
-   if (ctx->Stencil._Enabled) {
-      lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
 
-      if (ctx->Stencil.WriteMask[0] ||
-	  ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
-	 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
-   }
-
-   line_aa = AA_NEVER;
-
-   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
-   if (ctx->Line.SmoothFlag) {
-      if (brw->intel.reduced_primitive == GL_LINES) {
-	 line_aa = AA_ALWAYS;
-      }
-      else if (brw->intel.reduced_primitive == GL_TRIANGLES) {
-	 if (ctx->Polygon.FrontMode == GL_LINE) {
-	    line_aa = AA_SOMETIMES;
-
-	    if (ctx->Polygon.BackMode == GL_LINE ||
-		(ctx->Polygon.CullFlag &&
-		 ctx->Polygon.CullFaceMode == GL_BACK))
-	       line_aa = AA_ALWAYS;
-	 }
-	 else if (ctx->Polygon.BackMode == GL_LINE) {
-	    line_aa = AA_SOMETIMES;
-
-	    if ((ctx->Polygon.CullFlag &&
-		 ctx->Polygon.CullFaceMode == GL_FRONT))
-	       line_aa = AA_ALWAYS;
-	 }
-      }
+   /* PIPE_NEW_RAST
+    * BRW_NEW_REDUCED_PRIMITIVE 
+    */
+   switch (brw->reduced_primitive) {
+   case PIPE_PRIM_POINTS:
+      line_aa = AA_NEVER;
+      break;
+   case PIPE_PRIM_LINES:
+      line_aa = AA_ALWAYS;
+      break;
+   default:
+      line_aa = brw->curr.rast->unfilled_aa_line;
+      break;
    }
 	 
    brw_wm_lookup_iz(line_aa,
 		    lookup,
-		    uses_depth,
+		    brw->curr.fragment_shader->uses_depth,
 		    key);
 
    /* Revisit this, figure out if it's really useful, and either push
@@ -276,54 +246,39 @@ static void brw_wm_populate_key( struct brw_context *brw,
    key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/
 
    /* PIPE_NEW_RAST */
-   key->flat_shade = brw->rast.flat_shade;
+   key->flat_shade = brw->curr.rast->templ.flatshade;
 
    /* This can be determined by looking at the INTERP mode each input decl.
     */
-   key->linear_color = 0;
-
-   /* _NEW_TEXTURE */
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      if (i < brw->nr_textures) {
-	 const struct gl_texture_unit *unit = &ctx->Texture.Unit[i];
-	 const struct gl_texture_object *t = unit->_Current;
-	 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
-	 
-	 if (img->InternalFormat == GL_YCBCR_MESA) {
-	    key->yuvtex_mask |= 1 << i;
-	    if (img->TexFormat->MesaFormat == MESA_FORMAT_YCBCR)
-	       key->yuvtex_swap_mask |= 1 << i;
-	 }
+   key->linear_attrib_mask = 0;
 
-	 key->tex_swizzles[i] = t->_Swizzle;
+   /* PIPE_NEW_BOUND_TEXTURES */
+   for (i = 0; i < brw->curr.num_textures; i++) {
+      const struct brw_texture *tex = brw->curr.texture[i];
 	 
-	 if (0)
-	    key->shadowtex_mask |= 1<<i;
-      }
-      else {
-         key->tex_swizzles[i] = SWIZZLE_NOOP;
-      }
-   }
+      if (tex->base.format == PIPE_FORMAT_YCBCR)
+	 key->yuvtex_mask |= 1 << i;
 
+      if (tex->base.format == PIPE_FORMAT_YCBCR_REV)
+	 key->yuvtex_swap_mask |= 1 << i;
 
-   /* _NEW_FRAMEBUFFER */
-   if (brw->intel.driDrawable != NULL) {
-      key->drawable_height = brw->fb.cbufs[0].height;
+      /* XXX: shadow texture
+       */
+      /* key->shadowtex_mask |= 1<<i; */
    }
 
    /* CACHE_NEW_VS_PROG */
-   key->vp_nr_outputs_written = brw->vs.prog_data->nr_outputs_written;
+   key->vp_nr_outputs = brw->vs.prog_data->nr_outputs;
 
    /* The unique fragment program ID */
-   key->program_string_id = fp->id;
+   key->program_string_id = brw->curr.fragment_shader->id;
 }
 
 
-static void brw_prepare_wm_prog(struct brw_context *brw)
+static int brw_prepare_wm_prog(struct brw_context *brw)
 {
    struct brw_wm_prog_key key;
-   struct brw_fragment_program *fp = (struct brw_fragment_program *)
-      brw->fragment_program;
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
      
    brw_wm_populate_key(brw, &key);
 
@@ -335,23 +290,19 @@ static void brw_prepare_wm_prog(struct brw_context *brw)
 				      NULL, 0,
 				      &brw->wm.prog_data);
    if (brw->wm.prog_bo == NULL)
-      do_wm_prog(brw, fp, &key);
+      return do_wm_prog(brw, fs, &key);
+
+   return 0;
 }
 
 
 const struct brw_tracked_state brw_wm_prog = {
    .dirty = {
-      .mesa  = (_NEW_COLOR |
-		_NEW_DEPTH |
-                _NEW_HINT |
-		_NEW_STENCIL |
-		_NEW_POLYGON |
-		_NEW_LINE |
-		_NEW_LIGHT |
-		_NEW_BUFFERS |
-		_NEW_TEXTURE),
-      .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_WM_INPUT_DIMENSIONS |
+      .mesa  = (PIPE_NEW_FRAGMENT_SHADER |
+		PIPE_NEW_DEPTH_STENCIL_ALPHA |
+		PIPE_NEW_RAST |
+		PIPE_NEW_BOUND_TEXTURES),
+      .brw   = (BRW_NEW_WM_INPUT_DIMENSIONS |
 		BRW_NEW_REDUCED_PRIMITIVE),
       .cache = CACHE_NEW_VS_PROG,
    },
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 084430cf28..2cd5bb7081 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -33,9 +33,6 @@
 #ifndef BRW_WM_H
 #define BRW_WM_H
 
-#include "tgsi/tgsi_ureg.h"
-#include "tgsi/tgsi_ureg_parse.h"
-
 #include "brw_context.h"
 #include "brw_eu.h"
 
@@ -59,8 +56,8 @@
 #define AA_ALWAYS    2
 
 struct brw_wm_prog_key {
-   unsigned proj_attrib_mask; /**< one bit per fragment program attribute */
-   unsigned linear_attrib_mask:1;  /**< linear interpolation vs perspective interp */
+   unsigned proj_attrib_mask;    /**< one bit per fragment program attribute */
+   unsigned linear_attrib_mask;  /**< linear interpolation vs perspective interp */
 
    GLuint source_depth_reg:3;
    GLuint aa_dest_stencil_reg:3;
@@ -75,11 +72,10 @@ struct brw_wm_prog_key {
    GLuint yuvtex_mask:16;
    GLuint yuvtex_swap_mask:16;	/* UV swaped */
 
-   GLuint tex_swizzles[BRW_MAX_TEX_UNIT];
-
-   GLuint program_string_id:32;
+   GLuint vp_nr_outputs:6;
+   GLuint nr_cbufs:3;
 
-   GLuint vp_nr_outputs_written;
+   GLuint program_string_id;
 };
 
 
@@ -146,9 +142,8 @@ struct brw_wm_instruction {
    GLuint opcode:8;
    GLuint saturate:1;
    GLuint writemask:4;
-   GLuint tex_unit:4;   /* texture unit for TEX, TXD, TXP instructions */
-   GLuint tex_idx:3;    /* TEXTURE_1D,2D,3D,CUBE,RECT_INDEX source target */
-   GLuint tex_shadow:1; /* do shadow comparison? */
+   GLuint tex_unit:4;   /* texture/sampler unit for texture instructions */
+   GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/
    GLuint eot:1;    	/* End of thread indicator for FB_WRITE*/
    GLuint target:10;    /* target binding table index for FB_WRITE*/
 };
@@ -180,15 +175,17 @@ struct brw_wm_instruction {
 #define WM_FRONTFACING    (MAX_OPCODE + 8)
 #define MAX_WM_OPCODE     (MAX_OPCODE + 9)
 
-#define PROGRAM_PAYLOAD   (TGSI_FILE_COUNT)
-#define PAYLOAD_DEPTH     (FRAG_ATTRIB_MAX)
+#define BRW_FILE_PAYLOAD   (TGSI_FILE_COUNT)
+#define PAYLOAD_DEPTH      (FRAG_ATTRIB_MAX) /* ?? */
+
+struct brw_passfp_program;
 
 struct brw_wm_compile {
    struct brw_compile func;
    struct brw_wm_prog_key key;
    struct brw_wm_prog_data prog_data;
 
-   struct brw_fragment_program *fp;
+   struct brw_fragment_shader *fp;
 
    GLfloat (*env_param)[4];
 
@@ -201,15 +198,7 @@ struct brw_wm_compile {
     * simplifying and adding instructions for interpolation and
     * framebuffer writes.
     */
-   struct ureg_instruction prog_instructions[BRW_WM_MAX_INSN];
-   GLuint nr_fp_insns;
-   GLuint fp_temp;
-   GLuint fp_interp_emitted;
-   GLuint fp_fragcolor_emitted;
-
-   struct ureg_src pixel_xy;
-   struct ureg_src delta_xy;
-   struct ureg_src pixel_w;
+   struct brw_passfp_program *pass_fp;
 
 
    struct brw_wm_value vreg[BRW_WM_MAX_VREG];
@@ -298,8 +287,8 @@ void brw_wm_lookup_iz( GLuint line_aa,
 		       GLboolean ps_uses_depth,
 		       struct brw_wm_prog_key *key );
 
-//GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp);
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
index 04dec5ba39..65d7626eea 100644
--- a/src/gallium/drivers/i965/brw_wm_debug.c
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -28,7 +28,8 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-               
+
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_wm.h"
@@ -49,10 +50,10 @@ void brw_wm_print_value( struct brw_wm_compile *c,
 	    value - c->creg < BRW_WM_MAX_PARAM)
       debug_printf("c%d", value - c->creg);
    else if (value - c->payload.input_interp >= 0 &&
-	    value - c->payload.input_interp < FRAG_ATTRIB_MAX)
+	    value - c->payload.input_interp < PIPE_MAX_SHADER_INPUTS)
       debug_printf("i%d", value - c->payload.input_interp);
    else if (value - c->payload.depth >= 0 &&
-	    value - c->payload.depth < FRAG_ATTRIB_MAX)
+	    value - c->payload.depth < PIPE_MAX_SHADER_INPUTS)
       debug_printf("d%d", value - c->payload.depth);
    else 
       debug_printf("?");
@@ -100,10 +101,10 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 
    if (inst->writemask != BRW_WRITEMASK_XYZW)
       debug_printf(".%s%s%s%s", 
-		   GET_BIT(inst->writemask, 0) ? "x" : "",
-		   GET_BIT(inst->writemask, 1) ? "y" : "",
-		   GET_BIT(inst->writemask, 2) ? "z" : "",
-		   GET_BIT(inst->writemask, 3) ? "w" : "");
+		   (inst->writemask & BRW_WRITEMASK_X) ? "x" : "",
+		   (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "",
+		   (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "",
+		   (inst->writemask & BRW_WRITEMASK_W) ? "w" : "");
 
    switch (inst->opcode) {
    case WM_PIXELXY:
@@ -134,7 +135,7 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
       debug_printf(" = FRONTFACING");
       break;
    default:
-      debug_printf(" = %s", _mesa_opcode_string(inst->opcode));
+      debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic);
       break;
    }
 
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 5f7ae6592c..a705d8b344 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -28,10 +28,13 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-               
+
+#include "util/u_math.h"
+#include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
 
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
@@ -45,15 +48,15 @@ static INLINE struct brw_reg sechalf( struct brw_reg reg )
 
 /* Payload R0:
  *
- * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 tiles,
+ * R0.0 -- pixel mask, one bit for each of 4 pixels in 4 quads,
  *         corresponding to each of the 16 execution channels.
  * R0.1..8 -- ?
  * R1.0 -- triangle vertex 0.X
  * R1.1 -- triangle vertex 0.Y
- * R1.2 -- tile 0 x,y coords (2 packed uwords)
- * R1.3 -- tile 1 x,y coords (2 packed uwords)
- * R1.4 -- tile 2 x,y coords (2 packed uwords)
- * R1.5 -- tile 3 x,y coords (2 packed uwords)
+ * R1.2 -- quad 0 x,y coords (2 packed uwords)
+ * R1.3 -- quad 1 x,y coords (2 packed uwords)
+ * R1.4 -- quad 2 x,y coords (2 packed uwords)
+ * R1.5 -- quad 3 x,y coords (2 packed uwords)
  * R1.6 -- ?
  * R1.7 -- ?
  * R1.8 -- ?
@@ -134,11 +137,17 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
    /* XXX: is this needed any more, or is this a NOOP?
     */
    if (mask & BRW_WRITEMASK_Y) {
+#if 0
       /* Y' = height - 1 - Y */
       brw_ADD(p,
 	      dst[1],
 	      negate(retype(arg0[1], BRW_REGISTER_TYPE_W)),
 	      brw_imm_d(c->key.drawable_height - 1));
+#else /* no flip: copy the Y channel through unchanged */
+      brw_MOV(p,
+	      dst[1],
+	      retype(arg0[1], BRW_REGISTER_TYPE_W));
+#endif
    }
 }
 
@@ -279,28 +288,28 @@ static void emit_frontfacing( struct brw_compile *p,
 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
  * looking like:
  *
- * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
+ * arg0: q0.tl q0.tr q0.bl q0.br q1.tl q1.tr q1.bl q1.br
  *
  * and we're trying to produce:
  *
  *           DDX                     DDY
- * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
- *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
- *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
- *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
- *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
- *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
- *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
- *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
+ * dst: (q0.tr - q0.tl)     (q0.tl - q0.bl)
+ *      (q0.tr - q0.tl)     (q0.tr - q0.br)
+ *      (q0.br - q0.bl)     (q0.tl - q0.bl)
+ *      (q0.br - q0.bl)     (q0.tr - q0.br)
+ *      (q1.tr - q1.tl)     (q1.tl - q1.bl)
+ *      (q1.tr - q1.tl)     (q1.tr - q1.br)
+ *      (q1.br - q1.bl)     (q1.tl - q1.bl)
+ *      (q1.br - q1.bl)     (q1.tr - q1.br)
  *
- * and add another set of two more subspans if in 16-pixel dispatch mode.
+ * and add two more quads if in 16-pixel dispatch mode.
  *
  * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
  * for each pair, and vertstride = 2 jumps us 2 elements after processing a
  * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
  * between each other.  We could probably do it like ddx and swizzle the right
  * order later, but bail for now and just produce
- * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ * ((q0.tl - q0.bl)x4 (q1.tl - q1.bl)x4)
  */
 void emit_ddxy(struct brw_compile *p,
 	       const struct brw_reg *dst,
@@ -611,12 +620,12 @@ static void emit_dp3( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -633,12 +642,12 @@ static void emit_dp4( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -656,12 +665,12 @@ static void emit_dph( struct brw_compile *p,
 		      const struct brw_reg *arg0,
 		      const struct brw_reg *arg1 )
 {
-   const int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   const int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
    brw_MAC(p, brw_null_reg(), arg0[1], arg1[1]);
@@ -704,12 +713,12 @@ static void emit_math1( struct brw_compile *p,
 			GLuint mask,
 			const struct brw_reg *arg0 )
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_MOV(p, brw_message_reg(2), arg0[0]);
 
@@ -732,12 +741,12 @@ static void emit_math2( struct brw_compile *p,
 			const struct brw_reg *arg0,
 			const struct brw_reg *arg1)
 {
-   int dst_chan = _mesa_ffs(mask & BRW_WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & BRW_WRITEMASK_XYZW) - 1;
 
    if (!(mask & BRW_WRITEMASK_XYZW))
       return; /* Do not emit dead code */
 
-   assert(is_power_of_two(mask & BRW_WRITEMASK_XYZW));
+   assert(util_is_power_of_two(mask & BRW_WRITEMASK_XYZW));
 
    brw_push_insn_state(p);
 
@@ -790,21 +799,32 @@ static void emit_tex( struct brw_wm_compile *c,
    GLuint i, nr;
    GLuint emit;
    GLuint msg_type;
+   GLboolean shadow = FALSE;
 
    /* How many input regs are there?
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
       emit = BRW_WRITEMASK_X;
       nr = 1;
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_SHADOW1D:
+      emit = BRW_WRITEMASK_XW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: /* RECT takes x,y like 2D */
       emit = BRW_WRITEMASK_XY;
       nr = 2;
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      emit = BRW_WRITEMASK_XYW;
+      nr = 4;
+      shadow = TRUE;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       emit = BRW_WRITEMASK_XYZ;
       nr = 3;
       break;
@@ -813,11 +833,6 @@ static void emit_tex( struct brw_wm_compile *c,
       abort();
    }
 
-   if (inst->tex_shadow) {
-      nr = 4;
-      emit |= BRW_WRITEMASK_W;
-   }
-
    msgLength = 1;
 
    for (i = 0; i < nr; i++) {
@@ -832,12 +847,12 @@ static void emit_tex( struct brw_wm_compile *c,
    responseLength = 8;		/* always */
 
    if (BRW_IS_IGDNG(p->brw)) {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE_IGDNG;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_IGDNG;
    } else {
-       if (inst->tex_shadow)
+       if (shadow)
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
        else
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
@@ -870,20 +885,23 @@ static void emit_txb( struct brw_wm_compile *c,
    GLuint msg_type;
    /* Shadow ignored for txb.
     */
-   switch (inst->tex_idx) {
-   case TEXTURE_1D_INDEX:
+   switch (inst->tex_target) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_2D_INDEX:
-   case TEXTURE_RECT_INDEX:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
-   case TEXTURE_3D_INDEX:
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
       brw_MOV(p, brw_message_reg(2), arg[0]);
       brw_MOV(p, brw_message_reg(4), arg[1]);
       brw_MOV(p, brw_message_reg(6), arg[2]);
@@ -976,10 +994,10 @@ static void emit_kil( struct brw_wm_compile *c,
    }
 }
 
-/* KIL_NV kills the pixels that are currently executing, not based on a test
+/* KILP kills the pixels that are currently executing, not based on a test
  * of the arguments.
  */
-static void emit_kil_nv( struct brw_wm_compile *c )
+static void emit_killp( struct brw_wm_compile *c )
 {
    struct brw_compile *p = &c->func;
    struct brw_reg r0uw = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
@@ -1259,7 +1277,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
     */
    spill_values(c, c->payload.depth, 4);
    spill_values(c, c->creg, c->nr_creg);
-   spill_values(c, c->payload.input_interp, FRAG_ATTRIB_MAX);
+   spill_values(c, c->payload.input_interp, PIPE_MAX_SHADER_INPUTS);
    
 
    for (insn = 0; insn < c->nr_insns; insn++) {
@@ -1328,89 +1346,89 @@ void brw_wm_emit( struct brw_wm_compile *c )
 
 	 /* Straightforward arithmetic:
 	  */
-      case OPCODE_ADD:
+      case TGSI_OPCODE_ADD:
 	 emit_alu2(p, brw_ADD, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_FRC:
+      case TGSI_OPCODE_FRC:
 	 emit_alu1(p, brw_FRC, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_FLR:
+      case TGSI_OPCODE_FLR:
 	 emit_alu1(p, brw_RNDD, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_DDX:
+      case TGSI_OPCODE_DDX:
 	 emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
 	 break;
 
-      case OPCODE_DDY:
+      case TGSI_OPCODE_DDY:
 	 emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
 	 break;
 
-      case OPCODE_DP3:
+      case TGSI_OPCODE_DP3:
 	 emit_dp3(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_DP4:
+      case TGSI_OPCODE_DP4:
 	 emit_dp4(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_DPH:
+      case TGSI_OPCODE_DPH:
 	 emit_dph(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_TRUNC:
+      case TGSI_OPCODE_TRUNC:
 	 emit_trunc(p, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_LRP:
+      case TGSI_OPCODE_LRP:
 	 emit_lrp(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MAD:	
+      case TGSI_OPCODE_MAD:	
 	 emit_mad(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MOV:
+      case TGSI_OPCODE_MOV:
 	 emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_MUL:
+      case TGSI_OPCODE_MUL:
 	 emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_XPD:
+      case TGSI_OPCODE_XPD:
 	 emit_xpd(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
 	 /* Higher math functions:
 	  */
-      case OPCODE_RCP:
+      case TGSI_OPCODE_RCP:
 	 emit_math1(p, BRW_MATH_FUNCTION_INV, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_RSQ:
+      case TGSI_OPCODE_RSQ:
 	 emit_math1(p, BRW_MATH_FUNCTION_RSQ, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_SIN:
+      case TGSI_OPCODE_SIN:
 	 emit_math1(p, BRW_MATH_FUNCTION_SIN, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_COS:
+      case TGSI_OPCODE_COS:
 	 emit_math1(p, BRW_MATH_FUNCTION_COS, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_EX2:
+      case TGSI_OPCODE_EX2:
 	 emit_math1(p, BRW_MATH_FUNCTION_EXP, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_LG2:
+      case TGSI_OPCODE_LG2:
 	 emit_math1(p, BRW_MATH_FUNCTION_LOG, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_SCS:
+      case TGSI_OPCODE_SCS:
 	 /* There is an scs math function, but it would need some
 	  * fixup for 16-element execution.
 	  */
@@ -1420,71 +1438,70 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	    emit_math1(p, BRW_MATH_FUNCTION_SIN, dst+1, (dst_flags&SATURATE)|BRW_WRITEMASK_X, args[0]);
 	 break;
 
-      case OPCODE_POW:
+      case TGSI_OPCODE_POW:
 	 emit_math2(p, BRW_MATH_FUNCTION_POW, dst, dst_flags, args[0], args[1]);
 	 break;
 
 	 /* Comparisons:
 	  */
-      case OPCODE_CMP:
+      case TGSI_OPCODE_CMP:
 	 emit_cmp(p, dst, dst_flags, args[0], args[1], args[2]);
 	 break;
 
-      case OPCODE_MAX:
+      case TGSI_OPCODE_MAX:
 	 emit_max(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_MIN:
+      case TGSI_OPCODE_MIN:
 	 emit_min(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_SLT:
+      case TGSI_OPCODE_SLT:
 	 emit_slt(p, dst, dst_flags, args[0], args[1]);
 	 break;
 
-      case OPCODE_SLE:
+      case TGSI_OPCODE_SLE:
 	 emit_sle(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SGT:
+      case TGSI_OPCODE_SGT:
 	 emit_sgt(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SGE:
+      case TGSI_OPCODE_SGE:
 	 emit_sge(p, dst, dst_flags, args[0], args[1]);
 	 break;
-      case OPCODE_SEQ:
+      case TGSI_OPCODE_SEQ:
 	 emit_seq(p, dst, dst_flags, args[0], args[1]);
 	break;
-      case OPCODE_SNE:
+      case TGSI_OPCODE_SNE:
 	 emit_sne(p, dst, dst_flags, args[0], args[1]);
 	break;
 
-      case OPCODE_LIT:
+      case TGSI_OPCODE_LIT:
 	 emit_lit(p, dst, dst_flags, args[0]);
 	 break;
 
 	 /* Texturing operations:
 	  */
-      case OPCODE_TEX:
+      case TGSI_OPCODE_TEX:
 	 emit_tex(c, inst, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_TXB:
+      case TGSI_OPCODE_TXB:
 	 emit_txb(c, inst, dst, dst_flags, args[0]);
 	 break;
 
-      case OPCODE_KIL:
+      case TGSI_OPCODE_KIL:
 	 emit_kil(c, args[0]);
 	 break;
 
-      case OPCODE_KIL_NV:
-	 emit_kil_nv(c);
+      case TGSI_OPCODE_KILP:
+	 emit_killp(c);
 	 break;
 
       default:
 	 debug_printf("Unsupported opcode %i (%s) in fragment shader\n",
-		      inst->opcode, inst->opcode < MAX_OPCODE ?
-				    _mesa_opcode_string(inst->opcode) :
-				    "unknown");
+		      inst->opcode, 
+		      tgsi_get_opcode_info(inst->opcode)->mnemonic);
       }
       
       for (i = 0; i < 4; i++)
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index d594730730..8ba037cdae 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -30,9 +30,8 @@
   */
                
 
-#include "pipe/p_shader_constants.h"
+#include "pipe/p_shader_tokens.h"
 
-#include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_util.h"
 
@@ -43,7 +42,7 @@
 #define W    3
 
 
-static const char *wm_opcode_strings[] = {   
+static const char *wm_opcode_strings[] = {
    "PIXELXY",
    "DELTAXY",
    "PIXELW",
@@ -57,143 +56,6 @@ static const char *wm_opcode_strings[] = {
 
 
-/***********************************************************************
- * Source regs
- */
-
-static struct prog_src_register src_reg(GLuint file, GLuint idx)
-{
-   struct prog_src_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.Swizzle = SWIZZLE_NOOP;
-   reg.RelAddr = 0;
-   reg.Negate = NEGATE_NONE;
-   reg.Abs = 0;
-   return reg;
-}
-
-static struct prog_src_register src_reg_from_dst(struct prog_dst_register dst)
-{
-   return src_reg(dst.File, dst.Index);
-}
-
-static struct prog_src_register src_undef( void )
-{
-   return src_reg(PROGRAM_UNDEFINED, 0);
-}
-
-static GLboolean src_is_undef(struct prog_src_register src)
-{
-   return src.File == PROGRAM_UNDEFINED;
-}
-
-static struct prog_src_register src_swizzle( struct prog_src_register reg, int x, int y, int z, int w )
-{
-   reg.Swizzle = MAKE_SWIZZLE4(x,y,z,w);
-   return reg;
-}
-
-static struct prog_src_register src_swizzle1( struct prog_src_register reg, int x )
-{
-   return src_swizzle(reg, x, x, x, x);
-}
-
-static struct prog_src_register src_swizzle4( struct prog_src_register reg, uint swizzle )
-{
-   reg.Swizzle = swizzle;
-   return reg;
-}
-
-
-/***********************************************************************
- * Dest regs
- */
-
-static struct prog_dst_register dst_reg(GLuint file, GLuint idx)
-{
-   struct prog_dst_register reg;
-   reg.File = file;
-   reg.Index = idx;
-   reg.WriteMask = BRW_WRITEMASK_XYZW;
-   reg.RelAddr = 0;
-   reg.CondMask = COND_TR;
-   reg.CondSwizzle = 0;
-   reg.CondSrc = 0;
-   reg.pad = 0;
-   return reg;
-}
-
-static struct prog_dst_register dst_mask( struct prog_dst_register reg, int mask )
-{
-   reg.WriteMask &= mask;
-   return reg;
-}
-
-static struct prog_dst_register dst_undef( void )
-{
-   return dst_reg(PROGRAM_UNDEFINED, 0);
-}
-
-
-
-static struct prog_dst_register get_temp( struct brw_wm_compile *c )
-{
-   int bit = _mesa_ffs( ~c->fp_temp );
-
-   if (!bit) {
-      debug_printf("%s: out of temporaries\n", __FILE__);
-      exit(1);
-   }
-
-   c->fp_temp |= 1<<(bit-1);
-   return dst_reg(PROGRAM_TEMPORARY, c->first_internal_temp+(bit-1));
-}
-
-
-static void release_temp( struct brw_wm_compile *c, struct prog_dst_register temp )
-{
-   c->fp_temp &= ~(1 << (temp.Index - c->first_internal_temp));
-}
-
-
-/***********************************************************************
- * Instructions 
- */
-
-static struct prog_instruction *get_fp_inst(struct brw_wm_compile *c)
-{
-   return &c->prog_instructions[c->nr_fp_insns++];
-}
-
-static struct prog_instruction *emit_insn(struct brw_wm_compile *c,
-					const struct prog_instruction *inst0)
-{
-   struct prog_instruction *inst = get_fp_inst(c);
-   *inst = *inst0;
-   return inst;
-}
-
-static struct prog_instruction * emit_op(struct brw_wm_compile *c,
-					 GLuint op,
-					 struct prog_dst_register dest,
-					 GLuint saturate,
-					 struct prog_src_register src0,
-					 struct prog_src_register src1,
-					 struct prog_src_register src2 )
-{
-   struct prog_instruction *inst = get_fp_inst(c);
-      
-   memset(inst, 0, sizeof(*inst));
-
-   inst->Opcode = op;
-   inst->DstReg = dest;
-   inst->SaturateMode = saturate;   
-   inst->SrcReg[0] = src0;
-   inst->SrcReg[1] = src1;
-   inst->SrcReg[2] = src2;
-   return inst;
-}
 
 
 /* Many opcodes produce the same value across all the result channels.
@@ -202,32 +64,28 @@ static struct prog_instruction * emit_op(struct brw_wm_compile *c,
  * anyway.  We can easily get both by emitting the opcode to one channel, and
  * then MOVing it to the others, which brw_wm_pass*.c already understands.
  */
-static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
-						 const struct prog_instruction *inst0)
-{
-   struct prog_instruction *inst;
-   unsigned int dst_chan;
-   unsigned int other_channel_mask;
-
-   if (inst0->DstReg.WriteMask == 0)
-      return NULL;
-
-   dst_chan = _mesa_ffs(inst0->DstReg.WriteMask) - 1;
-   inst = get_fp_inst(c);
-   *inst = *inst0;
-   inst->DstReg.WriteMask = 1 << dst_chan;
-
-   other_channel_mask = inst0->DstReg.WriteMask & ~(1 << dst_chan);
-   if (other_channel_mask != 0) {
-      inst = emit_op(c,
-		     TGSI_OPCODE_MOV,
-		     dst_mask(inst0->DstReg, other_channel_mask),
-		     0,
-		     src_swizzle1(src_reg_from_dst(inst0->DstReg), dst_chan),
-		     src_undef(),
-		     src_undef());
+static void emit_scalar_insn(struct brw_wm_compile *c,
+			     unsigned opcode,
+			     struct brw_dst dst,
+			     struct brw_src src0,
+			     struct brw_src src1,
+			     struct brw_src src2 )
+{
+   unsigned first_chan = ffs(dst.writemask) - 1;
+   unsigned first_mask = 1 << first_chan;
+
+   if (dst.writemask == 0)
+      return;
+
+   emit_op( c, opcode,
+	    brw_writemask(dst, first_mask),
+	    src0, src1, src2 );
+
+   if (dst.writemask != first_mask) {
+      emit_op1(c, TGSI_OPCODE_MOV,
+	       brw_writemask(dst, ~first_mask),
+	       src_swizzle1(brw_src(dst), first_chan));
    }
-   return inst;
 }
 
 
@@ -235,11 +93,11 @@ static struct prog_instruction *emit_scalar_insn(struct brw_wm_compile *c,
  * Special instructions for interpolation and other tasks
  */
 
-static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
+static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->pixel_xy)) {
-      struct prog_dst_register pixel_xy = get_temp(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+      struct ureg_dst pixel_xy = get_temp(c);
+      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       
       /* Emit the out calculations, and hold onto the results.  Use
@@ -250,7 +108,6 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
       emit_op(c,
 	      WM_PIXELXY,
 	      dst_mask(pixel_xy, BRW_WRITEMASK_XY),
-	      0,
 	      payload_r0_depth,
 	      src_undef(),
 	      src_undef());
@@ -261,19 +118,18 @@ static struct prog_src_register get_pixel_xy( struct brw_wm_compile *c )
    return c->pixel_xy;
 }
 
-static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
+static struct ureg_src get_delta_xy( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->delta_xy)) {
-      struct prog_dst_register delta_xy = get_temp(c);
-      struct prog_src_register pixel_xy = get_pixel_xy(c);
-      struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
+      struct ureg_dst delta_xy = get_temp(c);
+      struct ureg_src pixel_xy = get_pixel_xy(c);
+      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       /* deltas.xy = DELTAXY pixel_xy, payload[0]
        */
       emit_op(c,
 	      WM_DELTAXY,
 	      dst_mask(delta_xy, BRW_WRITEMASK_XY),
-	      0,
 	      pixel_xy, 
 	      payload_r0_depth,
 	      src_undef());
@@ -284,19 +140,18 @@ static struct prog_src_register get_delta_xy( struct brw_wm_compile *c )
    return c->delta_xy;
 }
 
-static struct prog_src_register get_pixel_w( struct brw_wm_compile *c )
+static struct ureg_src get_pixel_w( struct brw_wm_compile *c )
 {
    if (src_is_undef(c->pixel_w)) {
-      struct prog_dst_register pixel_w = get_temp(c);
-      struct prog_src_register deltas = get_delta_xy(c);
-      struct prog_src_register interp_wpos = src_reg(PROGRAM_PAYLOAD, FRAG_ATTRIB_WPOS);
+      struct ureg_dst pixel_w = get_temp(c);
+      struct ureg_src deltas = get_delta_xy(c);
+      struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS);
 
       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
        */
       emit_op(c,
 	      WM_PIXELW,
 	      dst_mask(pixel_w, BRW_WRITEMASK_W),
-	      0,
 	      interp_wpos,
 	      deltas, 
 	      src_undef());
@@ -313,9 +168,9 @@ static void emit_interp( struct brw_wm_compile *c,
 			 GLuint semantic_index,
 			 GLuint interp_mode )
 {
-   struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx);
-   struct prog_src_register interp = src_reg(PROGRAM_PAYLOAD, idx);
-   struct prog_src_register deltas = get_delta_xy(c);
+   struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+   struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx);
+   struct ureg_src deltas = get_delta_xy(c);
 
    /* Need to use PINTERP on attributes which have been
     * multiplied by 1/W in the SF program, and LINTERP on those
@@ -325,271 +180,197 @@ static void emit_interp( struct brw_wm_compile *c,
    case FRAG_ATTRIB_WPOS:
       /* Have to treat wpos.xy specially:
        */
-      emit_op(c,
+      emit_op1(c,
 	      WM_WPOSXY,
 	      dst_mask(dst, BRW_WRITEMASK_XY),
-	      0,
-	      get_pixel_xy(c),
-	      src_undef(),
-	      src_undef());
+	      get_pixel_xy(c));
       
-      dst = dst_mask(dst, BRW_WRITEMASK_ZW);
-
-      /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
+      /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
        */
-      emit_op(c,
-	      WM_LINTERP,
-	      dst,
-	      0,
-	      interp,
-	      deltas,
-	      src_undef());
+      emit_op2(c,
+	       WM_LINTERP,
+	       dst_mask(dst, BRW_WRITEMASK_ZW),
+	       interp,
+	       deltas);
       break;
 
    case TGSI_SEMANTIC_COLOR:
       if (c->key.flat_shade) {
-	 emit_op(c,
+	 emit_op1(c,
 		 WM_CINTERP,
 		 dst,
-		 0,
-		 interp,
-		 src_undef(),
-		 src_undef());
+		 interp);
+      }
+      else if (interp_mode == TGSI_INTERPOLATE_LINEAR) {
+	 emit_op2(c,
+		  WM_LINTERP,
+		  dst,
+		  interp,
+		  deltas);
       }
       else {
-	 emit_op(c,
-		 translate_interp_mode(interp_mode),
-		 dst,
-		 0,
-		 interp,
-		 deltas,
-		 src_undef());
+	 emit_op3(c,
+		  WM_PINTERP,
+		  dst,
+		  interp,
+		  deltas,
+		  get_pixel_w(c));
       }
+
       break;
    case FRAG_ATTRIB_FOGC:
       /* Interpolate the fog coordinate */
-      emit_op(c,
+      emit_op3(c,
 	      WM_PINTERP,
 	      dst_mask(dst, BRW_WRITEMASK_X),
-	      0,
 	      interp,
 	      deltas,
 	      get_pixel_w(c));
 
-      emit_op(c,
+      emit_op1(c,
 	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, BRW_WRITEMASK_YZW),
-	      0,
-	      src_swizzle(interp,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ONE),
-	      src_undef(),
-	      src_undef());
+	      dst_mask(dst, BRW_WRITEMASK_YZ),
+	      brw_imm1f(0.0));
+
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      dst_mask(dst, BRW_WRITEMASK_W),
+	      brw_imm1f(1.0));
       break;
 
    case FRAG_ATTRIB_FACE:
       /* XXX review/test this case */
-      emit_op(c,
-              WM_FRONTFACING,
-              dst_mask(dst, BRW_WRITEMASK_X),
-              0,
-              src_undef(),
-              src_undef(),
-              src_undef());
+      emit_op0(c,
+	       WM_FRONTFACING,
+	       dst_mask(dst, BRW_WRITEMASK_X));
+      
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      dst_mask(dst, BRW_WRITEMASK_YZ),
+	      brw_imm1f(0.0));
+
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      dst_mask(dst, BRW_WRITEMASK_W),
+	      brw_imm1f(1.0));
       break;
 
    case FRAG_ATTRIB_PNTC:
       /* XXX review/test this case */
-      emit_op(c,
-	      WM_PINTERP,
-	      dst_mask(dst, BRW_WRITEMASK_XY),
-	      0,
-	      interp,
-	      deltas,
-	      get_pixel_w(c));
-
-      emit_op(c,
+      emit_op3(c,
+	       WM_PINTERP,
+	       dst_mask(dst, BRW_WRITEMASK_XY),
+	       interp,
+	       deltas,
+	       get_pixel_w(c));
+
+      emit_op1(c,
 	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, BRW_WRITEMASK_ZW),
-	      0,
-	      src_swizzle(interp,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ZERO,
-			  SWIZZLE_ONE),
-	      src_undef(),
-	      src_undef());
-      break;
+	      dst_mask(dst, BRW_WRITEMASK_Z),
+	      brw_imm1f(c->pass_fp, 0.0f));
 
-   default:
-      emit_op(c,
-	      translate_interp_mode(interp_mode),
-	      dst,
-	      0,
-	      interp,
-	      deltas,
-	      get_pixel_w(c));
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      dst_mask(dst, BRW_WRITEMASK_W),
+	      brw_imm1f(c->pass_fp, 1.0f));
       break;
-   }
-}
-
-/***********************************************************************
- * Hacks to extend the program parameter and constant lists.
- */
-
-/* Add the fog parameters to the parameter list of the original
- * program, rather than creating a new list.  Doesn't really do any
- * harm and it's not as if the parameter handling isn't a big hack
- * anyway.
- */
-static struct prog_src_register search_or_add_param5(struct brw_wm_compile *c, 
-                                                     GLint s0,
-                                                     GLint s1,
-                                                     GLint s2,
-                                                     GLint s3,
-                                                     GLint s4)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   gl_state_index tokens[STATE_LENGTH];
-   GLuint idx;
-   tokens[0] = s0;
-   tokens[1] = s1;
-   tokens[2] = s2;
-   tokens[3] = s3;
-   tokens[4] = s4;
-   
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_STATE_VAR &&
-	  memcmp(paramList->Parameters[idx].StateIndexes, tokens, sizeof(tokens)) == 0)
-	 return src_reg(PROGRAM_STATE_VAR, idx);
-   }
-
-   idx = _mesa_add_state_reference( paramList, tokens );
-
-   return src_reg(PROGRAM_STATE_VAR, idx);
-}
 
+   default: 
+      switch (interp_mode) {
+      case TGSI_INTERPOLATE_CONSTANT:
+	 emit_op1(c,
+		  WM_CINTERP,
+		  dst,
+		  interp);
+	 break;
 
-static struct prog_src_register search_or_add_const4f( struct brw_wm_compile *c, 
-						     GLfloat s0,
-						     GLfloat s1,
-						     GLfloat s2,
-						     GLfloat s3)
-{
-   struct gl_program_parameter_list *paramList = c->fp->program.Base.Parameters;
-   GLfloat values[4];
-   GLuint idx;
-   GLuint swizzle;
-
-   values[0] = s0;
-   values[1] = s1;
-   values[2] = s2;
-   values[3] = s3;
-
-   /* Have to search, otherwise multiple compilations will each grow
-    * the parameter list.
-    */
-   for (idx = 0; idx < paramList->NumParameters; idx++) {
-      if (paramList->Parameters[idx].Type == PROGRAM_CONSTANT &&
-	  memcmp(paramList->ParameterValues[idx], values, sizeof(values)) == 0)
+      case TGSI_INTERPOLATE_LINEAR:
+	 emit_op2(c,
+		  WM_LINTERP,
+		  dst,
+		  interp,
+		  deltas);
+	 break;
 
-	 /* XXX: this mimics the mesa bug which puts all constants and
-	  * parameters into the "PROGRAM_STATE_VAR" category:
-	  */
-	 return src_reg(PROGRAM_STATE_VAR, idx);
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+	 emit_op3(c,
+		  WM_PINTERP,
+		  dst,
+		  interp,
+		  deltas,
+		  get_pixel_w(c));
+	 break;
+      }
+      break;
    }
-   
-   idx = _mesa_add_unnamed_constant( paramList, values, 4, &swizzle );
-   assert(swizzle == SWIZZLE_NOOP); /* Need to handle swizzle in reg setup */
-   return src_reg(PROGRAM_STATE_VAR, idx);
 }
 
 
-
 /***********************************************************************
  * Expand various instructions here to simpler forms.  
  */
 static void precalc_dst( struct brw_wm_compile *c,
-			       const struct prog_instruction *inst )
+			 struct brw_dst dst,
+			 struct brw_src src0,
+			 struct brw_src src1 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_src_register src1 = inst->SrcReg[1];
-   struct prog_dst_register dst = inst->DstReg;
-   
    if (dst.WriteMask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
-      emit_op(c,
-	      TGSI_OPCODE_MUL,
-	      dst_mask(dst, BRW_WRITEMASK_Y),
-	      inst->SaturateMode,
-	      src0,
-	      src1,
-	      src_undef());
+      emit_op2(c,
+	       TGSI_OPCODE_MUL,
+	       dst_mask(dst, BRW_WRITEMASK_Y),
+	       src0,
+	       src1);
    }
 
    if (dst.WriteMask & BRW_WRITEMASK_XZ) {
       struct prog_instruction *swz;
       GLuint z = GET_SWZ(src0.Swizzle, Z);
 
-      /* dst.xz = swz src0.1zzz
+      /* dst.z = mov src0.zzzz
+       */
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      dst_mask(dst, BRW_WRITEMASK_Z),
+	      src_swizzle1(src0, Z));
+
+      /* dst.x = immf(1.0)
        */
-      swz = emit_op(c,
-		    TGSI_OPCODE_MOV,
-		    dst_mask(dst, BRW_WRITEMASK_XZ),
-		    inst->SaturateMode,
-		    src_swizzle(src0, SWIZZLE_ONE, z, z, z),
-		    src_undef(),
-		    src_undef());
-      /* Avoid letting negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate &= ~NEGATE_X;
+      emit_op1(c,
+	      TGSI_OPCODE_MOV,
+	      brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+	      src_immf(c, 1.0));
    }
    if (dst.WriteMask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
-      emit_op(c,
-	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, BRW_WRITEMASK_W),
-	      inst->SaturateMode,
-	      src1,
-	      src_undef(),
-	      src_undef());
+      emit_op1(c,
+	       TGSI_OPCODE_MOV,
+	       dst_mask(dst, BRW_WRITEMASK_W),
+	       src1);
    }
 }
 
 
 static void precalc_lit( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
+			 struct ureg_dst dst,
+			 struct ureg_src src0 )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
-   struct prog_dst_register dst = inst->DstReg;
-   
    if (dst.WriteMask & BRW_WRITEMASK_XW) {
-      struct prog_instruction *swz;
-
-      /* dst.xw = swz src0.1111
+      /* dst.xw = imm(1.0f)
        */
-      swz = emit_op(c,
-		    TGSI_OPCODE_MOV,
-		    dst_mask(dst, BRW_WRITEMASK_XW),
-		    0,
-		    src_swizzle1(src0, SWIZZLE_ONE),
-		    src_undef(),
-		    src_undef());
-      /* Avoid letting the negation flag of src0 affect our 1 constant. */
-      swz->SrcReg[0].Negate = NEGATE_NONE;
+      emit_op1(c,
+	       TGSI_OPCODE_MOV,
+	       brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0),
+	       brw_imm1f(1.0f));
    }
 
    if (dst.WriteMask & BRW_WRITEMASK_YZ) {
-      emit_op(c,
-	      TGSI_OPCODE_LIT,
-	      dst_mask(dst, BRW_WRITEMASK_YZ),
-	      inst->SaturateMode,
-	      src0,
-	      src_undef(),
-	      src_undef());
+      emit_op1(c,
+	       TGSI_OPCODE_LIT,
+	       brw_writemask(dst, BRW_WRITEMASK_YZ),
+	       src0);
    }
 }
 
@@ -601,99 +382,62 @@ static void precalc_lit( struct brw_wm_compile *c,
  * instruction itself.
  */
 static void precalc_tex( struct brw_wm_compile *c,
-			 const struct prog_instruction *inst )
+			 struct brw_dst dst,
+			 unsigned unit,
+			 struct brw_src src0 )
 {
-   struct prog_src_register coord;
-   struct prog_dst_register tmpcoord;
-   const GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+   struct ureg_src coord = src_undef();
+   struct ureg_dst tmp = dst_undef();
 
    assert(unit < BRW_MAX_TEX_UNIT);
 
+   /* Cubemap: find longest component of coord vector and normalize
+    * it.
+    */
    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
-       struct prog_instruction *out;
-       struct prog_dst_register tmp0 = get_temp(c);
-       struct prog_src_register tmp0src = src_reg_from_dst(tmp0);
-       struct prog_dst_register tmp1 = get_temp(c);
-       struct prog_src_register tmp1src = src_reg_from_dst(tmp1);
-       struct prog_src_register src0 = inst->SrcReg[0];
-
-       /* find longest component of coord vector and normalize it */
-       tmpcoord = get_temp(c);
-       coord = src_reg_from_dst(tmpcoord);
-
-       /* tmpcoord = src0 (i.e.: coord = src0) */
-       out = emit_op(c, TGSI_OPCODE_MOV,
-                     tmpcoord,
-                     0,
-                     src0,
-                     src_undef(),
-                     src_undef());
-       out->SrcReg[0].Negate = NEGATE_NONE;
-       out->SrcReg[0].Abs = 1;
-
-       /* tmp0 = MAX(coord.X, coord.Y) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp0,
-               0,
-               src_swizzle1(coord, X),
-               src_swizzle1(coord, Y),
-               src_undef());
-
-       /* tmp1 = MAX(tmp0, coord.Z) */
-       emit_op(c, TGSI_OPCODE_MAX,
-               tmp1,
-               0,
-               tmp0src,
-               src_swizzle1(coord, Z),
-               src_undef());
-
-       /* tmp0 = 1 / tmp1 */
-       emit_op(c, TGSI_OPCODE_RCP,
-               dst_mask(tmp0, BRW_WRITEMASK_X),
-               0,
-               tmp1src,
-               src_undef(),
-               src_undef());
-
-       /* tmpCoord = src0 * tmp0 */
-       emit_op(c, TGSI_OPCODE_MUL,
-               tmpcoord,
-               0,
-               src0,
-               src_swizzle1(tmp0src, SWIZZLE_X),
-               src_undef());
-
-       release_temp(c, tmp0);
-       release_temp(c, tmp1);
+      struct ureg_src tmpsrc;
+
+      tmp = get_temp(c);
+      tmpsrc = brw_src(tmpcoord)
+
+      /* tmp = abs(src0) */
+      emit_op1(c, 
+	       TGSI_OPCODE_MOV,
+	       tmp,
+	       brw_abs(src0));
+
+      /* tmp.X = MAX(tmp.X, tmp.Y) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+	       brw_writemask(tmp, BRW_WRITEMASK_X),
+	       src_swizzle1(tmpsrc, X),
+	       src_swizzle1(tmpsrc, Y));
+
+      /* tmp.X = MAX(tmp.X, tmp.Z) */
+      emit_op2(c, TGSI_OPCODE_MAX,
+	       brw_writemask(tmp, BRW_WRITEMASK_X),
+	       tmpsrc,
+	       src_swizzle1(tmpsrc, Z));
+
+      /* tmp.X = 1 / tmp.X */
+      emit_op1(c, TGSI_OPCODE_RCP,
+	      dst_mask(tmp, BRW_WRITEMASK_X),
+	      tmpsrc);
+
+      /* tmp = src0 * tmp.xxxx */
+      emit_op2(c, TGSI_OPCODE_MUL,
+	       tmp,
+	       src0,
+	       src_swizzle1(tmpsrc, SWIZZLE_X));
+
+      coord = tmpsrc;
    }
    else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
-      struct prog_src_register scale = 
-	 search_or_add_param5( c, 
-			       STATE_INTERNAL, 
-			       STATE_TEXRECT_SCALE,
-			       unit,
-			       0,0 );
-
-      tmpcoord = get_temp(c);
-
-      /* coord.xy   = MUL inst->SrcReg[0], { 1/width, 1/height }
+      /* XXX: need a mechanism for internally generated constants.
        */
-      emit_op(c,
-	      TGSI_OPCODE_MUL,
-	      tmpcoord,
-	      0,
-	      inst->SrcReg[0],
-	      src_swizzle(scale,
-			  SWIZZLE_X,
-			  SWIZZLE_Y,
-			  SWIZZLE_ONE,
-			  SWIZZLE_ONE),
-	      src_undef());
-
-      coord = src_reg_from_dst(tmpcoord);
+      coord = src0;
    }
    else {
-      coord = inst->SrcReg[0];
+      coord = src0;
    }
 
    /* Need to emit YUV texture conversions by hand.  Probably need to
@@ -704,58 +448,36 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.yuvtex_mask & (1 << unit)) {
       /* convert ycbcr to RGBA */
       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
-
-      /* 
-	 CONST C0 = { -.5, -.0625,  -.5, 1.164 }
-	 CONST C1 = { 1.596, -0.813, 2.018, -.391 }
-	 UYV     = TEX ...
-	 UYV.xyz = ADD UYV,     C0
-	 UYV.y   = MUL UYV.y,   C0.w
- 	 if (UV swaped)
-	    RGB.xyz = MAD UYV.zzx, C1,   UYV.y
-	 else
-	    RGB.xyz = MAD UYV.xxz, C1,   UYV.y 
-	 RGB.y   = MAD UYV.z,   C1.w, RGB.y
-      */
-      struct prog_dst_register dst = inst->DstReg;
-      struct prog_dst_register tmp = get_temp(c);
-      struct prog_src_register tmpsrc = src_reg_from_dst(tmp);
-      struct prog_src_register C0 = search_or_add_const4f( c,  -.5, -.0625, -.5, 1.164 );
-      struct prog_src_register C1 = search_or_add_const4f( c, 1.596, -0.813, 2.018, -.391 );
+      struct ureg_dst dst = inst->DstReg;
+      struct ureg_dst tmp = get_temp(c);
+      struct ureg_src tmpsrc = src_reg_from_dst(tmp);
+      struct ureg_src C0 = ureg_imm4f( c->ureg,  -.5, -.0625, -.5, 1.164 );
+      struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 );
      
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  tmp,
-                  inst->SaturateMode,
+                  brw_saturate(tmp, dst.Saturate),
                   unit,
                   inst->TexSrcTarget,
-                  inst->TexShadow,
                   coord,
                   src_undef(),
                   src_undef());
 
       /* tmp.xyz =  ADD TMP, C0
        */
-      emit_op(c,
-	      TGSI_OPCODE_ADD,
-	      dst_mask(tmp, BRW_WRITEMASK_XYZ),
-	      0,
-	      tmpsrc,
-	      C0,
-	      src_undef());
+      emit_op2(c, TGSI_OPCODE_ADD,
+	       dst_mask(tmp, BRW_WRITEMASK_XYZ),
+	       tmpsrc,
+	       C0);
 
       /* YUV.y   = MUL YUV.y, C0.w
        */
-
-      emit_op(c,
-	      TGSI_OPCODE_MUL,
-	      dst_mask(tmp, BRW_WRITEMASK_Y),
-	      0,
-	      tmpsrc,
-	      src_swizzle1(C0, W),
-	      src_undef());
+      emit_op2(c, TGSI_OPCODE_MUL,
+	       dst_mask(tmp, BRW_WRITEMASK_Y),
+	       tmpsrc,
+	       src_swizzle1(C0, W));
 
       /* 
        * if (UV swaped)
@@ -764,23 +486,22 @@ static void precalc_tex( struct brw_wm_compile *c,
        *     RGB.xyz = MAD YUV.xxz, C1, YUV.y
        */
 
-      emit_op(c,
-	      TGSI_OPCODE_MAD,
-	      dst_mask(dst, BRW_WRITEMASK_XYZ),
-	      0,
-	      swap_uv?src_swizzle(tmpsrc, Z,Z,X,X):src_swizzle(tmpsrc, X,X,Z,Z),
-	      C1,
-	      src_swizzle1(tmpsrc, Y));
+      emit_op3(c, TGSI_OPCODE_MAD,
+	       dst_mask(dst, BRW_WRITEMASK_XYZ),
+	       ( swap_uv ? 
+		 src_swizzle(tmpsrc, Z,Z,X,X) : 
+		 src_swizzle(tmpsrc, X,X,Z,Z)),
+	       C1,
+	       src_swizzle1(tmpsrc, Y));
 
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
-      emit_op(c,
-	      TGSI_OPCODE_MAD,
-	      dst_mask(dst, BRW_WRITEMASK_Y),
-	      0,
-	      src_swizzle1(tmpsrc, Z),
-	      src_swizzle1(C1, W),
-	      src_swizzle1(src_reg_from_dst(dst), Y));
+      emit_op3(c,
+	       TGSI_OPCODE_MAD,
+	       dst_mask(dst, BRW_WRITEMASK_Y),
+	       src_swizzle1(tmpsrc, Z),
+	       src_swizzle1(C1, W),
+	       src_swizzle1(src_reg_from_dst(dst), Y));
 
       release_temp(c, tmp);
    }
@@ -789,29 +510,20 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
                   inst->DstReg,
-                  inst->SaturateMode,
                   unit,
                   inst->TexSrcTarget,
-                  inst->TexShadow,
                   coord,
                   src_undef(),
                   src_undef());
    }
 
-   /* For GL_EXT_texture_swizzle: */
-   if (c->key.tex_swizzles[unit] != SWIZZLE_NOOP) {
-      /* swizzle the result of the TEX instruction */
-      struct prog_src_register tmpsrc = src_reg_from_dst(inst->DstReg);
-      emit_op(c, TGSI_OPCODE_MOV,
-              inst->DstReg,
-              SATURATE_OFF, /* saturate already done above */
-              src_swizzle4(tmpsrc, c->key.tex_swizzles[unit]),
-              src_undef(),
-              src_undef());
-   }
+   /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
+    * generating shader variants in mesa state tracker.
+    */
 
-   if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) ||
-       (inst->TexSrcTarget == TEXTURE_CUBE_INDEX))
+   /* Release this temp if we ended up allocating it:
+    */
+   if (!brw_dst_is_undef(tmpcoord))
       release_temp(c, tmpcoord);
 }
 
@@ -822,7 +534,7 @@ static void precalc_tex( struct brw_wm_compile *c,
 static GLboolean projtex( struct brw_wm_compile *c,
 			  const struct prog_instruction *inst )
 {
-   const struct prog_src_register src = inst->SrcReg[0];
+   const struct ureg_src src = inst->SrcReg[0];
    GLboolean retVal;
 
    assert(inst->Opcode == TGSI_OPCODE_TXP);
@@ -836,7 +548,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
     */
    if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
       retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
-   else if (src.File == PROGRAM_INPUT && 
+   else if (src.File == TGSI_FILE_INPUT && 
 	    GET_SWZ(src.Swizzle, W) == W &&
             (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
       retVal = GL_FALSE;
@@ -853,10 +565,10 @@ static GLboolean projtex( struct brw_wm_compile *c,
 static void precalc_txp( struct brw_wm_compile *c,
 			       const struct prog_instruction *inst )
 {
-   struct prog_src_register src0 = inst->SrcReg[0];
+   struct ureg_src src0 = inst->SrcReg[0];
 
    if (projtex(c, inst)) {
-      struct prog_dst_register tmp = get_temp(c);
+      struct ureg_dst tmp = get_temp(c);
       struct prog_instruction tmp_inst;
 
       /* tmp0.w = RCP inst.arg[0][3]
@@ -864,7 +576,6 @@ static void precalc_txp( struct brw_wm_compile *c,
       emit_op(c,
 	      TGSI_OPCODE_RCP,
 	      dst_mask(tmp, BRW_WRITEMASK_W),
-	      0,
 	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
 	      src_undef(),
 	      src_undef());
@@ -874,7 +585,6 @@ static void precalc_txp( struct brw_wm_compile *c,
       emit_op(c,
 	      TGSI_OPCODE_MUL,
 	      dst_mask(tmp, BRW_WRITEMASK_XYZ),
-	      0,
 	      src0,
 	      src_swizzle1(src_reg_from_dst(tmp), W),
 	      src_undef());
@@ -899,43 +609,30 @@ static void precalc_txp( struct brw_wm_compile *c,
 
 static void emit_fb_write( struct brw_wm_compile *c )
 {
-   struct prog_src_register payload_r0_depth = src_reg(PROGRAM_PAYLOAD, PAYLOAD_DEPTH);
-   struct prog_src_register outdepth = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DEPTH);
-   struct prog_src_register outcolor;
+   struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH);
+   struct ureg_src outcolor;
+   struct prog_instruction *inst;
    GLuint i;
 
-   struct prog_instruction *inst, *last_inst;
-   struct brw_context *brw = c->func.brw;
 
    /* The inst->Aux field is used for FB write target and the EOT marker */
 
-   if (brw->state.nr_color_regions > 1) {
-      for (i = 0 ; i < brw->state.nr_color_regions; i++) {
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i);
-         last_inst = inst = emit_op(c,
-                                    WM_FB_WRITE, dst_mask(dst_undef(),0), 0,
-                                    outcolor, payload_r0_depth, outdepth);
-         inst->Aux = (i<<1);
-         if (c->fp_fragcolor_emitted) {
-            outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-            last_inst = inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                                       0, outcolor, payload_r0_depth, outdepth);
-            inst->Aux = (i<<1);
-         }
-      }
-      last_inst->Aux |= 1; //eot
-   }
-   else {
-      /* if gl_FragData[0] is written, use it, else use gl_FragColor */
-      if (c->fp->program.Base.OutputsWritten & (1 << FRAG_RESULT_DATA0))
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_DATA0);
-      else 
-         outcolor = src_reg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
-
-      inst = emit_op(c, WM_FB_WRITE, dst_mask(dst_undef(),0),
-                     0, outcolor, payload_r0_depth, outdepth);
-      inst->Aux = 1|(0<<1);
+   for (i = 0 ; i < c->key.nr_cbufs; i++) {
+      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+
+      inst = emit_op(c, WM_FB_WRITE,
+		     dst_mask(dst_undef(), 0),
+		     outcolor,
+		     payload_r0_depth,
+		     outdepth);
+
+      inst->Aux = (i<<1);
    }
+ 
+   /* Set EOT flag on last inst:
+    */
+   inst->Aux |= 1; //eot
 }
 
 
@@ -952,7 +649,7 @@ static void validate_src_regs( struct brw_wm_compile *c,
    GLuint i;
 
    for (i = 0; i < nr_args; i++) {
-      if (inst->SrcReg[i].File == PROGRAM_INPUT) {
+      if (inst->SrcReg[i].File == TGSI_FILE_INPUT) {
 	 GLuint idx = inst->SrcReg[i].Index;
 	 if (!(c->fp_interp_emitted & (1<<idx))) {
 	    emit_interp(c, idx);
@@ -965,34 +662,86 @@ static void validate_src_regs( struct brw_wm_compile *c,
 static void validate_dst_regs( struct brw_wm_compile *c,
 			       const struct prog_instruction *inst )
 {
-   if (inst->DstReg.File == PROGRAM_OUTPUT) {
+   if (inst->DstReg.File == TGSI_FILE_OUTPUT) {
       GLuint idx = inst->DstReg.Index;
       if (idx == FRAG_RESULT_COLOR)
-         c->fp_fragcolor_emitted = 1;
+         c->fp_fragcolor_emitted |= inst->DstReg.WriteMask;
    }
 }
 
-static void print_insns( const struct prog_instruction *insn,
-			 GLuint nr )
+
+
+static void emit_insn( struct brw_wm_compile *c,
+		       const struct tgsi_full_instruction *inst )
 {
-   GLuint i;
-   for (i = 0; i < nr; i++, insn++) {
-      debug_printf("%3d: ", i);
-      if (insn->Opcode < MAX_OPCODE)
-	 _mesa_print_instruction(insn);
-      else if (insn->Opcode < MAX_WM_OPCODE) {
-	 GLuint idx = insn->Opcode - MAX_OPCODE;
-
-	 _mesa_print_alu_instruction(insn,
-				     wm_opcode_strings[idx],
-				     3);
-      }
-      else 
-	 debug_printf("965 Opcode %d\n", insn->Opcode);
+
+   switch (inst->Opcode) {
+   case TGSI_OPCODE_ABS:
+      emit_op1(c, TGSI_OPCODE_MOV,
+	       dst, 
+	       brw_abs(src[0]));
+      break;
+
+   case TGSI_OPCODE_SUB: 
+      emit_op2(c, TGSI_OPCODE_ADD,
+	       dst,
+	       src[0],
+	       brw_negate(src[1]));
+      break;
+
+   case TGSI_OPCODE_SCS: 
+      emit_op1(c, TGSI_OPCODE_SCS,
+	       brw_writemask(dst, BRW_WRITEMASK_XY),
+	       src[0]);
+      break;
+	 
+   case TGSI_OPCODE_DST:
+      precalc_dst(c, inst);
+      break;
+
+   case TGSI_OPCODE_LIT:
+      precalc_lit(c, inst);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      precalc_tex(c, inst);
+      break;
+
+   case TGSI_OPCODE_TXP:
+      precalc_txp(c, inst);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      out = emit_insn(c, inst);
+      out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
+      assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+      break;
+
+   case TGSI_OPCODE_XPD: 
+      emit_op2(c, TGSI_OPCODE_XPD,
+	       brw_writemask(dst, BRW_WRITEMASK_XYZ),
+	       src[0], 
+	       src[1]);
+      break;
+
+   case TGSI_OPCODE_KIL: 
+      emit_op1(c, TGSI_OPCODE_KIL,
+	       brw_writemask(dst_undef(), 0),
+	       src[0]);
+      break;
+
+   case TGSI_OPCODE_END:
+      emit_fb_write(c);
+      break;
+   default:
+      if (brw_wm_is_scalar_result(inst->Opcode))
+	 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
+      else
+	 emit_op(c, opcode, dst, src[0], src[1], src[2]);
+      break;
    }
 }
 
-
 /**
  * Initial pass for fragment program code generation.
  * This function is used by both the GLSL and non-GLSL paths.
@@ -1004,108 +753,62 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pre-fp:\n");
-      _mesa_print_program(&fp->program.Base); 
-      debug_printf("\n");
+      tgsi_dump(fp->tokens, 0); 
    }
 
-   c->pixel_xy = src_undef();
-   c->delta_xy = src_undef();
-   c->pixel_w = src_undef();
+   c->pixel_xy = brw_src_undef();
+   c->delta_xy = brw_src_undef();
+   c->pixel_w = brw_src_undef();
    c->nr_fp_insns = 0;
    c->fp->tex_units_used = 0x0;
 
-   /* Emit preamble instructions.  This is where special instructions such as
-    * WM_CINTERP, WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
-    * compute shader inputs from varying vars.
-    */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      validate_src_regs(c, inst);
-      validate_dst_regs(c, inst);
-   }
 
    /* Loop over all instructions doing assorted simplifications and
     * transformations.
     */
-   for (insn = 0; insn < fp->program.Base.NumInstructions; insn++) {
-      const struct prog_instruction *inst = &fp->program.Base.Instructions[insn];
-      struct prog_instruction *out;
-
-      /* Check for INPUT values, emit INTERP instructions where
-       * necessary:
-       */
-
-      switch (inst->Opcode) {
-      case TGSI_OPCODE_ABS:
-	 out = emit_insn(c, inst);
-	 out->Opcode = TGSI_OPCODE_MOV;
-	 out->SrcReg[0].Negate = NEGATE_NONE;
-	 out->SrcReg[0].Abs = 1;
-	 break;
-
-      case TGSI_OPCODE_SUB: 
-	 out = emit_insn(c, inst);
-	 out->Opcode = TGSI_OPCODE_ADD;
-	 out->SrcReg[1].Negate ^= NEGATE_XYZW;
-	 break;
-
-      case TGSI_OPCODE_SCS: 
-	 out = emit_insn(c, inst);
-	 /* This should probably be done in the parser. 
+   tgsi_parse_init( &parse, tokens );
+   while( !tgsi_parse_end_of_tokens( &parse ) ) {
+      tgsi_parse_token( &parse );
+
+      switch( parse.FullToken.Token.Type ) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+	 /* If branching shader, emit preamble instructions at decl time, as
+	  * instruction order in the shader does not correspond to the order
+	  * instructions are executed in the wild.
+	  *
+	  * This is where special instructions such as WM_CINTERP,
+	  * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute
+	  * shader inputs from varying vars.
+	  *
+	  * XXX: For non-branching shaders, consider deferring variable
+	  * initialization as late as possible to minimize register
+	  * usage.  This is how the original BRW driver worked.
 	  */
-	 out->DstReg.WriteMask &= BRW_WRITEMASK_XY;
-	 break;
-	 
-      case TGSI_OPCODE_DST:
-	 precalc_dst(c, inst);
-	 break;
-
-      case TGSI_OPCODE_LIT:
-	 precalc_lit(c, inst);
-	 break;
-
-      case TGSI_OPCODE_TEX:
-	 precalc_tex(c, inst);
-	 break;
-
-      case TGSI_OPCODE_TXP:
-	 precalc_txp(c, inst);
-	 break;
-
-      case TGSI_OPCODE_TXB:
-	 out = emit_insn(c, inst);
-	 out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-         assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
-	 break;
-
-      case TGSI_OPCODE_XPD: 
-	 out = emit_insn(c, inst);
-	 /* This should probably be done in the parser. 
+	 validate_src_regs(c, inst);
+	 validate_dst_regs(c, inst);
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+	 /* Unlike VS programs we can probably manage fine encoding
+	  * immediate values directly into the emitted EU
+	  * instructions, as we probably only need to reference one
+	  * float value per instruction.  Just save the data for now
+	  * and use directly later.
 	  */
-	 out->DstReg.WriteMask &= BRW_WRITEMASK_XYZ;
 	 break;
 
-      case TGSI_OPCODE_KIL: 
-	 out = emit_insn(c, inst);
-	 /* This should probably be done in the parser. 
-	  */
-	 out->DstReg.WriteMask = 0;
-	 break;
-      case TGSI_OPCODE_END:
-	 emit_fb_write(c);
-	 break;
-      default:
-	 if (brw_wm_is_scalar_result(inst->Opcode))
-	    emit_scalar_insn(c, inst);
-	 else
-	    emit_insn(c, inst);
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         inst = &parse.FullToken.FullInstruction;
+	 emit_insn( c, inst );
 	 break;
       }
    }
 
+   c->brw_program = brw_finalize( c->builder );
+
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pass_fp:\n");
-      print_insns( c->prog_instructions, c->nr_fp_insns );
+      brw_print_program( c->brw_program );
       debug_printf("\n");
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 59bc4ef701..cdc10484a6 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -332,7 +332,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
 	  for (j = 0; j < 4; j++)
 	     set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
        }
-       if (c->key.vp_outputs_written & (1 << i)) {
+       if (c->key.nr_vp_outputs > i) {
 	  reg_index += 2;
        }
     }
@@ -1670,7 +1670,7 @@ get_argument_regs(struct brw_wm_compile *c,
     }
 }
 
-static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
+static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_compile *c)
 {
 #define MAX_IF_DEPTH 32
 #define MAX_LOOP_DEPTH 32
@@ -1943,20 +1943,20 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
  * Do GPU code generation for shaders that use GLSL features such as
  * flow control.  Other shaders will be compiled with the 
  */
-void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c)
+void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
 {
     if (BRW_DEBUG & DEBUG_WM) {
-        debug_printf("brw_wm_glsl_emit:\n");
+       debug_printf("%s:\n", __FUNCTION__);
     }
 
     /* initial instruction translation/simplification */
     brw_wm_pass_fp(c);
 
     /* actual code generation */
-    brw_wm_emit_glsl(brw, c);
+    brw_wm_emit_branching_shader(brw, c);
 
     if (BRW_DEBUG & DEBUG_WM) {
-        brw_wm_print_program(c, "brw_wm_glsl_emit done");
+        brw_wm_print_program(c, "brw_wm_branching_shader_emit done");
     }
 
     c->prog_data.total_grf = num_grf_used(c);
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 71e4c56835..d8b9028927 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -168,54 +168,20 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 
    if (!ref) {
       switch (file) {
-      case PROGRAM_INPUT:
-      case PROGRAM_PAYLOAD:
-      case PROGRAM_TEMPORARY:
-      case PROGRAM_OUTPUT:
-      case PROGRAM_VARYING:
+      case TGSI_FILE_INPUT:
+      case TGSI_FILE_TEMPORARY:
+      case TGSI_FILE_OUTPUT:
+      case BRW_FILE_PAYLOAD:
+	 /* should already be done?? */
 	 break;
 
-      case PROGRAM_LOCAL_PARAM:
-	 ref = get_param_ref(c, &c->fp->program.Base.LocalParams[idx][component]);
-	 break;
-
-      case PROGRAM_ENV_PARAM:
+      case TGSI_FILE_CONSTANT:
 	 ref = get_param_ref(c, &c->env_param[idx][component]);
 	 break;
 
-      case PROGRAM_STATE_VAR:
-      case PROGRAM_UNIFORM:
-      case PROGRAM_CONSTANT:
-      case PROGRAM_NAMED_PARAM: {
-	 struct gl_program_parameter_list *plist = c->fp->program.Base.Parameters;
-	 
-	 /* There's something really hokey about parameters parsed in
-	  * arb programs - they all end up in here, whether they be
-	  * state values, parameters or constants.  This duplicates the
-	  * structure above & also seems to subvert the limits set for
-	  * each type of constant/param.
-	  */ 
-	 switch (plist->Parameters[idx].Type) {
-	 case PROGRAM_NAMED_PARAM:
-	 case PROGRAM_CONSTANT:
-	    /* These are invarient:
-	     */
-	    ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
-	    break;
-
-	 case PROGRAM_STATE_VAR:
-	 case PROGRAM_UNIFORM:
-	    /* These may change from run to run:
-	     */
-	    ref = get_param_ref(c, &plist->ParameterValues[idx][component] );
-	    break;
-
-	 default:
-	    assert(0);
-	    break;
-	 }
+      case TGSI_FILE_IMMEDIATE:
+	 ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
 	 break;
-      }
 
       default:
 	 assert(0);
@@ -310,17 +276,16 @@ translate_insn(struct brw_wm_compile *c,
                const struct prog_instruction *inst)
 {
    struct brw_wm_instruction *out = get_instruction(c);
-   GLuint writemask = inst->DstReg.WriteMask;
+   GLuint writemask = inst->dst.WriteMask;
    GLuint nr_args = brw_wm_nr_args(inst->Opcode);
    GLuint i, j;
 
    /* Copy some data out of the instruction
     */
    out->opcode = inst->Opcode;
-   out->saturate = (inst->SaturateMode != SATURATE_OFF);
+   out->saturate = inst->dst.Saturate;
    out->tex_unit = inst->TexSrcUnit;
-   out->tex_idx = inst->TexSrcTarget;
-   out->tex_shadow = inst->TexShadow;
+   out->tex_target = inst->TexSrcTarget;
    out->eot = inst->Aux & 1;
    out->target = inst->Aux >> 1;
 
@@ -328,7 +293,7 @@ translate_insn(struct brw_wm_compile *c,
     */
    for (i = 0; i < nr_args; i++) {
       for (j = 0; j < 4; j++) {
-	 out->src[i][j] = get_new_ref(c, inst->SrcReg[i], j, out);
+	 out->src[i][j] = get_new_ref(c, inst->src[i], j, out);
       }
    }
 
@@ -380,15 +345,6 @@ static void pass0_init_payload( struct brw_wm_compile *c )
 			     &c->payload.depth[j] );
    }
 
-#if 0
-   /* This seems to be an alternative to the INTERP_WPOS stuff I do
-    * elsewhere:
-    */
-   if (c->key.source_depth_reg)
-      pass0_set_fpreg_value(c, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, 2,
-			    &c->payload.depth[c->key.source_depth_reg/2]);
-#endif
-   
    for (i = 0; i < FRAG_ATTRIB_MAX; i++)
       pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, 
 			     &c->payload.input_interp[i] );      
@@ -403,6 +359,9 @@ static void pass0_init_payload( struct brw_wm_compile *c )
  * the same number.
  *
  * Translate away swizzling and eliminate non-saturating moves.
+ *
+ * Translate instructions from Mesa's prog_instruction structs to our
+ * internal brw_wm_instruction representation.
  */
 void brw_wm_pass0( struct brw_wm_compile *c )
 {
@@ -421,7 +380,7 @@ void brw_wm_pass0( struct brw_wm_compile *c )
        */      
       switch (inst->Opcode) {
       case OPCODE_MOV: 
-	 if (!inst->SaturateMode) {
+	 if (!inst->dst.Saturate) {
 	    pass0_precalc_mov(c, inst);
 	 }
 	 else {
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index 85a3a55ca4..b0356b1bd5 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -90,17 +90,24 @@ static void track_arg(struct brw_wm_compile *c,
 static GLuint get_texcoord_mask( GLuint tex_idx )
 {
    switch (tex_idx) {
-   case TEXTURE_1D_INDEX:
+   case TGSI_TEXTURE_1D:
       return BRW_WRITEMASK_X;
-   case TEXTURE_2D_INDEX:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
       return BRW_WRITEMASK_XY;
-   case TEXTURE_3D_INDEX:
+   case TGSI_TEXTURE_3D:
       return BRW_WRITEMASK_XYZ;
-   case TEXTURE_CUBE_INDEX:
+   case TGSI_TEXTURE_CUBE:
       return BRW_WRITEMASK_XYZ;
-   case TEXTURE_RECT_INDEX:
-      return BRW_WRITEMASK_XY;
-   default: return 0;
+
+   case TGSI_TEXTURE_SHADOW1D:
+      return BRW_WRITEMASK_XZ;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+      return BRW_WRITEMASK_XYZ;
+   default: 
+      assert(0);
+      return 0;
    }
 }
 
@@ -217,14 +224,9 @@ void brw_wm_pass1( struct brw_wm_compile *c )
       case TGSI_OPCODE_TEX:
       case TGSI_OPCODE_TXP:
 	 read0 = get_texcoord_mask(inst->tex_idx);
-
-         if (inst->tex_shadow)
-	    read0 |= BRW_WRITEMASK_Z;
 	 break;
 
       case TGSI_OPCODE_TXB:
-	 /* Shadow ignored for txb.
-	  */
 	 read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W;
 	 break;
 
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index edabf6ceb6..1898f38cef 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -52,7 +52,7 @@ struct brw_wm_unit_key {
    unsigned int max_threads;
 
    unsigned int nr_surfaces, sampler_count;
-   GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+   GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
    GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
    GLfloat offset_units, offset_factor;
 };
@@ -114,10 +114,10 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 
    /* _NEW_COLOR */
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
-   key->is_glsl = bfp->isGLSL;
+   key->has_flow_control = bfp->has_flow_control;
 
    /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp));
 
    /* _NEW_QUERY */
    key->stats_wm = (brw->query.stats_wm != 0);
@@ -184,7 +184,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    wm.wm5.program_computes_depth = key->computes_depth;
    wm.wm5.program_uses_killpixel = key->uses_kill;
 
-   if (key->is_glsl)
+   if (key->has_flow_control)
       wm.wm5.enable_8_pix = 1;
    else
       wm.wm5.enable_16_pix = 1;
-- 
cgit v1.2.3


From ca9c413647bf9efb5ed770e3a655bc758075aec7 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Fri, 30 Oct 2009 08:03:10 +0000
Subject: softpipe: Respect gl_rasterization_rules in primitive setup.

---
 src/gallium/drivers/softpipe/sp_setup.c | 40 +++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 00fb52a64f..615581b95f 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -87,6 +87,8 @@ struct setup_context {
    float oneoverarea;
    int facing;
 
+   float pixel_offset;
+
    struct quad_header quad[MAX_QUADS];
    struct quad_header *quad_ptrs[MAX_QUADS];
    unsigned count;
@@ -379,6 +381,16 @@ static boolean setup_sort_vertices( struct setup_context *setup,
       ((det > 0.0) ^ 
        (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW));
 
+   /* Prepare pixel offset for rasterisation:
+    *  - pixel center (0.5, 0.5) for GL, or
+    *  - assume (0.0, 0.0) for other APIs.
+    */
+   if (setup->softpipe->rasterizer->gl_rasterization_rules) {
+      setup->pixel_offset = 0.5f;
+   } else {
+      setup->pixel_offset = 0.0f;
+   }
+
    return TRUE;
 }
 
@@ -427,7 +439,7 @@ static void tri_linear_coeff( struct setup_context *setup,
 
    /* calculate a0 as the value which would be sampled for the
     * fragment at (0,0), taking into account that we want to sample at
-    * pixel centers, in other words (0.5, 0.5).
+    * pixel centers, in other words (pixel_offset, pixel_offset).
     *
     * this is neat but unfortunately not a good way to do things for
     * triangles with very large values of dadx or dady as it will
@@ -438,8 +450,8 @@ static void tri_linear_coeff( struct setup_context *setup,
     * instead - i'll switch to this later.
     */
    coef->a0[i] = (setup->vmin[vertSlot][i] -
-                  (dadx * (setup->vmin[0][0] - 0.5f) +
-                   dady * (setup->vmin[0][1] - 0.5f)));
+                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
 
    /*
    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -487,8 +499,8 @@ static void tri_persp_coeff( struct setup_context *setup,
    coef->dadx[i] = dadx;
    coef->dady[i] = dady;
    coef->a0[i] = (mina -
-                  (dadx * (setup->vmin[0][0] - 0.5f) +
-                   dady * (setup->vmin[0][1] - 0.5f)));
+                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
 }
 
 
@@ -575,12 +587,12 @@ static void setup_tri_coefficients( struct setup_context *setup )
 
 static void setup_tri_edges( struct setup_context *setup )
 {
-   float vmin_x = setup->vmin[0][0] + 0.5f;
-   float vmid_x = setup->vmid[0][0] + 0.5f;
+   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
+   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
 
-   float vmin_y = setup->vmin[0][1] - 0.5f;
-   float vmid_y = setup->vmid[0][1] - 0.5f;
-   float vmax_y = setup->vmax[0][1] - 0.5f;
+   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
+   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
+   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
 
    setup->emaj.sy = ceilf(vmin_y);
    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
@@ -780,8 +792,8 @@ line_linear_coeff(const struct setup_context *setup,
    coef->dadx[i] = dadx;
    coef->dady[i] = dady;
    coef->a0[i] = (setup->vmin[vertSlot][i] -
-                  (dadx * (setup->vmin[0][0] - 0.5f) +
-                   dady * (setup->vmin[0][1] - 0.5f)));
+                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
 }
 
 
@@ -803,8 +815,8 @@ line_persp_coeff(const struct setup_context *setup,
    coef->dadx[i] = dadx;
    coef->dady[i] = dady;
    coef->a0[i] = (setup->vmin[vertSlot][i] -
-                  (dadx * (setup->vmin[0][0] - 0.5f) +
-                   dady * (setup->vmin[0][1] - 0.5f)));
+                  (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                   dady * (setup->vmin[0][1] - setup->pixel_offset)));
 }
 
 
-- 
cgit v1.2.3


From ace78d90ded52d8fe4b3b077abf9a4db381dce16 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 29 Oct 2009 23:48:59 +0100
Subject: r300g: fix crash in r300_is_texture_referenced

Also, a subtle fix in emitting a texture state
---
 src/gallium/drivers/r300/r300_context.c | 2 +-
 src/gallium/drivers/r300/r300_emit.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index e45564b54e..02f201b49a 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -80,7 +80,7 @@ r300_is_texture_referenced(struct pipe_context *pipe,
                            struct pipe_texture *texture,
                            unsigned face, unsigned level)
 {
-    struct pipe_buffer* buf;
+    struct pipe_buffer* buf = 0;
 
     r300_get_texture_buffer(texture, &buf, NULL);
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 2a8e4a9f41..3b0b41e486 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -540,7 +540,7 @@ void r300_emit_texture(struct r300_context* r300,
     CS_LOCALS(r300);
 
     /* to emulate 1D textures through 2D ones correctly */
-    if (tex->tex.height[0] == 1) {
+    if (tex->tex.target == PIPE_TEXTURE_1D) {
         filter0 &= ~R300_TX_WRAP_T_MASK;
         filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
     }
-- 
cgit v1.2.3


From 11180b44717943d767b64f0b658f31b6c2594aa4 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 30 Oct 2009 13:08:37 +0100
Subject: r300g: remove unnecessary assertions

Also, correct typos in comments.
---
 src/gallium/drivers/r300/r300_reg.h   | 4 ++--
 src/gallium/drivers/r300/r300_state.c | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index babc3c709e..1e4d3f5d70 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -862,10 +862,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_POINTSIZE_X_MASK          0xffff0000
 #       define R300_POINTSIZE_MAX             (R300_POINTSIZE_Y_MASK / 6)
 
-/* Blue fill color */
+/* Red fill color */
 #define R500_GA_FILL_R                                0x4220
 
-/* Blue fill color */
+/* Green fill color */
 #define R500_GA_FILL_G                                0x4224
 
 /* Blue fill color */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5db8c69dec..1e7fabf683 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -608,17 +608,14 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
     r300->viewport_state->vte_control = R300_VTX_W0_FMT;
 
     if (state->scale[0] != 1.0f) {
-        assert(state->scale[0] != 0.0f);
         r300->viewport_state->xscale = state->scale[0];
         r300->viewport_state->vte_control |= R300_VPORT_X_SCALE_ENA;
     }
     if (state->scale[1] != 1.0f) {
-        assert(state->scale[1] != 0.0f);
         r300->viewport_state->yscale = state->scale[1];
         r300->viewport_state->vte_control |= R300_VPORT_Y_SCALE_ENA;
     }
     if (state->scale[2] != 1.0f) {
-        assert(state->scale[2] != 0.0f);
         r300->viewport_state->zscale = state->scale[2];
         r300->viewport_state->vte_control |= R300_VPORT_Z_SCALE_ENA;
     }
-- 
cgit v1.2.3


From 63c9450ae776ff4207422442dd8c3d9d13a05e7a Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 30 Oct 2009 18:19:25 +0100
Subject: r300g: add precalculating of pixel pitch, add a new NPOT flag

---
 src/gallium/drivers/r300/r300_context.h |  8 ++++++++
 src/gallium/drivers/r300/r300_emit.c    | 16 ++++++++-------
 src/gallium/drivers/r300/r300_texture.c | 35 +++++++++++++++++++++++----------
 3 files changed, 42 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 4d73567bbe..cee0734d21 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -181,6 +181,9 @@ struct r300_texture {
     /* Offsets into the buffer. */
     unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
 
+    /* A pitch for each mip-level */
+    unsigned pitch[PIPE_MAX_TEXTURE_LEVELS];
+
     /* Size of one zslice or face based on the texture target */
     unsigned layer_size[PIPE_MAX_TEXTURE_LEVELS];
 
@@ -197,6 +200,11 @@ struct r300_texture {
     /* Total size of this texture, in bytes. */
     unsigned size;
 
+    /* Whether this texture has non-power-of-two dimensions.
+     * It can be either a regular texture or a rectangle one.
+     */
+    boolean is_npot;
+
     /* Pipe buffer backing this texture. */
     struct pipe_buffer* buffer;
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 3b0b41e486..be38fbc619 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -285,21 +285,22 @@ void r300_emit_fb_state(struct r300_context* r300,
                         struct pipe_framebuffer_state* fb)
 {
     struct r300_texture* tex;
-    unsigned pixpitch;
+    struct pipe_surface* surf;
     int i;
     CS_LOCALS(r300);
 
     BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4);
     for (i = 0; i < fb->nr_cbufs; i++) {
-        tex = (struct r300_texture*)fb->cbufs[i]->texture;
+        surf = fb->cbufs[i];
+        tex = (struct r300_texture*)surf->texture;
         assert(tex && tex->buffer && "cbuf is marked, but NULL!");
-        pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
 
+        /* XXX I still need to figure out how to set the mipmap level here */
         OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
         OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
         OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
-        OUT_CS_RELOC(tex->buffer, pixpitch |
+        OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
                      r300_translate_colorformat(tex->tex.format), 0,
                      RADEON_GEM_DOMAIN_VRAM, 0);
 
@@ -308,9 +309,9 @@ void r300_emit_fb_state(struct r300_context* r300,
     }
 
     if (fb->zsbuf) {
-        tex = (struct r300_texture*)fb->zsbuf->texture;
+        surf = fb->zsbuf;
+        tex = (struct r300_texture*)surf->texture;
         assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
-        pixpitch = r300_texture_get_stride(tex, 0) / tex->tex.block.size;
 
         OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
         OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
@@ -318,7 +319,8 @@ void r300_emit_fb_state(struct r300_context* r300,
         OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
 
         OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
-        OUT_CS_RELOC(tex->buffer, pixpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+        OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level], 0,
+                     RADEON_GEM_DOMAIN_VRAM, 0);
     }
 
     OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 3e90fea6c8..7199918a84 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -33,21 +33,15 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
 {
     struct r300_texture_state* state = &tex->state;
     struct pipe_texture *pt = &tex->tex;
-    unsigned stride;
 
     state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
                      R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff);
 
-    if (!util_is_power_of_two(pt->width[0]) ||
-        !util_is_power_of_two(pt->height[0])) {
-
+    if (tex->is_npot) {
         /* rectangles love this */
         state->format0 |= R300_TX_PITCH_EN;
-
-        stride = r300_texture_get_stride(tex, 0) / pt->block.size;
-        state->format2 = (stride - 1) & 0x1fff;
-    }
-    else {
+        state->format2 = (tex->pitch[0] - 1) & 0x1fff;
+    } else {
         /* power of two textures (3D, mipmaps, and no pitch) */
         state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
                           R300_TX_NUM_LEVELS(pt->last_level & 0xf);
@@ -143,6 +137,12 @@ static void r300_setup_miptree(struct r300_texture* tex)
         tex->size = tex->offset[i] + size;
         tex->layer_size[i] = layer_size;
 
+        if (tex->is_npot) {
+            tex->pitch[i] = stride / base->block.size;
+        } else {
+            tex->pitch[i] = base->width[i];
+        }
+
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
                 i, base->width[i], base->height[i], base->depth[i],
@@ -150,6 +150,12 @@ static void r300_setup_miptree(struct r300_texture* tex)
     }
 }
 
+static void r300_setup_flags(struct r300_texture* tex)
+{
+    tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) ||
+                   !util_is_power_of_two(tex->tex.height[0]);
+}
+
 /* Create a new texture. */
 static struct pipe_texture*
     r300_texture_create(struct pipe_screen* screen,
@@ -165,8 +171,8 @@ static struct pipe_texture*
     pipe_reference_init(&tex->tex.reference, 1);
     tex->tex.screen = screen;
 
+    r300_setup_flags(tex);
     r300_setup_miptree(tex);
-
     r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
 
     tex->buffer = screen->buffer_create(screen, 1024,
@@ -234,6 +240,13 @@ static struct pipe_texture*
 {
     struct r300_texture* tex;
 
+    /* Support only 2D textures without mipmaps */
+    if (base->target != PIPE_TEXTURE_2D ||
+        base->depth[0] != 1 ||
+        base->last_level != 0) {
+        return NULL;
+    }
+
     tex = CALLOC_STRUCT(r300_texture);
     if (!tex) {
         return NULL;
@@ -244,7 +257,9 @@ static struct pipe_texture*
     tex->tex.screen = screen;
 
     tex->stride_override = *stride;
+    tex->pitch[0] = *stride / base->block.size;
 
+    r300_setup_flags(tex);
     r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
 
     pipe_buffer_reference(&tex->buffer, buffer);
-- 
cgit v1.2.3


From a8f85dceb5e721437ba30ec540cd0bf8ee454325 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 31 Oct 2009 05:34:46 +0100
Subject: r300g: fix reading from the destination buffer in blending

---
 src/gallium/drivers/r300/r300_state.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 1e7fabf683..3ac627e959 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -75,7 +75,9 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
             srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
             srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_DST_COLOR ||
             srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
             srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA)
             blend->blend_control |= R300_READ_ENABLE;
 
-- 
cgit v1.2.3


From 3f60130b87a4a75f1b7cb6e0b854001bbe8f7ec8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 31 Oct 2009 05:38:25 +0100
Subject: r300g: pretend NPOT support

It's required to get GL2.1, so that many more piglit tests can be used
for testing. Figure out later how to emulate this.
---
 src/gallium/drivers/r300/r300_screen.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 6efa17cbaf..390b63007e 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -84,7 +84,9 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
             /* XXX I'm told this goes up to 16 */
             return 8;
         case PIPE_CAP_NPOT_TEXTURES:
-            return 0;
+            /* XXX enable now to get GL2.1 API,
+             * figure out later how to emulate this */
+            return 1;
         case PIPE_CAP_TWO_SIDED_STENCIL:
             if (r300screen->caps->is_r500) {
                 return 1;
-- 
cgit v1.2.3


From c9928ac3ee5dc0d10127388f9312779a6c59da7c Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 31 Oct 2009 07:23:00 +0100
Subject: r300g: correct the pitch calculation for smaller mipmaps

---
 src/gallium/drivers/r300/r300_emit.c    | 2 +-
 src/gallium/drivers/r300/r300_texture.c | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index be38fbc619..22cf9cac2a 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -305,7 +305,7 @@ void r300_emit_fb_state(struct r300_context* r300,
                      RADEON_GEM_DOMAIN_VRAM, 0);
 
         OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
-            r300_translate_out_fmt(fb->cbufs[i]->format));
+            r300_translate_out_fmt(surf->format));
     }
 
     if (fb->zsbuf) {
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 7199918a84..aea25cf71d 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -136,12 +136,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
         tex->layer_size[i] = layer_size;
-
-        if (tex->is_npot) {
-            tex->pitch[i] = stride / base->block.size;
-        } else {
-            tex->pitch[i] = base->width[i];
-        }
+        tex->pitch[i] = stride / base->block.size;
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
-- 
cgit v1.2.3


From 525f529d138168386224136dc45abb858677bac7 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 31 Oct 2009 11:25:48 +0100
Subject: nv50: make MRTs work

We have to indicate to the hw whether the FP exports
multiple colour results.
Method 0x121c is used to specify the number of RTs.
Also deactivate zeta explicitly if there's no zsbuf.
---
 src/gallium/drivers/nv50/nv50_program.c        |  4 ++++
 src/gallium/drivers/nv50/nv50_state_validate.c | 11 +++++++++++
 2 files changed, 15 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 9ccc4f5a16..c3edc02cb5 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2644,6 +2644,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			pc->result[2].rhw = rid;
 
 		p->cfg.high_result = rid;
+
+		/* separate/different colour results for MRTs ? */
+		if (pc->result_nr - (p->info.writes_z ? 1 : 0) > 1)
+			p->cfg.regs[2] |= 1;
 	}
 
 	if (pc->immd_nr) {
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 956a700615..a13d64b7fa 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -37,6 +37,14 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
 	unsigned i, w, h, gw = 0;
 
+	/* Set nr of active RTs. Don't know what 0xfac6880 does, but
+	 * at least 0x880 was required to draw to more than 1 RT.
+	 * In some special cases, 0xfac6880 is not used, we probably
+	 * don't hit any of these though.
+	 */
+	so_method(so, tesla, 0x121c, 1);
+	so_data  (so, 0x0fac6880 | fb->nr_cbufs);
+
 	for (i = 0; i < fb->nr_cbufs; i++) {
 		struct pipe_texture *pt = fb->cbufs[i]->texture;
 		struct nouveau_bo *bo = nv50_miptree(pt)->base.bo;
@@ -121,6 +129,9 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 		so_data  (so, fb->zsbuf->width);
 		so_data  (so, fb->zsbuf->height);
 		so_data  (so, 0x00010001);
+	} else {
+		so_method(so, tesla, 0x1538, 1);
+		so_data  (so, 0);
 	}
 
 	so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
-- 
cgit v1.2.3


From 9831e1f76cd020e1cde2b13e03149415319a8135 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 31 Oct 2009 13:38:22 +0100
Subject: nv50: use SIFC also for shader upload

Adds a more generic SIFC transfer function.
---
 src/gallium/drivers/nv50/nv50_context.h  | 11 +++-
 src/gallium/drivers/nv50/nv50_program.c  | 79 +++++++++--------------------
 src/gallium/drivers/nv50/nv50_transfer.c | 86 ++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 56 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 33667e8765..890defb90c 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -196,7 +196,8 @@ extern void nv50_clear(struct pipe_context *pipe, unsigned buffers,
 extern void nv50_vertprog_validate(struct nv50_context *nv50);
 extern void nv50_fragprog_validate(struct nv50_context *nv50);
 extern void nv50_linkage_validate(struct nv50_context *nv50);
-extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p);
+extern void nv50_program_destroy(struct nv50_context *nv50,
+				 struct nv50_program *p);
 
 /* nv50_state_validate.c */
 extern boolean nv50_state_validate(struct nv50_context *nv50);
@@ -210,4 +211,12 @@ extern void nv50_so_init_sifc(struct nv50_context *nv50,
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
 
+/* nv50_transfer.c */
+extern void
+nv50_upload_sifc(struct nv50_context *nv50,
+		 struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+		 unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+		 void *src, unsigned src_format, int src_pitch,
+		 int x, int y, int w, int h, int cpp);
+
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index c3edc02cb5..faf638949f 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2980,11 +2980,8 @@ static void
 nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
-	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_program_exec *e;
-	struct nouveau_stateobj *so;
-	const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
-	unsigned start, count, *up, *ptr;
+	uint32_t *up, i;
 	boolean upload = FALSE;
 
 	if (!p->bo) {
@@ -2999,32 +2996,37 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 	if (!upload)
 		return;
 
-	for (e = p->exec_head; e; e = e->next) {
+	up = MALLOC(p->exec_size * 4);
+
+	for (i = 0, e = p->exec_head; e; e = e->next) {
 		unsigned ei, ci, bs;
 
-		if (e->param.index < 0)
-			continue;
+		if (e->param.index >= 0 && e->param.mask) {
+			bs = (e->inst[1] >> 22) & 0x07;
+			assert(bs < 2);
+			ei = e->param.shift >> 5;
+			ci = e->param.index;
+			if (bs == 0)
+				ci += p->data[bs]->start;
 
-		if (e->param.mask == 0) {
+			e->inst[ei] &= ~e->param.mask;
+			e->inst[ei] |= (ci << e->param.shift);
+		} else
+		if (e->param.index >= 0) {
+			/* zero mask means param is a jump/branch offset */
 			assert(!(e->param.index & 1));
 			/* seem to be 8 byte steps */
 			ei = (e->param.index >> 1) + 0 /* START_ID */;
 
 			e->inst[0] &= 0xf0000fff;
 			e->inst[0] |= ei << 12;
-			continue;
 		}
 
-		bs = (e->inst[1] >> 22) & 0x07;
-		assert(bs < 2);
-		ei = e->param.shift >> 5;
-		ci = e->param.index;
-		if (bs == 0)
-			ci += p->data[bs]->start;
-
-		e->inst[ei] &= ~e->param.mask;
-		e->inst[ei] |= (ci << e->param.shift);
+		up[i++] = e->inst[0];
+		if (is_long(e))
+			up[i++] = e->inst[1];
 	}
+	assert(i == p->exec_size);
 
 	if (p->data[0])
 		p->data_start[0] = p->data[0]->start;
@@ -3037,45 +3039,12 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p)
 			NOUVEAU_ERR("0x%08x\n", e->inst[1]);
 	}
 #endif
-
-	up = ptr = MALLOC(p->exec_size * 4);
-	for (e = p->exec_head; e; e = e->next) {
-		*(ptr++) = e->inst[0];
-		if (is_long(e))
-			*(ptr++) = e->inst[1];
-	}
-
-	so = so_new(4,2);
-	so_method(so, nv50->screen->tesla, NV50TCL_CB_DEF_ADDRESS_HIGH, 3);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, p->bo, 0, flags | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4));
-
-	start = 0; count = p->exec_size;
-	while (count) {
-		struct nouveau_channel *chan = nv50->screen->base.channel;
-		unsigned nr;
-
-		so_emit(chan, so);
-
-		nr = MIN2(count, 2047);
-		nr = MIN2(chan->pushbuf->remaining, nr);
-		if (chan->pushbuf->remaining < (nr + 3)) {
-			FIRE_RING(chan);
-			continue;
-		}
-
-		BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
-		OUT_RING  (chan, (start << 8) | NV50_CB_PUPLOAD);
-		BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, nr);
-		OUT_RINGp (chan, up + start, nr);
-
-		start += nr;
-		count -= nr;
-	}
+	nv50_upload_sifc(nv50, p->bo, 0, NOUVEAU_BO_VRAM,
+			 NV50_2D_DST_FORMAT_R8_UNORM, 65536, 1, 262144,
+			 up, NV50_2D_SIFC_FORMAT_R8_UNORM, 0,
+			 0, 0, p->exec_size * 4, 1, 1);
 
 	FREE(up);
-	so_ref(NULL, &so);
 }
 
 void
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 9c289026bb..f1eb672336 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -237,3 +237,89 @@ nv50_transfer_init_screen_functions(struct pipe_screen *pscreen)
 	pscreen->transfer_map = nv50_transfer_map;
 	pscreen->transfer_unmap = nv50_transfer_unmap;
 }
+
+void
+nv50_upload_sifc(struct nv50_context *nv50,
+		 struct nouveau_bo *bo, unsigned dst_offset, unsigned reloc,
+		 unsigned dst_format, int dst_w, int dst_h, int dst_pitch,
+		 void *src, unsigned src_format, int src_pitch,
+		 int x, int y, int w, int h, int cpp)
+{
+	struct nouveau_channel *chan = nv50->screen->base.channel;
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	unsigned line_dwords = (w * cpp + 3) / 4;
+
+	reloc |= NOUVEAU_BO_WR;
+
+	WAIT_RING (chan, 32);
+
+	if (bo->tile_flags) {
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
+		OUT_RING  (chan, dst_format);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, bo->tile_mode << 4);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 0);
+	} else {
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 2);
+		OUT_RING  (chan, dst_format);
+		OUT_RING  (chan, 1);
+		BEGIN_RING(chan, eng2d, NV50_2D_DST_PITCH, 1);
+		OUT_RING  (chan, dst_pitch);
+	}
+
+	BEGIN_RING(chan, eng2d, NV50_2D_DST_WIDTH, 4);
+	OUT_RING  (chan, dst_w);
+	OUT_RING  (chan, dst_h);
+	OUT_RELOCh(chan, bo, dst_offset, reloc);
+	OUT_RELOCl(chan, bo, dst_offset, reloc);
+
+	/* NV50_2D_OPERATION_SRCCOPY assumed already set */
+
+	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, src_format);
+	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
+	OUT_RING  (chan, w);
+	OUT_RING  (chan, h);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, x);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, y);
+
+	while (h--) {
+		const uint32_t *p = src;
+		unsigned count = line_dwords;
+
+		while (count) {
+			unsigned nr = MIN2(count, 1792);
+
+			if (chan->pushbuf->remaining <= nr) {
+				FIRE_RING (chan);
+
+				BEGIN_RING(chan, eng2d,
+					   NV50_2D_DST_ADDRESS_HIGH, 2);
+				OUT_RELOCh(chan, bo, dst_offset, reloc);
+				OUT_RELOCl(chan, bo, dst_offset, reloc);
+			}
+			assert(chan->pushbuf->remaining > nr);
+
+			BEGIN_RING(chan, eng2d,
+				   NV50_2D_SIFC_DATA | (2 << 29), nr);
+			OUT_RINGp (chan, p, nr);
+
+			p += nr;
+			count -= nr;
+		}
+
+		src += src_pitch;
+	}
+
+	BEGIN_RING(chan, tesla, 0x1440, 1);
+	OUT_RING  (chan, 0);
+}
-- 
cgit v1.2.3


From 91232b7004d7a9fbf4f99bb9ec4e5eea8e1c6eef Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sat, 24 Oct 2009 17:36:48 -0400
Subject: nouveau: Support X8R8G8B8 textures on nv30, nv40 and RTs on
 nv10-nv40.

---
 src/gallium/drivers/nv04/nv04_surface_2d.c | 1 +
 src/gallium/drivers/nv10/nv10_state_emit.c | 3 +++
 src/gallium/drivers/nv20/nv20_state_emit.c | 3 +++
 src/gallium/drivers/nv30/nv30_fragtex.c    | 1 +
 src/gallium/drivers/nv30/nv30_state_fb.c   | 3 +++
 src/gallium/drivers/nv40/nv40_fragtex.c    | 1 +
 src/gallium/drivers/nv40/nv40_state_fb.c   | 3 +++
 7 files changed, 15 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 8c7eb367e2..8be134b83d 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -42,6 +42,7 @@ nv04_rect_format(enum pipe_format format)
 	case PIPE_FORMAT_A8L8_UNORM:
 	case PIPE_FORMAT_Z16_UNORM:
 		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
 	case PIPE_FORMAT_Z24S8_UNORM:
 	case PIPE_FORMAT_Z24X8_UNORM:
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
index d8691ef9c6..2577ab73b5 100644
--- a/src/gallium/drivers/nv10/nv10_state_emit.c
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -129,6 +129,9 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
 	rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR;
 
 	switch (colour_format) {
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		rt_format |= NV10TCL_RT_FORMAT_COLOR_X8R8G8B8;
+		break;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
 	case 0:
 		rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8;
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 4042f46d05..0122b1c2cd 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -135,6 +135,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
 	rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20;
 
 	switch (colour_format) {
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		rt_format |= NV20TCL_RT_FORMAT_COLOR_X8R8G8B8;
+		break;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
 	case 0:
 		rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8;
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index 3dd636f4ee..dca760cae6 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -21,6 +21,7 @@ struct nv30_texture_format {
 
 static struct nv30_texture_format
 nv30_texture_formats[] = {
+	_(X8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
 	_(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
 	_(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
 	_(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W),
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 4d6a67e56d..6f6d1740d6 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -66,6 +66,9 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	}
 
 	switch (colour_format) {
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		rt_format |= NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
+		break;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
 	case 0:
 		rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index f6cdf31dfe..e2ec57564d 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -23,6 +23,7 @@ struct nv40_texture_format {
 
 static struct nv40_texture_format
 nv40_texture_formats[] = {
+	_(X8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W, 0, 0, 0, 0),
 	_(A8R8G8B8_UNORM, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
 	_(A1R5G5B5_UNORM, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
 	_(A4R4G4B4_UNORM, A4R4G4B4,   S1,   S1,   S1,   S1, X, Y, Z, W, 0, 0, 0, 0),
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
index c2f739157a..1c7a7cd64f 100644
--- a/src/gallium/drivers/nv40/nv40_state_fb.c
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -57,6 +57,9 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40)
 		rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
 
 	switch (colour_format) {
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+		rt_format |= NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
+		break;
 	case PIPE_FORMAT_A8R8G8B8_UNORM:
 	case 0:
 		rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
-- 
cgit v1.2.3


From 5d61b6f1f64ca26dd038af0679873ef0353660dd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 31 Oct 2009 15:05:01 +0000
Subject: i965g: wip on fragment shaders

---
 src/gallium/drivers/i965/brw_wm.h    |  63 ++-
 src/gallium/drivers/i965/brw_wm_fp.c | 871 ++++++++++++++++++++++++++---------
 2 files changed, 698 insertions(+), 236 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 2cd5bb7081..8ee99420aa 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -74,6 +74,7 @@ struct brw_wm_prog_key {
 
    GLuint vp_nr_outputs:6;
    GLuint nr_cbufs:3;
+   GLuint has_flow_control:1;
 
    GLuint program_string_id;
 };
@@ -176,9 +177,36 @@ struct brw_wm_instruction {
 #define MAX_WM_OPCODE     (MAX_OPCODE + 9)
 
 #define BRW_FILE_PAYLOAD   (TGSI_FILE_COUNT)
-#define PAYLOAD_DEPTH      (FRAG_ATTRIB_MAX) /* ?? */
+#define PAYLOAD_DEPTH      (PIPE_MAX_SHADER_INPUTS) /* ?? */
+
+
+struct brw_fp_src {
+   unsigned file:4;
+   unsigned index:16;
+   unsigned swizzle:8;
+   unsigned indirect:1;
+   unsigned negate:1;
+   unsigned abs:1;
+};
+
+struct brw_fp_dst {
+   unsigned file:4;
+   unsigned index:16;
+   unsigned writemask:4;
+   unsigned indirect:1;
+   unsigned saturate:1;
+};
+
+struct brw_fp_instruction {
+   struct brw_fp_dst dst;
+   struct brw_fp_src src[3];
+   unsigned opcode:8;
+   unsigned tex_unit:4;
+   unsigned tex_target:4;
+   unsigned target:10;		/* destination surface for FB_WRITE */
+   unsigned eot:1;		/* mark last instruction (usually FB_WRITE) */
+};
 
-struct brw_passfp_program;
 
 struct brw_wm_compile {
    struct brw_compile func;
@@ -198,9 +226,26 @@ struct brw_wm_compile {
     * simplifying and adding instructions for interpolation and
     * framebuffer writes.
     */
-   struct brw_passfp_program *pass_fp;
-
-
+   struct {
+      GLfloat v[4];
+      unsigned nr;
+   } immediate[BRW_WM_MAX_CONST+3];
+   GLuint nr_immediates;
+   
+   struct brw_fp_instruction fp_instructions[BRW_WM_MAX_INSN];
+   GLuint nr_fp_insns;
+   GLuint fp_temp;
+   GLuint fp_interp_emitted;
+   GLuint fp_fragcolor_emitted;
+   GLuint fp_first_internal_temp;
+
+   struct brw_fp_src fp_pixel_xy;
+   struct brw_fp_src fp_delta_xy;
+   struct brw_fp_src fp_pixel_w;
+
+
+   /* Subsequent passes using SSA representation:
+    */
    struct brw_wm_value vreg[BRW_WM_MAX_VREG];
    GLuint nr_vreg;
 
@@ -213,7 +258,7 @@ struct brw_wm_compile {
    } payload;
 
 
-   const struct brw_wm_ref *pass0_fp_reg[PROGRAM_PAYLOAD+1][256][4];
+   const struct brw_wm_ref *pass0_fp_reg[BRW_FILE_PAYLOAD+1][256][4];
 
    struct brw_wm_ref undef_ref;
    struct brw_wm_value undef_value;
@@ -241,7 +286,7 @@ struct brw_wm_compile {
    struct {
       GLboolean inited;
       struct brw_reg reg;
-   } wm_regs[PROGRAM_PAYLOAD+1][256][4];
+   } wm_regs[BRW_FILE_PAYLOAD+1][256][4];
 
    GLboolean used_grf[BRW_WM_MAX_GRF];
    GLuint first_free_grf;
@@ -258,13 +303,15 @@ struct brw_wm_compile {
       GLint index;
       struct brw_reg reg;
    } current_const[3];
+
+   GLuint error;
 };
 
 
 GLuint brw_wm_nr_args( GLuint opcode );
 GLuint brw_wm_is_scalar_result( GLuint opcode );
 
-void brw_wm_pass_fp( struct brw_wm_compile *c );
+int brw_wm_pass_fp( struct brw_wm_compile *c );
 void brw_wm_pass0( struct brw_wm_compile *c );
 void brw_wm_pass1( struct brw_wm_compile *c );
 void brw_wm_pass2( struct brw_wm_compile *c );
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 8ba037cdae..57933afbbe 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -31,15 +31,26 @@
                
 
 #include "pipe/p_shader_tokens.h"
+#include "pipe/p_error.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_util.h"
 
 #include "brw_wm.h"
 #include "brw_util.h"
+#include "brw_debug.h"
 
 
 #define X    0
 #define Y    1
 #define Z    2
 #define W    3
+#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
 
 
 static const char *wm_opcode_strings[] = {
@@ -54,7 +65,294 @@ static const char *wm_opcode_strings[] = {
    "FRONTFACING",
 };
 
+/***********************************************************************
+ * Source regs
+ */
+
+static struct brw_fp_src src_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_src reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.swizzle = BRW_SWIZZLE_XYZW;
+   reg.indirect = 0;
+   reg.negate = 0;
+   reg.abs = 0;
+   return reg;
+}
+
+static struct brw_fp_src src_reg_from_dst(struct brw_fp_dst dst)
+{
+   return src_reg(dst.file, dst.index);
+}
+
+static struct brw_fp_src src_undef( void )
+{
+   return src_reg(TGSI_FILE_NULL, 0);
+}
+
+static GLboolean src_is_undef(struct brw_fp_src src)
+{
+   return src.file == TGSI_FILE_NULL;
+}
+
+static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z, int w )
+{
+   unsigned swz = reg.swizzle;
+
+   reg.swizzle = ( GET_SWZ(swz, x) << 0 |
+		   GET_SWZ(swz, y) << 2 |
+		   GET_SWZ(swz, z) << 4 |
+		   GET_SWZ(swz, w) << 6 );
+
+   return reg;
+}
+
+static struct brw_fp_src src_scalar( struct brw_fp_src reg, int x )
+{
+   return src_swizzle(reg, x, x, x, x);
+}
+
+static struct brw_fp_src src_abs( struct brw_fp_src src )
+{
+   src.negate = 0;
+   src.abs = 1;
+   return src;
+}
+
+static struct brw_fp_src src_negate( struct brw_fp_src src )
+{
+   src.negate = 1;
+   src.abs = 0;
+   return src;
+}
+
+
+static int match_or_expand_immediate( const float *v,
+                                      unsigned nr,
+                                      float *v2,
+                                      unsigned *nr2,
+                                      unsigned *swizzle )
+{
+   unsigned i, j;
+   
+   *swizzle = 0;
+
+   for (i = 0; i < nr; i++) {
+      boolean found = FALSE;
+
+      for (j = 0; j < *nr2 && !found; j++) {
+         if (v[i] == v2[j]) {
+            *swizzle |= j << (i * 2);
+            found = TRUE;
+         }
+      }
+
+      if (!found) {
+         if (*nr2 >= 4) 
+            return FALSE;
+
+         v2[*nr2] = v[i];
+         *swizzle |= *nr2 << (i * 2);
+         (*nr2)++;
+      }
+   }
+
+   return TRUE;
+}
+
+
+
+/* Internally generated immediates: overkill...
+ */
+static struct brw_fp_src src_imm( struct brw_wm_compile *c, 
+				  const GLfloat *v, 
+				  unsigned nr)
+{
+   unsigned i, j;
+   unsigned swizzle;
+
+   /* Could do a first pass where we examine all existing immediates
+    * without expanding.
+    */
+
+   for (i = 0; i < c->nr_immediates; i++) {
+      if (match_or_expand_immediate( v, 
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr, 
+                                     &swizzle ))
+         goto out;
+   }
+
+   if (c->nr_immediates < Elements(c->immediate)) {
+      i = c->nr_immediates++;
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     c->immediate[i].v,
+                                     &c->immediate[i].nr, 
+                                     &swizzle ))
+         goto out;
+   }
+
+   c->error = 1;
+   return src_undef();
+
+out:
+   /* Make sure that all referenced elements are from this immediate.
+    * Has the effect of making size-one immediates into scalars.
+    */
+   for (j = nr; j < 4; j++)
+      swizzle |= (swizzle & 0x3) << (j * 2);
+
+   return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
+		       GET_SWZ(swizzle, X),
+		       GET_SWZ(swizzle, Y),
+		       GET_SWZ(swizzle, Z),
+		       GET_SWZ(swizzle, W) );
+}
+
+
+
+static struct brw_fp_src src_imm1f( struct brw_wm_compile *c,
+				    GLfloat f )
+{
+   return src_imm(c, &f, 1);
+}
+
+static struct brw_fp_src src_imm4f( struct brw_wm_compile *c,
+				    GLfloat x,
+				    GLfloat y,
+				    GLfloat z,
+				    GLfloat w)
+{
+   GLfloat f[4] = {x,y,z,w};
+   return src_imm(c, f, 4);
+}
+
+
+
+/***********************************************************************
+ * Dest regs
+ */
+
+static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
+{
+   struct brw_fp_dst reg;
+   reg.file = file;
+   reg.index = idx;
+   reg.writemask = BRW_WRITEMASK_XYZW;
+   reg.indirect = 0;
+   return reg;
+}
+
+static struct brw_fp_dst dst_mask( struct brw_fp_dst reg, int mask )
+{
+   reg.writemask &= mask;
+   return reg;
+}
+
+static struct brw_fp_dst dst_undef( void )
+{
+   return dst_reg(TGSI_FILE_NULL, 0);
+}
+
+static boolean dst_is_undef( struct brw_fp_dst dst )
+{
+   return dst.file == TGSI_FILE_NULL;
+}
+
+static struct brw_fp_dst dst_saturate( struct brw_fp_dst reg, boolean flag )
+{
+   reg.saturate = flag;
+   return reg;
+}
+
+static struct brw_fp_dst get_temp( struct brw_wm_compile *c )
+{
+   int bit = ffs( ~c->fp_temp );
+
+   if (!bit) {
+      debug_printf("%s: out of temporaries\n", __FILE__);
+   }
+
+   c->fp_temp |= 1<<(bit-1);
+   return dst_reg(TGSI_FILE_TEMPORARY, c->fp_first_internal_temp+(bit-1));
+}
+
+
+static void release_temp( struct brw_wm_compile *c, struct brw_fp_dst temp )
+{
+   c->fp_temp &= ~(1 << (temp.index - c->fp_first_internal_temp));
+}
+
+
+/***********************************************************************
+ * Instructions 
+ */
+
+static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
+{
+   return &c->fp_instructions[c->nr_fp_insns++];
+}
+
+static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
+					     GLuint op,
+					     struct brw_fp_dst dest,
+					     GLuint tex_src_unit,
+					     GLuint tex_src_target,
+					     struct brw_fp_src src0,
+					     struct brw_fp_src src1,
+					     struct brw_fp_src src2 )
+{
+   struct brw_fp_instruction *inst = get_fp_inst(c);
+
+   inst->opcode = op;
+   inst->dst = dest;
+   inst->tex_unit = tex_src_unit;
+   inst->tex_target = tex_src_target;
+   inst->src[0] = src0;
+   inst->src[1] = src1;
+   inst->src[2] = src2;
 
+   return inst;
+}
+   
+
+static INLINE void emit_op3(struct brw_wm_compile *c,
+			    GLuint op,
+			    struct brw_fp_dst dest,
+			    struct brw_fp_src src0,
+			    struct brw_fp_src src1,
+			    struct brw_fp_src src2 )
+{
+   emit_tex_op(c, op, dest, 0, 0, src0, src1, src2);
+}
+
+
+static INLINE void emit_op2(struct brw_wm_compile *c,
+			    GLuint op,
+			    struct brw_fp_dst dest,
+			    struct brw_fp_src src0,
+			    struct brw_fp_src src1)
+{
+   emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef());
+}
+
+static INLINE void emit_op1(struct brw_wm_compile *c,
+			    GLuint op,
+			    struct brw_fp_dst dest,
+			    struct brw_fp_src src0)
+{
+   emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef());
+}
+
+static INLINE void emit_op0(struct brw_wm_compile *c,
+			   GLuint op,
+			   struct brw_fp_dst dest)
+{
+   emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef());
+}
 
 
@@ -66,10 +364,10 @@ static const char *wm_opcode_strings[] = {
  */
 static void emit_scalar_insn(struct brw_wm_compile *c,
 			     unsigned opcode,
-			     struct brw_dst dst,
-			     struct brw_src src0,
-			     struct brw_src src1,
-			     struct brw_src src2 )
+			     struct brw_fp_dst dst,
+			     struct brw_fp_src src0,
+			     struct brw_fp_src src1,
+			     struct brw_fp_src src2 )
 {
    unsigned first_chan = ffs(dst.writemask) - 1;
    unsigned first_mask = 1 << first_chan;
@@ -77,14 +375,14 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
    if (dst.writemask == 0)
       return;
 
-   emit_op( c, opcode,
-	    brw_writemask(dst, first_mask),
-	    src0, src1, src2 );
+   emit_op3( c, opcode,
+	     dst_mask(dst, first_mask),
+	     src0, src1, src2 );
 
    if (dst.writemask != first_mask) {
       emit_op1(c, TGSI_OPCODE_MOV,
-	       brw_writemask(dst, ~first_mask),
-	       src_swizzle1(brw_src(dst), first_chan));
+	       dst_mask(dst, ~first_mask),
+	       src_scalar(src_reg_from_dst(dst), first_chan));
    }
 }
 
@@ -93,11 +391,11 @@ static void emit_scalar_insn(struct brw_wm_compile *c,
  * Special instructions for interpolation and other tasks
  */
 
-static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_xy)) {
-      struct ureg_dst pixel_xy = get_temp(c);
-      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_pixel_xy)) {
+      struct brw_fp_dst pixel_xy = get_temp(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       
       /* Emit the out calculations, and hold onto the results.  Use
@@ -105,79 +403,85 @@ static struct ureg_src get_pixel_xy( struct brw_wm_compile *c )
        */   
       /* pixel_xy.xy = PIXELXY payload[0];
        */
-      emit_op(c,
-	      WM_PIXELXY,
-	      dst_mask(pixel_xy, BRW_WRITEMASK_XY),
-	      payload_r0_depth,
-	      src_undef(),
-	      src_undef());
+      emit_op1(c,
+	       WM_PIXELXY,
+	       dst_mask(pixel_xy, BRW_WRITEMASK_XY),
+	       payload_r0_depth);
 
-      c->pixel_xy = src_reg_from_dst(pixel_xy);
+      c->fp_pixel_xy = src_reg_from_dst(pixel_xy);
    }
 
-   return c->pixel_xy;
+   return c->fp_pixel_xy;
 }
 
-static struct ureg_src get_delta_xy( struct brw_wm_compile *c )
+static struct brw_fp_src get_delta_xy( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->delta_xy)) {
-      struct ureg_dst delta_xy = get_temp(c);
-      struct ureg_src pixel_xy = get_pixel_xy(c);
-      struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   if (src_is_undef(c->fp_delta_xy)) {
+      struct brw_fp_dst delta_xy = get_temp(c);
+      struct brw_fp_src pixel_xy = get_pixel_xy(c);
+      struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
       
       /* deltas.xy = DELTAXY pixel_xy, payload[0]
        */
-      emit_op(c,
+      emit_op3(c,
 	      WM_DELTAXY,
 	      dst_mask(delta_xy, BRW_WRITEMASK_XY),
 	      pixel_xy, 
 	      payload_r0_depth,
 	      src_undef());
       
-      c->delta_xy = src_reg_from_dst(delta_xy);
+      c->fp_delta_xy = src_reg_from_dst(delta_xy);
    }
 
-   return c->delta_xy;
+   return c->fp_delta_xy;
 }
 
-static struct ureg_src get_pixel_w( struct brw_wm_compile *c )
+static struct brw_fp_src get_pixel_w( struct brw_wm_compile *c )
 {
-   if (src_is_undef(c->pixel_w)) {
-      struct ureg_dst pixel_w = get_temp(c);
-      struct ureg_src deltas = get_delta_xy(c);
-      struct ureg_src interp_wpos = src_reg(TGSI_FILE_PAYLOAD, FRAG_ATTRIB_WPOS);
+   if (src_is_undef(c->fp_pixel_w)) {
+      struct brw_fp_dst pixel_w = get_temp(c);
+      struct brw_fp_src deltas = get_delta_xy(c);
+
+      /* XXX: assuming position is always first -- valid? 
+       */
+      struct brw_fp_src interp_wpos = src_reg(BRW_FILE_PAYLOAD, 0);
 
       /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
        */
-      emit_op(c,
-	      WM_PIXELW,
-	      dst_mask(pixel_w, BRW_WRITEMASK_W),
-	      interp_wpos,
-	      deltas, 
-	      src_undef());
+      emit_op3(c,
+	       WM_PIXELW,
+	       dst_mask(pixel_w, BRW_WRITEMASK_W),
+	       interp_wpos,
+	       deltas, 
+	       src_undef());
       
 
-      c->pixel_w = src_reg_from_dst(pixel_w);
+      c->fp_pixel_w = src_reg_from_dst(pixel_w);
    }
 
-   return c->pixel_w;
+   return c->fp_pixel_w;
 }
 
+
+/***********************************************************************
+ * Emit INTERP instructions ahead of first use of each attrib.
+ */
+
 static void emit_interp( struct brw_wm_compile *c,
+			 GLuint idx,
 			 GLuint semantic,
-			 GLuint semantic_index,
 			 GLuint interp_mode )
 {
-   struct ureg_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
-   struct ureg_src interp = src_reg(TGSI_FILE_PAYLOAD, idx);
-   struct ureg_src deltas = get_delta_xy(c);
+   struct brw_fp_dst dst = dst_reg(TGSI_FILE_INPUT, idx);
+   struct brw_fp_src interp = src_reg(BRW_FILE_PAYLOAD, idx);
+   struct brw_fp_src deltas = get_delta_xy(c);
 
    /* Need to use PINTERP on attributes which have been
     * multiplied by 1/W in the SF program, and LINTERP on those
     * which have not:
     */
    switch (semantic) {
-   case FRAG_ATTRIB_WPOS:
+   case TGSI_SEMANTIC_POSITION:
       /* Have to treat wpos.xy specially:
        */
       emit_op1(c,
@@ -218,7 +522,8 @@ static void emit_interp( struct brw_wm_compile *c,
       }
 
       break;
-   case FRAG_ATTRIB_FOGC:
+
+   case TGSI_SEMANTIC_FOG:
       /* Interpolate the fog coordinate */
       emit_op3(c,
 	      WM_PINTERP,
@@ -228,17 +533,17 @@ static void emit_interp( struct brw_wm_compile *c,
 	      get_pixel_w(c));
 
       emit_op1(c,
-	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, BRW_WRITEMASK_YZ),
-	      brw_imm1f(0.0));
+	       TGSI_OPCODE_MOV,
+	       dst_mask(dst, BRW_WRITEMASK_YZ),
+	       src_imm1f(c, 0.0));
 
       emit_op1(c,
-	      TGSI_OPCODE_MOV,
-	      dst_mask(dst, BRW_WRITEMASK_W),
-	      brw_imm1f(1.0));
+	       TGSI_OPCODE_MOV,
+	       dst_mask(dst, BRW_WRITEMASK_W),
+	       src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_FACE:
+   case TGSI_SEMANTIC_FACE:
       /* XXX review/test this case */
       emit_op0(c,
 	       WM_FRONTFACING,
@@ -247,15 +552,15 @@ static void emit_interp( struct brw_wm_compile *c,
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, BRW_WRITEMASK_YZ),
-	      brw_imm1f(0.0));
+	       src_imm1f(c, 0.0));
 
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, BRW_WRITEMASK_W),
-	      brw_imm1f(1.0));
+	       src_imm1f(c, 1.0));
       break;
 
-   case FRAG_ATTRIB_PNTC:
+   case TGSI_SEMANTIC_PSIZE:
       /* XXX review/test this case */
       emit_op3(c,
 	       WM_PINTERP,
@@ -267,12 +572,12 @@ static void emit_interp( struct brw_wm_compile *c,
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, BRW_WRITEMASK_Z),
-	      brw_imm1f(c->pass_fp, 0.0f));
+	      src_imm1f(c, 0.0f));
 
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, BRW_WRITEMASK_W),
-	      brw_imm1f(c->pass_fp, 1.0f));
+	      src_imm1f(c, 1.0f));
       break;
 
    default: 
@@ -310,11 +615,11 @@ static void emit_interp( struct brw_wm_compile *c,
  * Expand various instructions here to simpler forms.  
  */
 static void precalc_dst( struct brw_wm_compile *c,
-			 struct brw_dst dst,
-			 struct brw_src src0,
-			 struct brw_src src1 )
+			 struct brw_fp_dst dst,
+			 struct brw_fp_src src0,
+			 struct brw_fp_src src1 )
 {
-   if (dst.WriteMask & BRW_WRITEMASK_Y) {      
+   if (dst.writemask & BRW_WRITEMASK_Y) {      
       /* dst.y = mul src0.y, src1.y
        */
       emit_op2(c,
@@ -324,25 +629,22 @@ static void precalc_dst( struct brw_wm_compile *c,
 	       src1);
    }
 
-   if (dst.WriteMask & BRW_WRITEMASK_XZ) {
-      struct prog_instruction *swz;
-      GLuint z = GET_SWZ(src0.Swizzle, Z);
-
+   if (dst.writemask & BRW_WRITEMASK_XZ) {
       /* dst.z = mov src0.zzzz
        */
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
 	      dst_mask(dst, BRW_WRITEMASK_Z),
-	      src_swizzle1(src0, Z));
+	      src_scalar(src0, Z));
 
-      /* dst.x = immf(1.0)
+      /* dst.x = imm1f(1.0)
        */
       emit_op1(c,
 	      TGSI_OPCODE_MOV,
-	      brw_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
-	      src_immf(c, 1.0));
+	      dst_saturate(dst_mask(dst, BRW_WRITEMASK_X), 0),
+	      src_imm1f(c, 1.0));
    }
-   if (dst.WriteMask & BRW_WRITEMASK_W) {
+   if (dst.writemask & BRW_WRITEMASK_W) {
       /* dst.w = mov src1.w
        */
       emit_op1(c,
@@ -354,22 +656,22 @@ static void precalc_dst( struct brw_wm_compile *c,
 
 
 static void precalc_lit( struct brw_wm_compile *c,
-			 struct ureg_dst dst,
-			 struct ureg_src src0 )
+			 struct brw_fp_dst dst,
+			 struct brw_fp_src src0 )
 {
-   if (dst.WriteMask & BRW_WRITEMASK_XW) {
+   if (dst.writemask & BRW_WRITEMASK_XW) {
       /* dst.xw = imm(1.0f)
        */
       emit_op1(c,
 	       TGSI_OPCODE_MOV,
-	       brw_saturate(brw_writemask(dst, BRW_WRITEMASK_XW), 0),
-	       brw_imm1f(1.0f));
+	       dst_saturate(dst_mask(dst, BRW_WRITEMASK_XW), 0),
+	       src_imm1f(c, 1.0f));
    }
 
-   if (dst.WriteMask & BRW_WRITEMASK_YZ) {
+   if (dst.writemask & BRW_WRITEMASK_YZ) {
       emit_op1(c,
 	       TGSI_OPCODE_LIT,
-	       brw_writemask(dst, BRW_WRITEMASK_YZ),
+	       dst_mask(dst, BRW_WRITEMASK_YZ),
 	       src0);
    }
 }
@@ -382,41 +684,42 @@ static void precalc_lit( struct brw_wm_compile *c,
  * instruction itself.
  */
 static void precalc_tex( struct brw_wm_compile *c,
-			 struct brw_dst dst,
+			 struct brw_fp_dst dst,
+			 unsigned target,
 			 unsigned unit,
-			 struct brw_src src0 )
+			 struct brw_fp_src src0 )
 {
-   struct ureg_src coord = src_undef();
-   struct ureg_dst tmp = dst_undef();
+   struct brw_fp_src coord = src_undef();
+   struct brw_fp_dst tmp = dst_undef();
 
    assert(unit < BRW_MAX_TEX_UNIT);
 
    /* Cubemap: find longest component of coord vector and normalize
     * it.
     */
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX) {
-      struct ureg_src tmpsrc;
+   if (target == TGSI_TEXTURE_CUBE) {
+      struct brw_fp_src tmpsrc;
 
       tmp = get_temp(c);
-      tmpsrc = brw_src(tmpcoord)
+      tmpsrc = src_reg_from_dst(tmp);
 
       /* tmp = abs(src0) */
       emit_op1(c, 
 	       TGSI_OPCODE_MOV,
 	       tmp,
-	       brw_abs(src0));
+	       src_abs(src0));
 
       /* tmp.X = MAX(tmp.X, tmp.Y) */
       emit_op2(c, TGSI_OPCODE_MAX,
-	       brw_writemask(tmp, BRW_WRITEMASK_X),
-	       src_swizzle1(tmpsrc, X),
-	       src_swizzle1(tmpsrc, Y));
+	       dst_mask(tmp, BRW_WRITEMASK_X),
+	       src_scalar(tmpsrc, X),
+	       src_scalar(tmpsrc, Y));
 
       /* tmp.X = MAX(tmp.X, tmp.Z) */
       emit_op2(c, TGSI_OPCODE_MAX,
-	       brw_writemask(tmp, BRW_WRITEMASK_X),
+	       dst_mask(tmp, BRW_WRITEMASK_X),
 	       tmpsrc,
-	       src_swizzle1(tmpsrc, Z));
+	       src_scalar(tmpsrc, Z));
 
       /* tmp.X = 1 / tmp.X */
       emit_op1(c, TGSI_OPCODE_RCP,
@@ -427,11 +730,12 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_op2(c, TGSI_OPCODE_MUL,
 	       tmp,
 	       src0,
-	       src_swizzle1(tmpsrc, SWIZZLE_X));
+	       src_scalar(tmpsrc, X));
 
       coord = tmpsrc;
    }
-   else if (inst->TexSrcTarget == TEXTURE_RECT_INDEX) {
+   else if (target == TGSI_TEXTURE_RECT ||
+	    target == TGSI_TEXTURE_SHADOWRECT) {
       /* XXX: need a mechanism for internally generated constants.
        */
       coord = src0;
@@ -448,19 +752,18 @@ static void precalc_tex( struct brw_wm_compile *c,
    if (c->key.yuvtex_mask & (1 << unit)) {
       /* convert ycbcr to RGBA */
       GLboolean  swap_uv = c->key.yuvtex_swap_mask & (1<<unit);
-      struct ureg_dst dst = inst->DstReg;
-      struct ureg_dst tmp = get_temp(c);
-      struct ureg_src tmpsrc = src_reg_from_dst(tmp);
-      struct ureg_src C0 = ureg_imm4f( c->ureg,  -.5, -.0625, -.5, 1.164 );
-      struct ureg_src C1 = ureg_imm4f( c->ureg, 1.596, -0.813, 2.018, -.391 );
+      struct brw_fp_dst tmp = get_temp(c);
+      struct brw_fp_src tmpsrc = src_reg_from_dst(tmp);
+      struct brw_fp_src C0 = src_imm4f( c,  -.5, -.0625, -.5, 1.164 );
+      struct brw_fp_src C1 = src_imm4f( c, 1.596, -0.813, 2.018, -.391 );
      
       /* tmp     = TEX ...
        */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  brw_saturate(tmp, dst.Saturate),
+                  dst_saturate(tmp, dst.saturate),
                   unit,
-                  inst->TexSrcTarget,
+                  target,
                   coord,
                   src_undef(),
                   src_undef());
@@ -477,7 +780,7 @@ static void precalc_tex( struct brw_wm_compile *c,
       emit_op2(c, TGSI_OPCODE_MUL,
 	       dst_mask(tmp, BRW_WRITEMASK_Y),
 	       tmpsrc,
-	       src_swizzle1(C0, W));
+	       src_scalar(C0, W));
 
       /* 
        * if (UV swaped)
@@ -492,16 +795,16 @@ static void precalc_tex( struct brw_wm_compile *c,
 		 src_swizzle(tmpsrc, Z,Z,X,X) : 
 		 src_swizzle(tmpsrc, X,X,Z,Z)),
 	       C1,
-	       src_swizzle1(tmpsrc, Y));
+	       src_scalar(tmpsrc, Y));
 
       /*  RGB.y   = MAD YUV.z, C1.w, RGB.y
        */
       emit_op3(c,
 	       TGSI_OPCODE_MAD,
 	       dst_mask(dst, BRW_WRITEMASK_Y),
-	       src_swizzle1(tmpsrc, Z),
-	       src_swizzle1(C1, W),
-	       src_swizzle1(src_reg_from_dst(dst), Y));
+	       src_scalar(tmpsrc, Z),
+	       src_scalar(C1, W),
+	       src_scalar(src_reg_from_dst(dst), Y));
 
       release_temp(c, tmp);
    }
@@ -509,9 +812,9 @@ static void precalc_tex( struct brw_wm_compile *c,
       /* ordinary RGBA tex instruction */
       emit_tex_op(c, 
                   TGSI_OPCODE_TEX,
-                  inst->DstReg,
+                  dst,
                   unit,
-                  inst->TexSrcTarget,
+                  target,
                   coord,
                   src_undef(),
                   src_undef());
@@ -523,8 +826,8 @@ static void precalc_tex( struct brw_wm_compile *c,
 
    /* Release this temp if we ended up allocating it:
     */
-   if (!brw_dst_is_undef(tmpcoord))
-      release_temp(c, tmpcoord);
+   if (!dst_is_undef(tmp))
+      release_temp(c, tmp);
 }
 
 
@@ -532,13 +835,9 @@ static void precalc_tex( struct brw_wm_compile *c,
  * Check if the given TXP instruction really needs the divide-by-W step.
  */
 static GLboolean projtex( struct brw_wm_compile *c,
-			  const struct prog_instruction *inst )
+			  unsigned target, 
+			  struct brw_fp_src src )
 {
-   const struct ureg_src src = inst->SrcReg[0];
-   GLboolean retVal;
-
-   assert(inst->Opcode == TGSI_OPCODE_TXP);
-
    /* Only try to detect the simplest cases.  Could detect (later)
     * cases where we are trying to emit code like RCP {1.0}, MUL x,
     * {1.0}, and so on.
@@ -546,16 +845,15 @@ static GLboolean projtex( struct brw_wm_compile *c,
     * More complex cases than this typically only arise from
     * user-provided fragment programs anyway:
     */
-   if (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)
-      retVal = GL_FALSE;  /* ut2004 gun rendering !?! */
-   else if (src.File == TGSI_FILE_INPUT && 
-	    GET_SWZ(src.Swizzle, W) == W &&
-            (c->key.proj_attrib_mask & (1 << src.Index)) == 0)
-      retVal = GL_FALSE;
-   else
-      retVal = GL_TRUE;
-
-   return retVal;
+   if (target == TGSI_TEXTURE_CUBE)
+      return GL_FALSE;  /* ut2004 gun rendering !?! */
+   
+   if (src.file == TGSI_FILE_INPUT && 
+       GET_SWZ(src.swizzle, W) == W &&
+       (c->key.proj_attrib_mask & (1 << src.index)) == 0)
+      return GL_FALSE;
+
+   return GL_TRUE;
 }
 
 
@@ -563,110 +861,168 @@ static GLboolean projtex( struct brw_wm_compile *c,
  * Emit code for TXP.
  */
 static void precalc_txp( struct brw_wm_compile *c,
-			       const struct prog_instruction *inst )
+			 struct brw_fp_dst dst,
+			 unsigned target,
+			 unsigned unit,
+			 struct brw_fp_src src0 )
 {
-   struct ureg_src src0 = inst->SrcReg[0];
-
-   if (projtex(c, inst)) {
-      struct ureg_dst tmp = get_temp(c);
-      struct prog_instruction tmp_inst;
+   if (projtex(c, target, src0)) {
+      struct brw_fp_dst tmp = get_temp(c);
 
       /* tmp0.w = RCP inst.arg[0][3]
        */
-      emit_op(c,
+      emit_op1(c,
 	      TGSI_OPCODE_RCP,
 	      dst_mask(tmp, BRW_WRITEMASK_W),
-	      src_swizzle1(src0, GET_SWZ(src0.Swizzle, W)),
-	      src_undef(),
-	      src_undef());
+	      src_scalar(src0, W));
 
       /* tmp0.xyz =  MUL inst.arg[0], tmp0.wwww
        */
-      emit_op(c,
-	      TGSI_OPCODE_MUL,
-	      dst_mask(tmp, BRW_WRITEMASK_XYZ),
-	      src0,
-	      src_swizzle1(src_reg_from_dst(tmp), W),
-	      src_undef());
+      emit_op2(c,
+	       TGSI_OPCODE_MUL,
+	       dst_mask(tmp, BRW_WRITEMASK_XYZ),
+	       src0,
+	       src_scalar(src_reg_from_dst(tmp), W));
 
-      /* dst = precalc(TEX tmp0)
+      /* dst = TEX tmp0
        */
-      tmp_inst = *inst;
-      tmp_inst.SrcReg[0] = src_reg_from_dst(tmp);
-      precalc_tex(c, &tmp_inst);
+      precalc_tex(c, 
+		  dst,
+		  target,
+		  unit,
+		  src_reg_from_dst(tmp));
 
       release_temp(c, tmp);
    }
    else
    {
-      /* dst = precalc(TEX src0)
+      /* dst = TEX src0
        */
-      precalc_tex(c, inst);
+      precalc_tex(c, dst, target, unit, src0);
    }
 }
 
 
+/* Find the first output whose semantic name and index match.  XXX: note this returns a src_reg.
+ */
+static struct brw_fp_src
+find_output_by_semantic( struct brw_wm_compile *c,
+			 unsigned semantic,
+			 unsigned index )
+{
+   const struct tgsi_shader_info *info = &c->fp->info;
+   unsigned i;
+
+   for (i = 0; i < info->num_outputs; i++)
+      if (info->output_semantic_name[i] == semantic &&
+	  info->output_semantic_index[i] == index)
+	 return src_reg( TGSI_FILE_OUTPUT, i );
+
+   /* If not found, return some arbitrary immediate value (callers get 1.0, not an error):
+    */
+   return src_imm1f(c, 1.0);
+}
+
 
 static void emit_fb_write( struct brw_wm_compile *c )
 {
-   struct ureg_src payload_r0_depth = src_reg(TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH);
-   struct ureg_src outdepth = src_reg(TGSI_FILE_OUTPUT, FRAG_RESULT_DEPTH);
-   struct ureg_src outcolor;
-   struct prog_instruction *inst;
+   struct brw_fp_src payload_r0_depth = src_reg(BRW_FILE_PAYLOAD, PAYLOAD_DEPTH);
+   struct brw_fp_src outdepth = find_output_by_semantic(c, TGSI_SEMANTIC_POSITION, 0);
    GLuint i;
 
 
-   /* The inst->Aux field is used for FB write target and the EOT marker */
+   outdepth = src_scalar(outdepth, Z);	/* depth is the Z channel of the position output */
 
    for (i = 0 ; i < c->key.nr_cbufs; i++) {
-      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
+      struct brw_fp_src outcolor;
+      unsigned target = i<<1;	/* render target index in bits 1 and up; bit 0 is the EOT flag */
 
-      inst = emit_op(c, WM_FB_WRITE,
-		     dst_mask(dst_undef(), 0),
-		     outcolor,
-		     payload_r0_depth,
-		     outdepth);
+      /* Set EOT flag on last inst:
+       */
+      if (i == c->key.nr_cbufs - 1)
+	 target |= 1;
+      
+      outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
 
-      inst->Aux = (i<<1);
+      /* Use emit_tex_op so that we can specify the inst->tex_target
+       * field, which is abused to contain the FB write target and the
+       * EOT marker
+       */
+      emit_tex_op(c, WM_FB_WRITE,
+		  dst_undef(),
+		  target,	/* NOTE(review): this slot is tex_src_unit (inst->tex_unit), not tex_target -- confirm against the consumer */
+		  0,
+		  outcolor,
+		  payload_r0_depth,
+		  outdepth);
    }
- 
-   /* Set EOT flag on last inst:
-    */
-   inst->Aux |= 1; //eot
 }
 
 
+static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
+					const struct tgsi_full_dst_register *dst,
+					unsigned saturate )
+{
+   struct brw_fp_dst out;	/* filled field by field from the TGSI destination register */
+
+   out.file = dst->DstRegister.File;
+   out.index = dst->DstRegister.Index;
+   out.writemask = dst->DstRegister.WriteMask;
+   out.indirect = dst->DstRegister.Indirect;
+   out.saturate = (saturate == TGSI_SAT_ZERO_ONE);	/* any other saturate mode is treated as "no saturate" */
+   
+   if (out.indirect) {	/* indirect addressing is only accepted through ADDR[0] */
+      assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
+      assert(dst->DstRegisterInd.Index == 0);
+   }
+   
+   return out;
+}
 
 
-/***********************************************************************
- * Emit INTERP instructions ahead of first use of each attrib.
- */
-
-static void validate_src_regs( struct brw_wm_compile *c,
-			       const struct prog_instruction *inst )
+static struct brw_fp_src translate_src( struct brw_wm_compile *c,
+					const struct tgsi_full_src_register *src )
 {
-   GLuint nr_args = brw_wm_nr_args( inst->Opcode );
-   GLuint i;
+   struct brw_fp_src out;
+
+   out.file = src->SrcRegister.File;
+   out.index = src->SrcRegister.Index;
+   out.indirect = src->SrcRegister.Indirect;
+
+   out.swizzle = ((src->SrcRegister.SwizzleX << 0) |
+		  (src->SrcRegister.SwizzleY << 2) |
+		  (src->SrcRegister.SwizzleZ << 4) |
+		  (src->SrcRegister.SwizzleW << 6));
+   
+   switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
+   case TGSI_UTIL_SIGN_CLEAR:
+      out.abs = 1;
+      out.negate = 0;
+      break;
 
-   for (i = 0; i < nr_args; i++) {
-      if (inst->SrcReg[i].File == TGSI_FILE_INPUT) {
-	 GLuint idx = inst->SrcReg[i].Index;
-	 if (!(c->fp_interp_emitted & (1<<idx))) {
-	    emit_interp(c, idx);
-	    c->fp_interp_emitted |= 1<<idx;
-	 }
-      }
+   case TGSI_UTIL_SIGN_SET:
+      out.abs = 1;
+      out.negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      out.abs = 0;
+      out.negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+   default:
+      out.abs = 0;
+      out.negate = 0;
+      break;
    }
-}
-	 
-static void validate_dst_regs( struct brw_wm_compile *c,
-			       const struct prog_instruction *inst )
-{
-   if (inst->DstReg.File == TGSI_FILE_OUTPUT) {
-      GLuint idx = inst->DstReg.Index;
-      if (idx == FRAG_RESULT_COLOR)
-         c->fp_fragcolor_emitted |= inst->DstReg.WriteMask;
+
+   if (out.indirect) {
+      assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
+      assert(src->SrcRegisterInd.Index == 0);
    }
+   
+   return out;
 }
 
 
@@ -674,59 +1030,78 @@ static void validate_dst_regs( struct brw_wm_compile *c,
 static void emit_insn( struct brw_wm_compile *c,
 		       const struct tgsi_full_instruction *inst )
 {
-
-   switch (inst->Opcode) {
+   unsigned opcode = inst->Instruction.Opcode;
+   struct brw_fp_dst dst;
+   struct brw_fp_src src[3];
+   int i;
+
+   dst = translate_dst( c, &inst->FullDstRegisters[0],
+			inst->Instruction.Saturate );
+
+   for (i = 0; i < 3; i++)	/* each source from its own register; unused slots become undef and src[] is never overrun */
+      src[i] = (i < (int) inst->Instruction.NumSrcRegs) ?
+	 translate_src( c, &inst->FullSrcRegisters[i] ) : src_undef();
+   switch (opcode) {
    case TGSI_OPCODE_ABS:
       emit_op1(c, TGSI_OPCODE_MOV,
 	       dst, 
-	       brw_abs(src[0]));
+	       src_abs(src[0]));
       break;
 
    case TGSI_OPCODE_SUB: 
       emit_op2(c, TGSI_OPCODE_ADD,
 	       dst,
 	       src[0],
-	       brw_negate(src[1]));
+	       src_negate(src[1]));
       break;
 
    case TGSI_OPCODE_SCS: 
       emit_op1(c, TGSI_OPCODE_SCS,
-	       brw_writemask(dst, BRW_WRITEMASK_XY),
+	       dst_mask(dst, BRW_WRITEMASK_XY),
 	       src[0]);
       break;
 	 
    case TGSI_OPCODE_DST:
-      precalc_dst(c, inst);
+      precalc_dst(c, dst, src[0], src[1]);
       break;
 
    case TGSI_OPCODE_LIT:
-      precalc_lit(c, inst);
+      precalc_lit(c, dst, src[0]);
       break;
 
    case TGSI_OPCODE_TEX:
-      precalc_tex(c, inst);
+      precalc_tex(c, dst,
+		  inst->InstructionExtTexture.Texture,
+		  src[1].index,	/* sampler unit: index of the sampler operand */
+		  src[0] );	/* texture coordinate */
       break;
 
    case TGSI_OPCODE_TXP:
-      precalc_txp(c, inst);
+      precalc_txp(c, dst,
+		  inst->InstructionExtTexture.Texture,
+		  src[1].index,	/* sampler unit: index of the sampler operand */
+		  src[0] );	/* texture coordinate */
       break;
 
    case TGSI_OPCODE_TXB:
-      out = emit_insn(c, inst);
-      out->TexSrcUnit = fp->program.Base.SamplerUnits[inst->TexSrcUnit];
-      assert(out->TexSrcUnit < BRW_MAX_TEX_UNIT);
+      /* XXX: TXB not done
+       */
+      precalc_tex(c, dst,
+		  inst->InstructionExtTexture.Texture,
+		  src[1].index,	/* sampler unit: index of the sampler operand */
+		  src[0] );	/* texture coordinate */
       break;
 
    case TGSI_OPCODE_XPD: 
       emit_op2(c, TGSI_OPCODE_XPD,
-	       brw_writemask(dst, BRW_WRITEMASK_XYZ),
+	       dst_mask(dst, BRW_WRITEMASK_XYZ),
 	       src[0], 
 	       src[1]);
       break;
 
    case TGSI_OPCODE_KIL: 
       emit_op1(c, TGSI_OPCODE_KIL,
-	       brw_writemask(dst_undef(), 0),
+	       dst_mask(dst_undef(), 0),
 	       src[0]);
       break;
 
@@ -734,10 +1109,11 @@ static void emit_insn( struct brw_wm_compile *c,
       emit_fb_write(c);
       break;
    default:
-      if (brw_wm_is_scalar_result(inst->Opcode))
+      if (!c->key.has_flow_control &&
+	  brw_wm_is_scalar_result(opcode))
 	 emit_scalar_insn(c, opcode, dst, src[0], src[1], src[2]);
       else
-	 emit_op(c, opcode, dst, src[0], src[1], src[2]);
+	 emit_op3(c, opcode, dst, src[0], src[1], src[2]);
       break;
    }
 }
@@ -746,46 +1122,70 @@ static void emit_insn( struct brw_wm_compile *c,
  * Initial pass for fragment program code generation.
  * This function is used by both the GLSL and non-GLSL paths.
  */
-void brw_wm_pass_fp( struct brw_wm_compile *c )
+int brw_wm_pass_fp( struct brw_wm_compile *c )
 {
-   struct brw_fragment_program *fp = c->fp;
-   GLuint insn;
+   struct brw_fragment_shader *fs = c->fp;
+   struct tgsi_parse_context parse;
+   struct tgsi_full_instruction *inst;
+   struct tgsi_full_declaration *decl;
+   const float *imm;
+   GLuint size;
+   GLuint i;
 
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pre-fp:\n");
-      tgsi_dump(fp->tokens, 0); 
+      tgsi_dump(fs->tokens, 0); 
    }
 
-   c->pixel_xy = brw_src_undef();
-   c->delta_xy = brw_src_undef();
-   c->pixel_w = brw_src_undef();
+   c->fp_pixel_xy = src_undef();
+   c->fp_delta_xy = src_undef();
+   c->fp_pixel_w = src_undef();
    c->nr_fp_insns = 0;
-   c->fp->tex_units_used = 0x0;
+   c->nr_immediates = 0;
 
 
    /* Loop over all instructions doing assorted simplifications and
     * transformations.
     */
-   tgsi_parse_init( &parse, tokens );
+   tgsi_parse_init( &parse, fs->tokens );
    while( !tgsi_parse_end_of_tokens( &parse ) ) {
       tgsi_parse_token( &parse );
 
       switch( parse.FullToken.Token.Type ) {
       case TGSI_TOKEN_TYPE_DECLARATION:
-	 /* If branching shader, emit preamble instructions at decl time, as
-	  * instruction order in the shader does not correspond to the order
-	  * instructions are executed in the wild.
-	  *
-	  * This is where special instructions such as WM_CINTERP,
-	  * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to compute
-	  * shader inputs from varying vars.
+	 /* Turn input declarations into special WM_* instructions.
 	  *
 	  * XXX: For non-branching shaders, consider deferring variable
 	  * initialization as late as possible to minimize register
 	  * usage.  This is how the original BRW driver worked.
+	  *
+	  * In a branching shader, must emit preamble instructions at decl
+	  * time, as instruction order in the shader does not
+	  * correspond to the order instructions are executed in the
+	  * wild.
+	  *
+	  * This is where special instructions such as WM_CINTERP,
+	  * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
+	  * compute shader inputs from the payload registers and pixel
+	  * position.
 	  */
-	 validate_src_regs(c, inst);
-	 validate_dst_regs(c, inst);
+         decl = &parse.FullToken.FullDeclaration;
+         if( decl->Declaration.File == TGSI_FILE_INPUT ) {
+            unsigned first, last, mask;
+            unsigned attrib;
+
+            first = decl->DeclarationRange.First;
+            last = decl->DeclarationRange.Last;
+            mask = decl->Declaration.UsageMask;
+
+            for (attrib = first; attrib <= last; attrib++) {
+	       emit_interp(c, 
+			   attrib, 
+			   decl->Semantic.SemanticName,
+			   decl->Declaration.Interpolate );
+            }
+         }
+	 
          break;
 
       case TGSI_TOKEN_TYPE_IMMEDIATE:
@@ -795,21 +1195,36 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
 	  * float value per instruction.  Just save the data for now
 	  * and use directly later.
 	  */
+	 /* One c->immediate[] slot per token; nr_immediates is bumped once, after the slot is filled. */
+	 imm = &parse.FullToken.FullImmediate.u[0].Float;
+	 size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+
+	 if (c->nr_immediates >= BRW_WM_MAX_CONST)
+	    return PIPE_ERROR_OUT_OF_MEMORY;
+
+	 for (i = 0; i < size; i++)
+	    c->immediate[c->nr_immediates].v[i] = imm[i];
+
+	 for (; i < 4; i++)
+	    c->immediate[c->nr_immediates].v[i] = 0.0;
+
+	 c->immediate[c->nr_immediates].nr = size;
+	 c->nr_immediates++;
 	 break;
 
       case TGSI_TOKEN_TYPE_INSTRUCTION:
          inst = &parse.FullToken.FullInstruction;
-	 emit_insn( c, inst );
+	 emit_insn(c, inst);
 	 break;
       }
    }
 
-   c->brw_program = brw_finalize( c->builder );
-
    if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("pass_fp:\n");
-      brw_print_program( c->brw_program );
+      //brw_print_program( c->fp_brw_program );
       debug_printf("\n");
    }
+
+   return c->error;
 }
 
-- 
cgit v1.2.3


From f202a34cb1eca41cf5d12bd72016f284bc81ccf8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 31 Oct 2009 18:23:14 +0000
Subject: i965g: non-glsl fragment shader path is compiling

Disabled glsl code for now, probably want to clean this up somehow.
---
 src/gallium/drivers/i965/Makefile       |   1 -
 src/gallium/drivers/i965/brw_wm.c       |  14 +-
 src/gallium/drivers/i965/brw_wm.h       |  10 +-
 src/gallium/drivers/i965/brw_wm_fp.c    |   7 +-
 src/gallium/drivers/i965/brw_wm_glsl.c  | 268 ++++++++++++++++++++------------
 src/gallium/drivers/i965/brw_wm_pass0.c |  87 +++++------
 src/gallium/drivers/i965/brw_wm_pass1.c |   8 +-
 src/gallium/drivers/i965/brw_wm_pass2.c |  27 +---
 8 files changed, 230 insertions(+), 192 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index c3dbad72ae..896cb234a6 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -47,7 +47,6 @@ C_SOURCES = \
 	brw_wm_debug.c \
 	brw_wm_emit.c \
 	brw_wm_fp.c \
-	brw_wm_glsl.c \
 	brw_wm_iz.c \
 	brw_wm_pass0.c \
 	brw_wm_pass1.c \
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 33602b59c1..4fbf9de9bb 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -177,7 +177,10 @@ static int do_wm_prog( struct brw_context *brw,
     */
    if (fp->has_flow_control) {
       c->dispatch_width = 8;
-      brw_wm_branching_shader_emit(brw, c);
+      /* XXX: GLSL support
+       */
+      exit(1);
+      //brw_wm_branching_shader_emit(brw, c);
    }
    else {
       c->dispatch_width = 16;
@@ -239,18 +242,9 @@ static void brw_wm_populate_key( struct brw_context *brw,
 		    brw->curr.fragment_shader->uses_depth,
 		    key);
 
-   /* Revisit this, figure out if it's really useful, and either push
-    * it into the state tracker so that everyone benefits (use to
-    * create fs varients with TEX rather than TXP), or discard.
-    */
-   key->proj_attrib_mask = ~0; /*brw->wm.input_size_masks[4-1];*/
-
    /* PIPE_NEW_RAST */
    key->flat_shade = brw->curr.rast->templ.flatshade;
 
-   /* This can be determined by looking at the INTERP mode each input decl.
-    */
-   key->linear_attrib_mask = 0;
 
    /* PIPE_NEW_BOUND_TEXTURES */
    for (i = 0; i < brw->curr.num_textures; i++) {
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 8ee99420aa..48dac39756 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -56,9 +56,6 @@
 #define AA_ALWAYS    2
 
 struct brw_wm_prog_key {
-   unsigned proj_attrib_mask;    /**< one bit per fragment program attribute */
-   unsigned linear_attrib_mask;  /**< linear interpolation vs perspective interp */
-
    GLuint source_depth_reg:3;
    GLuint aa_dest_stencil_reg:3;
    GLuint dest_depth_reg:3;
@@ -73,6 +70,7 @@ struct brw_wm_prog_key {
    GLuint yuvtex_swap_mask:16;	/* UV swaped */
 
    GLuint vp_nr_outputs:6;
+   GLuint nr_inputs:6;
    GLuint nr_cbufs:3;
    GLuint has_flow_control:1;
 
@@ -179,6 +177,12 @@ struct brw_wm_instruction {
 #define BRW_FILE_PAYLOAD   (TGSI_FILE_COUNT)
 #define PAYLOAD_DEPTH      (PIPE_MAX_SHADER_INPUTS) /* ?? */
 
+#define X    0
+#define Y    1
+#define Z    2
+#define W    3
+#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
+
 
 struct brw_fp_src {
    unsigned file:4;
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 57933afbbe..58f1d35b7d 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -46,11 +46,6 @@
 #include "brw_debug.h"
 
 
-#define X    0
-#define Y    1
-#define Z    2
-#define W    3
-#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
 
 
 static const char *wm_opcode_strings[] = {
@@ -850,7 +845,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
    
    if (src.file == TGSI_FILE_INPUT && 
        GET_SWZ(src.swizzle, W) == W &&
-       (c->key.proj_attrib_mask & (1 << src.index)) == 0)
+       c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
       return GL_FALSE;
 
    return GL_TRUE;
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index cdc10484a6..a06b0a446e 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -1,10 +1,13 @@
+#include "util/u_math.h"
+
+
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
 
 
 static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
-                                  const struct prog_instruction *inst,
+                                  const struct brw_fp_instruction *inst,
                                   GLuint component);
 
 
@@ -63,7 +66,7 @@ alloc_grf(struct brw_wm_compile *c)
    /* really, no free GRF regs found */
    if (!c->out_of_regs) {
       /* print warning once per compilation */
-      _mesa_warning(NULL, "i965: ran out of registers for fragment program");
+      debug_printf("%s: ran out of registers for fragment program", __FUNCTION__);
       c->out_of_regs = GL_TRUE;
    }
 
@@ -154,20 +157,18 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 {
     struct brw_reg reg;
     switch (file) {
-	case PROGRAM_STATE_VAR:
-	case PROGRAM_CONSTANT:
-	case PROGRAM_UNIFORM:
-	    file = PROGRAM_STATE_VAR;
-	    break;
-	case PROGRAM_UNDEFINED:
+	case TGSI_FILE_NULL:
 	    return brw_null_reg();	
-	case PROGRAM_TEMPORARY:
-	case PROGRAM_INPUT:
-	case PROGRAM_OUTPUT:
-	case PROGRAM_PAYLOAD:
+
+	case TGSI_FILE_CONSTANT:
+	case TGSI_FILE_TEMPORARY:
+	case TGSI_FILE_INPUT:
+	case TGSI_FILE_OUTPUT:
+	case BRW_FILE_PAYLOAD:
 	    break;
+
 	default:
-	   debug_printf("Unexpected file in get_reg()");
+	   debug_printf("%s: Unexpected file type\n", __FUNCTION__);
 	   return brw_null_reg();
     }
 
@@ -204,6 +205,76 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 
 
+
+/**
+ * Find first/last instruction that references each temporary register.
+ *
+ * Every entry of \p intBegin and \p intEnd is first reset to -1.  The
+ * instruction list is then walked once; each source or destination operand
+ * in the PROGRAM_TEMPORARY file is reported to update_interval() (defined
+ * elsewhere in this file) together with the index of the instruction that
+ * uses it.  A temporary touched inside a loop is additionally reported
+ * against the end instruction of the innermost enclosing loop.
+ *
+ * \param instructions     instructions to scan
+ * \param numInstructions  number of entries in \p instructions
+ * \param intBegin         per-temporary output array
+ * \param intEnd           per-temporary output array
+ * \return GL_TRUE if the whole list was scanned.  GL_FALSE if the scan was
+ *         abandoned because of a CAL instruction, a relatively addressed
+ *         temporary, loops nested deeper than MAX_LOOP_NESTING, or an
+ *         ENDLOOP with no matching BGNLOOP.  On GL_FALSE the output arrays
+ *         are only partially filled in.
+ */
+GLboolean
+_mesa_find_temp_intervals(const struct prog_instruction *instructions,
+                          GLuint numInstructions,
+                          GLint intBegin[MAX_PROGRAM_TEMPS],
+                          GLint intEnd[MAX_PROGRAM_TEMPS])
+{
+   struct loop_info
+   {
+      GLuint Start, End;  /**< Start, end instructions of loop */
+   };
+   struct loop_info loopStack[MAX_LOOP_NESTING];
+   GLuint loopStackDepth = 0;
+   GLuint i;
+
+   for (i = 0; i < MAX_PROGRAM_TEMPS; i++){
+      intBegin[i] = intEnd[i] = -1;
+   }
+
+   /* Scan instructions looking for temporary registers */
+   for (i = 0; i < numInstructions; i++) {
+      const struct prog_instruction *inst = instructions + i;
+      if (inst->Opcode == OPCODE_BGNLOOP) {
+         /* Never push past the end of the fixed-size loop stack. */
+         if (loopStackDepth >= MAX_LOOP_NESTING)
+            return GL_FALSE;
+         loopStack[loopStackDepth].Start = i;
+         loopStack[loopStackDepth].End = inst->BranchTarget;
+         loopStackDepth++;
+      }
+      else if (inst->Opcode == OPCODE_ENDLOOP) {
+         /* An unmatched ENDLOOP would wrap the unsigned depth counter and
+          * make the loopStack[loopStackDepth - 1] reads below go wild.
+          */
+         if (loopStackDepth == 0)
+            return GL_FALSE;
+         loopStackDepth--;
+      }
+      else if (inst->Opcode == OPCODE_CAL) {
+         /* Subroutine calls are not analysed; give up. */
+         return GL_FALSE;
+      }
+      else {
+         /* All three source slots are examined regardless of opcode. */
+         const GLuint numSrc = 3;
+         GLuint j;
+         for (j = 0; j < numSrc; j++) {
+            if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+               const GLuint index = inst->SrcReg[j].Index;
+               /* Relative addressing hides which temporary is read. */
+               if (inst->SrcReg[j].RelAddr)
+                  return GL_FALSE;
+               update_interval(intBegin, intEnd, index, i);
+               if (loopStackDepth > 0) {
+                  /* extend temp register's interval to end of loop */
+                  GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+                  update_interval(intBegin, intEnd, index, loopEnd);
+               }
+            }
+         }
+         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+            const GLuint index = inst->DstReg.Index;
+            /* Relative addressing hides which temporary is written. */
+            if (inst->DstReg.RelAddr)
+               return GL_FALSE;
+            update_interval(intBegin, intEnd, index, i);
+            if (loopStackDepth > 0) {
+               /* extend temp register's interval to end of loop */
+               GLuint loopEnd = loopStack[loopStackDepth - 1].End;
+               update_interval(intBegin, intEnd, index, loopEnd);
+            }
+         }
+      }
+   }
+
+   return GL_TRUE;
+}
+
+
 /**
  * This is called if we run out of GRF registers.  Examine the live intervals
  * of temp regs in the program and free those which won't be used again.
@@ -211,29 +282,29 @@ get_reg(struct brw_wm_compile *c, int file, int index, int component,
 static void
 reclaim_temps(struct brw_wm_compile *c)
 {
-   GLint intBegin[MAX_PROGRAM_TEMPS];
-   GLint intEnd[MAX_PROGRAM_TEMPS];
+   GLint intBegin[BRW_WM_MAX_TEMPS];
+   GLint intEnd[BRW_WM_MAX_TEMPS];
    int index;
 
    /*printf("Reclaim temps:\n");*/
 
-   _mesa_find_temp_intervals(c->prog_instructions, c->nr_fp_insns,
+   _mesa_find_temp_intervals(c->fp_instructions, c->nr_fp_insns,
                              intBegin, intEnd);
 
-   for (index = 0; index < MAX_PROGRAM_TEMPS; index++) {
+   for (index = 0; index < BRW_WM_MAX_TEMPS; index++) {
       if (intEnd[index] != -1 && intEnd[index] < c->cur_inst) {
          /* program temp[i] can be freed */
          int component;
          /*printf("  temp[%d] is dead\n", index);*/
          for (component = 0; component < 4; component++) {
-            if (c->wm_regs[PROGRAM_TEMPORARY][index][component].inited) {
-               int r = c->wm_regs[PROGRAM_TEMPORARY][index][component].reg.nr;
+            if (c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited) {
+               int r = c->wm_regs[TGSI_FILE_TEMPORARY][index][component].reg.nr;
                release_grf(c, r);
                /*
                printf("  Reclaim temp %d, reg %d at inst %d\n",
                       index, r, c->cur_inst);
                */
-               c->wm_regs[PROGRAM_TEMPORARY][index][component].inited = GL_FALSE;
+               c->wm_regs[TGSI_FILE_TEMPORARY][index][component].inited = GL_FALSE;
             }
          }
       }
@@ -264,7 +335,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
             reg = brw_vec8_grf(i * 2, 0);
         else
             reg = brw_vec8_grf(0, 0);
-	set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
+	set_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, i, reg);
     }
     reg_index += 2 * c->key.nr_depth_regs;
 
@@ -306,7 +377,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
                   * Constants will be copied in prepare_constant_buffer()
                   */
                  c->prog_data.param[index] = &plist->ParameterValues[i][j];
-                 set_reg(c, PROGRAM_STATE_VAR, i, j, reg);
+                 set_reg(c, TGSI_FILE_STATE_VAR, i, j, reg);
               }
            }
            /* number of constant regs used (each reg is float[8]) */
@@ -330,7 +401,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
 	  urb_read_length = reg_index;
 	  reg = brw_vec8_grf(reg_index, 0);
 	  for (j = 0; j < 4; j++)
-	     set_reg(c, PROGRAM_PAYLOAD, fp_input, j, reg);
+	     set_reg(c, TGSI_FILE_PAYLOAD, fp_input, j, reg);
        }
        if (c->key.nr_vp_outputs > i) {
 	  reg_index += 2;
@@ -354,7 +425,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
     prealloc_grf(c, 127);
 
     for (i = 0; i < c->nr_fp_insns; i++) {
-	const struct prog_instruction *inst = &c->prog_instructions[i];
+	const struct brw_fp_instruction *inst = &c->fp_instructions[i];
 	struct brw_reg dst[4];
 
 	switch (inst->Opcode) {
@@ -397,7 +468,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
  * the three GRF slots.
  */
 static void fetch_constants(struct brw_wm_compile *c,
-                            const struct prog_instruction *inst)
+                            const struct brw_fp_instruction *inst)
 {
    struct brw_compile *p = &c->func;
    GLuint i;
@@ -405,9 +476,8 @@ static void fetch_constants(struct brw_wm_compile *c,
    /* loop over instruction src regs */
    for (i = 0; i < 3; i++) {
       const struct prog_src_register *src = &inst->SrcReg[i];
-      if (src->File == PROGRAM_STATE_VAR ||
-          src->File == PROGRAM_CONSTANT ||
-          src->File == PROGRAM_UNIFORM) {
+      if (src->File == TGSI_FILE_IMMEDIATE ||
+          src->File == TGSI_FILE_CONSTANT) {
 	 c->current_const[i].index = src->Index;
 
 #if 0
@@ -431,7 +501,7 @@ static void fetch_constants(struct brw_wm_compile *c,
  * Convert Mesa dst register to brw register.
  */
 static struct brw_reg get_dst_reg(struct brw_wm_compile *c, 
-                                  const struct prog_instruction *inst,
+                                  const struct brw_fp_instruction *inst,
                                   GLuint component)
 {
     const int nr = 1;
@@ -442,7 +512,7 @@ static struct brw_reg get_dst_reg(struct brw_wm_compile *c,
 
 static struct brw_reg
 get_src_reg_const(struct brw_wm_compile *c,
-                  const struct prog_instruction *inst,
+                  const struct brw_fp_instruction *inst,
                   GLuint srcRegIndex, GLuint component)
 {
    /* We should have already fetched the constant from the constant
@@ -462,7 +532,7 @@ get_src_reg_const(struct brw_wm_compile *c,
    const_reg = stride(const_reg, 0, 1, 0);
    const_reg.subnr = component * 4;
 
-   if (src->Negate & (1 << component))
+   if (src->Negate)
       const_reg = negate(const_reg);
    if (src->Abs)
       const_reg = brw_abs(const_reg);
@@ -483,7 +553,7 @@ get_src_reg_const(struct brw_wm_compile *c,
  * Convert Mesa src register to brw register.
  */
 static struct brw_reg get_src_reg(struct brw_wm_compile *c, 
-                                  const struct prog_instruction *inst,
+                                  const struct brw_fp_instruction *inst,
                                   GLuint srcRegIndex, GLuint channel)
 {
     const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
@@ -499,9 +569,9 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
     }
 
     if (c->fp->use_const_buffer &&
-        (src->File == PROGRAM_STATE_VAR ||
-         src->File == PROGRAM_CONSTANT ||
-         src->File == PROGRAM_UNIFORM)) {
+        (src->File == TGSI_FILE_STATE_VAR ||
+         src->File == TGSI_FILE_CONSTANT ||
+         src->File == TGSI_FILE_UNIFORM)) {
        return get_src_reg_const(c, inst, srcRegIndex, component);
     }
     else {
@@ -513,26 +583,26 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
 
 
 /**
- * Same as \sa get_src_reg() but if the register is a literal, emit
- * a brw_reg encoding the literal.
- * Note that a brw instruction only allows one src operand to be a literal.
+ * Same as \sa get_src_reg() but if the register is an immediate, emit
+ * a brw_reg encoding the immediate.
+ * Note that a brw instruction only allows one src operand to be an immediate.
  * For instructions with more than one operand, only the second can be a
- * literal.  This means that we treat some literals as constants/uniforms
- * (which why PROGRAM_CONSTANT is checked in fetch_constants()).
+ * immediate.  This means that we treat some immediates as constants
+ * (which is why TGSI_FILE_IMMEDIATE is checked in fetch_constants()).
  * 
  */
 static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c, 
-                                      const struct prog_instruction *inst,
+                                      const struct brw_fp_instruction *inst,
                                       GLuint srcRegIndex, GLuint channel)
 {
     const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
-    if (src->File == PROGRAM_CONSTANT) {
-       /* a literal */
+    if (src->File == TGSI_FILE_IMMEDIATE) {
+       /* an immediate */
        const int component = GET_SWZ(src->Swizzle, channel);
        const GLfloat *param =
           c->fp->program.Base.Parameters->ParameterValues[src->Index];
        GLfloat value = param[component];
-       if (src->Negate & (1 << channel))
+       if (src->Negate)
           value = -value;
        if (src->Abs)
           value = FABSF(value);
@@ -612,7 +682,7 @@ static void invoke_subroutine( struct brw_wm_compile *c,
 }
 
 static void emit_trunc( struct brw_wm_compile *c,
-                        const struct prog_instruction *inst)
+                        const struct brw_fp_instruction *inst)
 {
     int i;
     struct brw_compile *p = &c->func;
@@ -630,7 +700,7 @@ static void emit_trunc( struct brw_wm_compile *c,
 }
 
 static void emit_mov( struct brw_wm_compile *c,
-                      const struct prog_instruction *inst)
+                      const struct brw_fp_instruction *inst)
 {
     int i;
     struct brw_compile *p = &c->func;
@@ -650,7 +720,7 @@ static void emit_mov( struct brw_wm_compile *c,
 }
 
 static void emit_pixel_xy(struct brw_wm_compile *c,
-                          const struct prog_instruction *inst)
+                          const struct brw_fp_instruction *inst)
 {
     struct brw_reg r1 = brw_vec1_grf(1, 0);
     struct brw_reg r1_uw = retype(r1, BRW_REGISTER_TYPE_UW);
@@ -680,7 +750,7 @@ static void emit_pixel_xy(struct brw_wm_compile *c,
 }
 
 static void emit_delta_xy(struct brw_wm_compile *c,
-                          const struct prog_instruction *inst)
+                          const struct brw_fp_instruction *inst)
 {
     struct brw_reg r1 = brw_vec1_grf(1, 0);
     struct brw_reg dst0, dst1, src0, src1;
@@ -740,7 +810,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
 }
 
 static void emit_fb_write(struct brw_wm_compile *c,
-                          const struct prog_instruction *inst)
+                          const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     int nr = 2;
@@ -808,7 +878,7 @@ static void emit_fb_write(struct brw_wm_compile *c,
 }
 
 static void emit_pixel_w( struct brw_wm_compile *c,
-                          const struct prog_instruction *inst)
+                          const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -838,7 +908,7 @@ static void emit_pixel_w( struct brw_wm_compile *c,
 }
 
 static void emit_linterp(struct brw_wm_compile *c,
-                         const struct prog_instruction *inst)
+                         const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -867,7 +937,7 @@ static void emit_linterp(struct brw_wm_compile *c,
 }
 
 static void emit_cinterp(struct brw_wm_compile *c,
-                         const struct prog_instruction *inst)
+                         const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -893,7 +963,7 @@ static void emit_cinterp(struct brw_wm_compile *c,
 }
 
 static void emit_pinterp(struct brw_wm_compile *c,
-                         const struct prog_instruction *inst)
+                         const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -927,7 +997,7 @@ static void emit_pinterp(struct brw_wm_compile *c,
 
 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
 static void emit_frontfacing(struct brw_wm_compile *c,
-			     const struct prog_instruction *inst)
+			     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
@@ -956,7 +1026,7 @@ static void emit_frontfacing(struct brw_wm_compile *c,
 }
 
 static void emit_xpd(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     int i;
     struct brw_compile *p = &c->func;
@@ -981,13 +1051,13 @@ static void emit_xpd(struct brw_wm_compile *c,
 }
 
 static void emit_dp3(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_reg src0[3], src1[3], dst;
     int i;
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
-    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+    int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
 
     if (!(mask & WRITEMASK_XYZW))
 	return;
@@ -1008,13 +1078,13 @@ static void emit_dp3(struct brw_wm_compile *c,
 }
 
 static void emit_dp4(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_reg src0[4], src1[4], dst;
     int i;
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
-    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+    int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
 
     if (!(mask & WRITEMASK_XYZW))
 	return;
@@ -1035,13 +1105,13 @@ static void emit_dp4(struct brw_wm_compile *c,
 }
 
 static void emit_dph(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_reg src0[4], src1[4], dst;
     int i;
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
-    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+    int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
 
     if (!(mask & WRITEMASK_XYZW))
 	return;
@@ -1067,12 +1137,12 @@ static void emit_dph(struct brw_wm_compile *c,
  * register's X, Y, Z and W channels (subject to writemasking of course).
  */
 static void emit_math1(struct brw_wm_compile *c,
-                       const struct prog_instruction *inst, GLuint func)
+                       const struct brw_fp_instruction *inst, GLuint func)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, dst;
     GLuint mask = inst->DstReg.WriteMask;
-    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+    int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
 
     if (!(mask & WRITEMASK_XYZW))
 	return;
@@ -1095,43 +1165,43 @@ static void emit_math1(struct brw_wm_compile *c,
 }
 
 static void emit_rcp(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_INV);
 }
 
 static void emit_rsq(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ);
 }
 
 static void emit_sin(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_SIN);
 }
 
 static void emit_cos(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_COS);
 }
 
 static void emit_ex2(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_EXP);
 }
 
 static void emit_lg2(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_math1(c, inst, BRW_MATH_FUNCTION_LOG);
 }
 
 static void emit_add(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, src1, dst;
@@ -1150,7 +1220,7 @@ static void emit_add(struct brw_wm_compile *c,
 }
 
 static void emit_arl(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, addr_reg;
@@ -1164,7 +1234,7 @@ static void emit_arl(struct brw_wm_compile *c,
 
 
 static void emit_mul(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, src1, dst;
@@ -1183,7 +1253,7 @@ static void emit_mul(struct brw_wm_compile *c,
 }
 
 static void emit_frc(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, dst;
@@ -1202,7 +1272,7 @@ static void emit_frc(struct brw_wm_compile *c,
 }
 
 static void emit_flr(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg src0, dst;
@@ -1221,7 +1291,7 @@ static void emit_flr(struct brw_wm_compile *c,
 
 
 static void emit_min_max(struct brw_wm_compile *c,
-                         const struct prog_instruction *inst)
+                         const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     const GLuint mask = inst->DstReg.WriteMask;
@@ -1269,12 +1339,12 @@ static void emit_min_max(struct brw_wm_compile *c,
 }
 
 static void emit_pow(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg dst, src0, src1;
     GLuint mask = inst->DstReg.WriteMask;
-    int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+    int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
 
     if (!(mask & WRITEMASK_XYZW))
 	return;
@@ -1299,7 +1369,7 @@ static void emit_pow(struct brw_wm_compile *c,
 }
 
 static void emit_lrp(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -1352,7 +1422,7 @@ static void emit_kil(struct brw_wm_compile *c)
 }
 
 static void emit_mad(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -1375,7 +1445,7 @@ static void emit_mad(struct brw_wm_compile *c,
 }
 
 static void emit_sop(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst, GLuint cond)
+                     const struct brw_fp_instruction *inst, GLuint cond)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -1399,37 +1469,37 @@ static void emit_sop(struct brw_wm_compile *c,
 }
 
 static void emit_slt(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_L);
 }
 
 static void emit_sle(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_LE);
 }
 
 static void emit_sgt(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_G);
 }
 
 static void emit_sge(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_GE);
 }
 
 static void emit_seq(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_EQ);
 }
 
 static void emit_sne(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     emit_sop(c, inst, BRW_CONDITIONAL_NEQ);
 }
@@ -1459,7 +1529,7 @@ static INLINE struct brw_reg odd_bytes( struct brw_reg reg )
 
     
 static void emit_wpos_xy(struct brw_wm_compile *c,
-                         const struct prog_instruction *inst)
+                         const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     GLuint mask = inst->DstReg.WriteMask;
@@ -1494,25 +1564,25 @@ static void emit_wpos_xy(struct brw_wm_compile *c,
    BIAS on SIMD8 not working yet...
  */	
 static void emit_txb(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg dst[4], src[4], payload_reg;
-    /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
-    const GLuint unit = inst->TexSrcUnit;
+    /* Note: tex_unit was already looked up through SamplerTextures[] */
+    const GLuint unit = inst->tex_unit;
     GLuint i;
     GLuint msg_type;
 
     assert(unit < BRW_MAX_TEX_UNIT);
 
-    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+    payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
 
     for (i = 0; i < 4; i++) 
 	dst[i] = get_dst_reg(c, inst, i);
     for (i = 0; i < 4; i++)
 	src[i] = get_src_reg(c, inst, 0, i);
 
-    switch (inst->TexSrcTarget) {
+    switch (inst->tex_target) {
 	case TEXTURE_1D_INDEX:
 	    brw_MOV(p, brw_message_reg(2), src[0]);         /* s coord */
 	    brw_MOV(p, brw_message_reg(3), brw_imm_f(0));   /* t coord */
@@ -1561,12 +1631,12 @@ static void emit_txb(struct brw_wm_compile *c,
 
 
 static void emit_tex(struct brw_wm_compile *c,
-                     const struct prog_instruction *inst)
+                     const struct brw_fp_instruction *inst)
 {
     struct brw_compile *p = &c->func;
     struct brw_reg dst[4], src[4], payload_reg;
-    /* Note: TexSrcUnit was already looked up through SamplerTextures[] */
-    const GLuint unit = inst->TexSrcUnit;
+    /* Note: tex_unit was already looked up through SamplerTextures[] */
+    const GLuint unit = inst->tex_unit;
     GLuint msg_len;
     GLuint i, nr;
     GLuint emit;
@@ -1575,14 +1645,14 @@ static void emit_tex(struct brw_wm_compile *c,
 
     assert(unit < BRW_MAX_TEX_UNIT);
 
-    payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
+    payload_reg = get_reg(c, TGSI_FILE_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0);
 
     for (i = 0; i < 4; i++) 
 	dst[i] = get_dst_reg(c, inst, i);
     for (i = 0; i < 4; i++)
 	src[i] = get_src_reg(c, inst, 0, i);
 
-    switch (inst->TexSrcTarget) {
+    switch (inst->tex_target) {
 	case TEXTURE_1D_INDEX:
 	    emit = WRITEMASK_X;
 	    nr = 1;
@@ -1657,7 +1727,7 @@ static void post_wm_emit( struct brw_wm_compile *c )
 
 static void
 get_argument_regs(struct brw_wm_compile *c,
-		  const struct prog_instruction *inst,
+		  const struct brw_fp_instruction *inst,
 		  int index,
 		  struct brw_reg *regs,
 		  int mask)
@@ -1686,7 +1756,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_
     brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
 
     for (i = 0; i < c->nr_fp_insns; i++) {
-        const struct prog_instruction *inst = &c->prog_instructions[i];
+        const struct brw_fp_instruction *inst = &c->fp_instructions[i];
 	int dst_flags;
 	struct brw_reg args[3][4], dst[4];
 	int j;
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index d8b9028927..7b18335dec 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -28,9 +28,10 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-                 
 
-#include "brw_context.h"
+#include "util/u_memory.h"
+
+#include "brw_debug.h"
 #include "brw_wm.h"
 
 
@@ -133,19 +134,19 @@ static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
    /* Search for an existing const value matching the request:
     */
    for (i = 0; i < c->nr_imm_refs; i++) {
-      if (c->imm_ref[i].imm_val == *imm1f) 
+      if (c->imm_ref[i].imm1f == *imm1f) 
 	 return c->imm_ref[i].ref;
    }
 
    /* Else try to add a new one:
     */
-   if (c->nr_imm_refs < BRW_WM_MAX_IMM) {
+   if (c->nr_imm_refs < Elements(c->imm_ref)) {
       GLuint i = c->nr_imm_refs++;
 
       /* An immediate is a special type of parameter:
        */
-      c->imm_ref[i].imm_val = *imm_val;
-      c->imm_ref[i].ref = get_param_ref(c, imm_val);
+      c->imm_ref[i].imm1f = *imm1f;
+      c->imm_ref[i].ref = get_param_ref(c, imm1f);
 
       return c->imm_ref[i].ref;
    }
@@ -180,7 +181,7 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 break;
 
       case TGSI_FILE_IMMEDIATE:
-	 ref = get_imm_ref(c, &plist->ParameterValues[idx][component]);
+	 ref = get_imm_ref(c, &c->immediate[idx].v[component]);
 	 break;
 
       default:
@@ -205,16 +206,16 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 
 static void pass0_set_dst( struct brw_wm_compile *c,
 			   struct brw_wm_instruction *out,
-			   const struct prog_instruction *inst,
+			   const struct brw_fp_instruction *inst,
 			   GLuint writemask )
 {
-   const struct prog_dst_register *dst = &inst->DstReg;
+   const struct brw_fp_dst dst = inst->dst;
    GLuint i;
 
    for (i = 0; i < 4; i++) {
       if (writemask & (1<<i)) {
 	 out->dst[i] = get_value(c);
-	 pass0_set_fpreg_value(c, dst->File, dst->Index, i, out->dst[i]);
+	 pass0_set_fpreg_value(c, dst.file, dst.index, i, out->dst[i]);
       }
    }
 
@@ -223,27 +224,15 @@ static void pass0_set_dst( struct brw_wm_compile *c,
 
 
 static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
-						    struct prog_src_register src,
+						    struct brw_fp_src src,
 						    GLuint i )
 {
-   GLuint component = GET_SWZ(src.Swizzle,i);
-   const struct brw_wm_ref *src_ref;
-   static const GLfloat const_zero = 0.0;
-   static const GLfloat const_one = 1.0;
-
-   if (component == SWIZZLE_ZERO) 
-      src_ref = get_imm_ref(c, &const_zero);
-   else if (component == SWIZZLE_ONE) 
-      src_ref = get_imm_ref(c, &const_one);
-   else 
-      src_ref = pass0_get_reg(c, src.File, src.Index, component);
-
-   return src_ref;
+   return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i));
 }
 
 
 static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
-				       struct prog_src_register src,
+				       struct brw_fp_src src,
 				       GLuint i,
 				       struct brw_wm_instruction *insn)
 {
@@ -259,10 +248,10 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
       newref->value->lastuse = newref;
    }
 
-   if (src.Negate & (1 << i))
+   if (src.negate)
       newref->hw_reg.negate ^= 1;
 
-   if (src.Abs) {
+   if (src.abs) {
       newref->hw_reg.negate = 0;
       newref->hw_reg.abs = 1;
    }
@@ -273,21 +262,21 @@ static struct brw_wm_ref *get_new_ref( struct brw_wm_compile *c,
 
 static void
 translate_insn(struct brw_wm_compile *c,
-               const struct prog_instruction *inst)
+               const struct brw_fp_instruction *inst)
 {
    struct brw_wm_instruction *out = get_instruction(c);
-   GLuint writemask = inst->dst.WriteMask;
-   GLuint nr_args = brw_wm_nr_args(inst->Opcode);
+   GLuint writemask = inst->dst.writemask;
+   GLuint nr_args = brw_wm_nr_args(inst->opcode);
    GLuint i, j;
 
    /* Copy some data out of the instruction
     */
-   out->opcode = inst->Opcode;
-   out->saturate = inst->dst.Saturate;
-   out->tex_unit = inst->TexSrcUnit;
-   out->tex_target = inst->TexSrcTarget;
-   out->eot = inst->Aux & 1;
-   out->target = inst->Aux >> 1;
+   out->opcode = inst->opcode;
+   out->saturate = inst->dst.saturate;
+   out->tex_unit = inst->tex_unit;
+   out->tex_target = inst->tex_target;
+   out->eot = inst->eot; //inst->Aux & 1;
+   out->target = inst->target; //inst->Aux >> 1;
 
    /* Args:
     */
@@ -308,10 +297,10 @@ translate_insn(struct brw_wm_compile *c,
  * Optimize moves and swizzles away:
  */ 
 static void pass0_precalc_mov( struct brw_wm_compile *c,
-			       const struct prog_instruction *inst )
+			       const struct brw_fp_instruction *inst )
 {
-   const struct prog_dst_register *dst = &inst->DstReg;
-   GLuint writemask = inst->DstReg.WriteMask;
+   const struct brw_fp_dst dst = inst->dst;
+   GLuint writemask = dst.writemask;
    struct brw_wm_ref *refs[4];
    GLuint i;
 
@@ -323,11 +312,11 @@ static void pass0_precalc_mov( struct brw_wm_compile *c,
     * one loop and the above case was incorrectly handled.
     */
    for (i = 0; i < 4; i++) {
-      refs[i] = get_new_ref(c, inst->SrcReg[0], i, NULL);
+      refs[i] = get_new_ref(c, inst->src[0], i, NULL);
    }
    for (i = 0; i < 4; i++) {
       if (writemask & (1 << i)) {	    
-         pass0_set_fpreg_ref( c, dst->File, dst->Index, i, refs[i]);
+         pass0_set_fpreg_ref( c, dst.file, dst.index, i, refs[i]);
       }
    }
 }
@@ -341,12 +330,12 @@ static void pass0_init_payload( struct brw_wm_compile *c )
 
    for (i = 0; i < 4; i++) {
       GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
-      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, 
+      pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, PAYLOAD_DEPTH, i, 
 			     &c->payload.depth[j] );
    }
 
-   for (i = 0; i < FRAG_ATTRIB_MAX; i++)
-      pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, i, 0, 
+   for (i = 0; i < c->key.nr_inputs; i++)
+      pass0_set_fpreg_value( c, BRW_FILE_PAYLOAD, i, 0, 
 			     &c->payload.input_interp[i] );      
 }
 
@@ -360,7 +349,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
  *
  * Translate away swizzling and eliminate non-saturating moves.
  *
- * Translate instructions from Mesa's prog_instruction structs to our
+ * Translate instructions from our brw_fp_instruction structs to our
  * internal brw_wm_instruction representation.
  */
 void brw_wm_pass0( struct brw_wm_compile *c )
@@ -374,13 +363,13 @@ void brw_wm_pass0( struct brw_wm_compile *c )
    pass0_init_payload(c);
 
    for (insn = 0; insn < c->nr_fp_insns; insn++) {
-      const struct prog_instruction *inst = &c->prog_instructions[insn];
+      const struct brw_fp_instruction *inst = &c->fp_instructions[insn];
 
       /* Optimize away moves, otherwise emit translated instruction:
        */      
-      switch (inst->Opcode) {
-      case OPCODE_MOV: 
-	 if (!inst->dst.Saturate) {
+      switch (inst->opcode) {
+      case TGSI_OPCODE_MOV: 
+	 if (!inst->dst.saturate) {
 	    pass0_precalc_mov(c, inst);
 	 }
 	 else {
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index b0356b1bd5..09ad2b8f5b 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -30,8 +30,8 @@
   */
                   
 
-#include "brw_context.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
 
 
 static GLuint get_tracked_mask(struct brw_wm_compile *c,
@@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 
       case TGSI_OPCODE_TEX:
       case TGSI_OPCODE_TXP:
-	 read0 = get_texcoord_mask(inst->tex_idx);
+	 read0 = get_texcoord_mask(inst->tex_target);
 	 break;
 
       case TGSI_OPCODE_TXB:
-	 read0 = get_texcoord_mask(inst->tex_idx) | BRW_WRITEMASK_W;
+	 read0 = get_texcoord_mask(inst->tex_target) | BRW_WRITEMASK_W;
 	 break;
 
       case WM_WPOSXY:
@@ -276,7 +276,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 
       case TGSI_OPCODE_DST:
       case WM_FRONTFACING:
-      case TGSI_OPCODE_KIL_NV:
+      case TGSI_OPCODE_KILP:
       default:
 	 break;
       }
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index a19ca62328..d3d678a5e6 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -30,7 +30,7 @@
   */
                    
 
-#include "brw_context.h"
+#include "brw_debug.h"
 #include "brw_wm.h"
 
 
@@ -82,27 +82,14 @@ static void init_registers( struct brw_wm_compile *c )
    for (j = 0; j < c->nr_creg; j++) 
       prealloc_reg(c, &c->creg[j], i++);
 
-   for (j = 0; j < FRAG_ATTRIB_MAX; j++) {
-      if (c->key.vp_outputs_written & (1<<j)) {
-	 int fp_index;
-
-	 if (j >= VERT_RESULT_VAR0)
-	    fp_index = j - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
-	 else if (j <= VERT_RESULT_TEX7)
-	    fp_index = j;
-	 else
-	    fp_index = -1;
-
-	 nr_interp_regs++;
-	 if (fp_index >= 0)
-	    prealloc_reg(c, &c->payload.input_interp[fp_index], i++);
-      }
+   for (j = 0; j < c->key.vp_nr_outputs; j++) {
+      prealloc_reg(c, &c->payload.input_interp[j], i++);
    }
 
    assert(nr_interp_regs >= 1);
 
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
-   c->prog_data.urb_read_length = nr_interp_regs * 2;
+   c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2;
    c->prog_data.curb_read_length = c->nr_creg * 2;
 
    c->max_wm_grf = i * 2;
@@ -308,9 +295,9 @@ void brw_wm_pass2( struct brw_wm_compile *c )
       /* Allocate registers to hold results:
        */
       switch (inst->opcode) {
-      case OPCODE_TEX:
-      case OPCODE_TXB:
-      case OPCODE_TXP:
+      case TGSI_OPCODE_TEX:
+      case TGSI_OPCODE_TXB:
+      case TGSI_OPCODE_TXP:
 	 alloc_contiguous_dest(c, inst->dst, 4, insn);
 	 break;
 
-- 
cgit v1.2.3


From 1cc16e1b831cef8e1573cc998cee3e55179bb830 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 31 Oct 2009 20:46:59 +0100
Subject: nv50: fix textures with block size != cpp

First, using width * block size as pitch is evidently
wrong if a block contains more than 1 texel.

For tiled textures, since a block occupies a contiguous
area of memory, y addressing in m2mf has to be done by
block index, not the y coordinate itself.

This should fix compressed textures.
---
 src/gallium/drivers/nv50/nv50_miptree.c  | 35 ++++++++++------------
 src/gallium/drivers/nv50/nv50_transfer.c | 50 ++++++++++++++++++++------------
 2 files changed, 48 insertions(+), 37 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 93479a0314..229a59cb74 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -26,6 +26,16 @@
 
 #include "nv50_context.h"
 
+static INLINE uint32_t
+get_tile_mode(unsigned ny)
+{
+	if (ny > 32) return 4;
+	if (ny > 16) return 3;
+	if (ny >  8) return 2;
+	if (ny >  4) return 1;
+	return 0;
+}
+
 static struct pipe_texture *
 nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 {
@@ -34,7 +44,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	struct pipe_texture *pt = &mt->base.base;
 	unsigned width = tmp->width[0], height = tmp->height[0];
 	unsigned depth = tmp->depth[0];
-	uint32_t tile_mode, tile_flags, tile_h;
+	uint32_t tile_flags;
 	int ret, i, l;
 
 	*pt = *tmp;
@@ -57,13 +67,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 		break;
 	}
 
-	if      (pt->height[0] > 32) tile_mode = 4;
-	else if (pt->height[0] > 16) tile_mode = 3;
-	else if (pt->height[0] >  8) tile_mode = 2;
-	else if (pt->height[0] >  4) tile_mode = 1;
-	else                         tile_mode = 0;
-	tile_h = 1 << (tile_mode + 2);
-
 	switch (pt->target) {
 	case PIPE_TEXTURE_3D:
 		mt->image_nr = pt->depth[0];
@@ -86,28 +89,22 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
-		lvl->pitch = align(pt->width[l] * pt->block.size, 64);
-		lvl->tile_mode = tile_mode;
+		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
+		lvl->tile_mode = get_tile_mode(pt->nblocksy[l]);
 
 		width = MAX2(1, width >> 1);
 		height = MAX2(1, height >> 1);
 		depth = MAX2(1, depth >> 1);
-
-		if (tile_mode && height <= (tile_h >> 1)) {
-			tile_mode--;
-			tile_h >>= 1;
-		}
 	}
 
 	for (i = 0; i < mt->image_nr; i++) {
 		for (l = 0; l <= pt->last_level; l++) {
 			struct nv50_miptree_level *lvl = &mt->level[l];
 			int size;
-			tile_h = 1 << (lvl->tile_mode + 2);
+			unsigned tile_ny = 1 << (lvl->tile_mode + 2);
 
-			size  = align(pt->width[l], 8) * pt->block.size;
-			size  = align(size, 64);
-			size *= align(pt->height[l], tile_h);
+			size  = align(pt->nblocksx[l] * pt->block.size, 64);
+			size *= align(pt->nblocksy[l], tile_ny);
 
 			lvl->image_offset[i] = mt->total_size;
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index f1eb672336..9c008090b8 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -124,7 +124,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	struct nv50_miptree *mt = nv50_miptree(pt);
 	struct nv50_miptree_level *lvl = &mt->level[level];
 	struct nv50_transfer *tx;
-	unsigned image = 0;
+	unsigned nx, ny, image = 0;
 	int ret;
 
 	if (pt->target == PIPE_TEXTURE_CUBE)
@@ -142,9 +142,16 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->base.width = w;
 	tx->base.height = h;
 	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
-	tx->base.stride = (w * pt->block.size);
+	if (!pt->nblocksx[level]) {
+		tx->base.nblocksx = pf_get_nblocksx(&pt->block,
+						    pt->width[level]);
+		tx->base.nblocksy = pf_get_nblocksy(&pt->block,
+						    pt->height[level]);
+	} else {
+		tx->base.nblocksx = pt->nblocksx[level];
+		tx->base.nblocksy = pt->nblocksy[level];
+	}
+	tx->base.stride = tx->base.nblocksx * pt->block.size;
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
@@ -152,24 +159,28 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_height = mt->base.base.height[level];
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
-	tx->level_x = x;
-	tx->level_y = y;
+	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
+	tx->level_y = pf_get_nblocksy(&tx->base.block, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
-			     w * pt->block.size * h, &tx->bo);
+			     tx->base.nblocksy * tx->base.stride, &tx->bo);
 	if (ret) {
 		FREE(tx);
 		return NULL;
 	}
 
 	if (usage & PIPE_TRANSFER_READ) {
+		nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
+		ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					x, y,
-					tx->level_width, tx->level_height,
-					tx->bo, 0, tx->base.stride,
-					tx->bo->tile_mode, 0, 0,
-					tx->base.width, tx->base.height,
-					tx->base.block.size, w, h,
+					tx->base.nblocksx, tx->base.nblocksy,
+					tx->bo, 0,
+					tx->base.stride, tx->bo->tile_mode,
+					0, 0,
+					tx->base.nblocksx, tx->base.nblocksy,
+					tx->base.block.size, nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
 					NOUVEAU_BO_GART);
 	}
@@ -183,17 +194,20 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
 	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
 
+	unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
+	unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+
 	if (ptx->usage & PIPE_TRANSFER_WRITE) {
 		struct pipe_screen *pscreen = ptx->texture->screen;
-		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0, tx->base.stride,
-					tx->bo->tile_mode, 0, 0,
-					tx->base.width, tx->base.height,
+		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
+					tx->base.stride, tx->bo->tile_mode,
+					0, 0,
+					tx->base.nblocksx, tx->base.nblocksy,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					tx->level_x, tx->level_y,
-					tx->level_width, tx->level_height,
-					tx->base.block.size, tx->base.width,
-					tx->base.height,
+					tx->base.nblocksx, tx->base.nblocksy,
+					tx->base.block.size, nx, ny,
 					NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
 					NOUVEAU_BO_GART);
 	}
-- 
cgit v1.2.3


From e7b76000826ff4faf8bf6a834d55b50a2784c9f2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 31 Oct 2009 20:05:19 +0000
Subject: i965g: more work on compilation

---
 src/gallium/auxiliary/util/u_math.h             |  13 ++
 src/gallium/drivers/i965/brw_context.h          |  10 +-
 src/gallium/drivers/i965/brw_pipe_sampler.c     |  32 ++++
 src/gallium/drivers/i965/brw_structs.h          |   8 +-
 src/gallium/drivers/i965/brw_wm_sampler_state.c | 201 +++++++++---------------
 src/gallium/drivers/i965/brw_wm_state.c         |  76 ++++-----
 6 files changed, 169 insertions(+), 171 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 75b075f160..c13bf96177 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -539,6 +539,19 @@ do {                                     \
 #endif
 
 
+static INLINE uint32_t util_unsigned_fixed(float value, unsigned frac_bits)
+{
+   value *= (1<<frac_bits);
+   return value < 0 ? 0 : value;
+}
+
+static INLINE int32_t util_signed_fixed(float value, unsigned frac_bits)
+{
+   return value * (1<<frac_bits);
+}
+
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index e6c3161066..8067e20c96 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -177,6 +177,14 @@ struct brw_fragment_shader {
 };
 
 
+struct brw_sampler {
+   struct pipe_sampler_state templ;
+   struct brw_ss0 ss0;
+   struct brw_ss1 ss1;
+   struct brw_ss3 ss3;
+};
+
+
 
 #define PIPE_NEW_DEPTH_STENCIL_ALPHA    0x1
 #define PIPE_NEW_RAST                   0x2
@@ -494,7 +502,7 @@ struct brw_context
       const struct brw_depth_stencil_state *zstencil;
 
       const struct brw_texture *texture[PIPE_MAX_SAMPLERS];
-      const struct pipe_sampler *sampler[PIPE_MAX_SAMPLERS];
+      const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
       unsigned num_textures;
       unsigned num_samplers;
       
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index bc20eef6fb..08a5d22009 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -9,6 +9,38 @@
 
 
+/* The brw (and related graphics cores) do not support GL_CLAMP.  The
+ * Intel drivers for "other operating systems" implement GL_CLAMP as
+ * GL_CLAMP_TO_EDGE, so the same is done here.
+ */
+static GLuint translate_wrap_mode( unsigned wrap )
+{
+   switch( wrap ) {
+   case PIPE_TEX_WRAP_REPEAT: 
+      return BRW_TEXCOORDMODE_WRAP;
+
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      return BRW_TEXCOORDMODE_CLAMP;
+      
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      return BRW_TEXCOORDMODE_CLAMP_BORDER;
+
+   case PIPE_TEX_WRAP_MIRROR_REPEAT: 
+      return BRW_TEXCOORDMODE_MIRROR;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP: 
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 
+      return BRW_TEXCOORDMODE_MIRROR_ONCE;
+
+   default: 
+      return BRW_TEXCOORDMODE_WRAP;
+   }
+}
+
+
+
 static void *brw_create_sampler_state( struct pipe_context *pipe,
 				     const struct pipe_sampler_state *templ )
 {
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index 11372697f9..f5d6a2599b 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -965,7 +965,7 @@ struct brw_sampler_default_color {
 struct brw_sampler_state
 {
    
-   struct
+   struct brw_ss0
    {
       GLuint shadow_function:3; 
       GLuint lod_bias:11; 
@@ -980,7 +980,7 @@ struct brw_sampler_state
       GLuint disable:1; 
    } ss0;
 
-   struct
+   struct brw_ss1
    {
       GLuint r_wrap_mode:3; 
       GLuint t_wrap_mode:3; 
@@ -991,13 +991,13 @@ struct brw_sampler_state
    } ss1;
 
    
-   struct
+   struct brw_ss2
    {
       GLuint pad:5;
       GLuint default_color_pointer:27; 
    } ss2;
    
-   struct
+   struct brw_ss3
    {
       GLuint pad:19;
       GLuint max_aniso:3; 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 32692d533c..55698a58bb 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -29,10 +29,12 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
                    
+#include "util/u_math.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_screen.h"
 
 
 /* Samplers aren't strictly wm state from the hardware's perspective,
@@ -41,41 +43,6 @@
 
 
-/* The brw (and related graphics cores) do not support GL_CLAMP.  The
- * Intel drivers for "other operating systems" implement GL_CLAMP as
- * GL_CLAMP_TO_EDGE, so the same is done here.
- */
-static GLuint translate_wrap_mode( GLenum wrap )
-{
-   switch( wrap ) {
-   case GL_REPEAT: 
-      return BRW_TEXCOORDMODE_WRAP;
-   case GL_CLAMP:  
-      return BRW_TEXCOORDMODE_CLAMP;
-   case GL_CLAMP_TO_EDGE: 
-      return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */
-   case GL_CLAMP_TO_BORDER: 
-      return BRW_TEXCOORDMODE_CLAMP_BORDER;
-   case GL_MIRRORED_REPEAT: 
-      return BRW_TEXCOORDMODE_MIRROR;
-   default: 
-      return BRW_TEXCOORDMODE_WRAP;
-   }
-}
-
-
-static GLuint U_FIXED(GLfloat value, GLuint frac_bits)
-{
-   value *= (1<<frac_bits);
-   return value < 0 ? 0 : value;
-}
-
-static GLint S_FIXED(GLfloat value, GLuint frac_bits)
-{
-   return value * (1<<frac_bits);
-}
-
-
 static struct brw_winsys_buffer *
 upload_default_color( struct brw_context *brw,
 		      const GLfloat *color )
@@ -91,91 +58,78 @@ upload_default_color( struct brw_context *brw,
 
 struct wm_sampler_key {
    int sampler_count;
-
-   struct wm_sampler_entry {
-      GLenum tex_target;
-      GLenum wrap_r, wrap_s, wrap_t;
-      float maxlod, minlod;
-      float lod_bias;
-      float max_aniso;
-      GLenum minfilter, magfilter;
-      GLenum comparemode, comparefunc;
-      struct brw_winsys_buffer *sdc_bo;
-
-      /** If target is cubemap, take context setting.
-       */
-      GLboolean seamless_cube_map;
-   } sampler[BRW_MAX_TEX_UNIT];
+   struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
 };
 
-/**
- * Sets the sampler state for a single unit based off of the sampler key
- * entry.
- */
-static void brw_update_sampler_state(struct wm_sampler_entry *key,
-				     struct brw_winsys_buffer *sdc_bo,
-				     struct brw_sampler_state *sampler)
-{
-   _mesa_memset(sampler, 0, sizeof(*sampler));
-
-   /* Cube-maps on 965 and later must use the same wrap mode for all 3
-    * coordinate dimensions.  Futher, only CUBE and CLAMP are valid.
-    */
-   if (key->tex_target == GL_TEXTURE_CUBE_MAP) {
-      if (key->seamless_cube_map &&
-	  (key->minfilter != GL_NEAREST || key->magfilter != GL_NEAREST)) {
-	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
-	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
-	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
-      } else {
-	 sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-	 sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-	 sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-      }
-   } else if (key->tex_target == GL_TEXTURE_1D) {
-      /* There's a bug in 1D texture sampling - it actually pays
-       * attention to the wrap_t value, though it should not.
-       * Override the wrap_t value here to GL_REPEAT to keep
-       * any nonexistent border pixels from floating in.
-       */
-      sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
-   }
-
-
-
-   sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */
-}
-
 
 /** Sets up the cache key for sampler state for all texture units */
 static void
 brw_wm_sampler_populate_key(struct brw_context *brw,
 			    struct wm_sampler_key *key)
 {
-   int nr = MIN2(brw->curr.number_textures,
-		 brw->curr.number_samplers);
    int i;
 
    memset(key, 0, sizeof(*key));
 
-   for (i = 0; i < nr; i++) {
+   key->sampler_count = MIN2(brw->curr.num_textures,
+			    brw->curr.num_samplers);
+
+   for (i = 0; i < key->sampler_count; i++) {
       const struct brw_texture *tex = brw->curr.texture[i];
       const struct brw_sampler *sampler = brw->curr.sampler[i];
-      struct wm_sampler_entry *entry = &key->sampler[i];
+      struct brw_sampler_state *entry = &key->sampler[i];
 
-      entry->tex_target = texObj->Target;
-      entry->seamless_cube_map = FALSE; /* XXX: add this to gallium */
       entry->ss0 = sampler->ss0;
       entry->ss1 = sampler->ss1;
+      entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset >> 5; /* reloc */
       entry->ss3 = sampler->ss3;
 
+      /* Cube-maps on 965 and later must use the same wrap mode for all 3
+       * coordinate dimensions.  Further, only CUBE and CLAMP are valid.
+       */
+      if (tex->base.target == PIPE_TEXTURE_CUBE) {
+	 if (FALSE &&
+	     (sampler->ss0.min_filter != BRW_MAPFILTER_NEAREST || 
+	      sampler->ss0.mag_filter != BRW_MAPFILTER_NEAREST)) {
+	    entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	    entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	    entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE;
+	 } else {
+	    entry->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	    entry->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	    entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
+	 }
+      } else if (tex->base.target == PIPE_TEXTURE_1D) {
+	 /* There's a bug in 1D texture sampling - it actually pays
+	  * attention to the wrap_t value, though it should not.
+	  * Override the wrap_t value here to GL_REPEAT to keep
+	  * any nonexistent border pixels from floating in.
+	  */
+	 entry->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
+      }
+   }
+}
+
+
+static void
+brw_wm_sampler_update_default_colors(struct brw_context *brw)
+{
+   int nr = MIN2(brw->curr.num_textures,
+		 brw->curr.num_samplers);
+   int i;
+
+   for (i = 0; i < nr; i++) {
+      const struct brw_texture *tex = brw->curr.texture[i];
+      const struct brw_sampler *sampler = brw->curr.sampler[i];
+
       brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
-      if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+
+      if (pf_is_depth_or_stencil(tex->base.format)) {
 	 float bordercolor[4] = {
-	    texObj->BorderColor[0],
-	    texObj->BorderColor[0],
-	    texObj->BorderColor[0],
-	    texObj->BorderColor[0]
+	    sampler->templ.border_color[0],
+	    sampler->templ.border_color[0],
+	    sampler->templ.border_color[0],
+	    sampler->templ.border_color[0]
 	 };
 	 /* GL specs that border color for depth textures is taken from the
 	  * R channel, while the hardware uses A.  Spam R into all the
@@ -183,22 +137,21 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 	  */
 	 brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor);
       } else {
-	 brw->wm.sdc_bo[i] = upload_default_color(brw, texObj->BorderColor);
+	 brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->templ.border_color);
       }
    }
-
-   key->sampler_count = nr;
 }
 
-/* All samplers must be uploaded in a single contiguous array, which
- * complicates various things.  However, this is still too confusing -
- * FIXME: simplify all the different new texture state flags.
+
+
+/* All samplers must be uploaded in a single contiguous array.  
  */
-static void upload_wm_samplers( struct brw_context *brw )
+static int upload_wm_samplers( struct brw_context *brw )
 {
    struct wm_sampler_key key;
    int i;
 
+   brw_wm_sampler_update_default_colors(brw);
    brw_wm_sampler_populate_key(brw, &key);
 
    if (brw->wm.sampler_count != key.sampler_count) {
@@ -209,7 +162,7 @@ static void upload_wm_samplers( struct brw_context *brw )
    brw->sws->bo_unreference(brw->wm.sampler_bo);
    brw->wm.sampler_bo = NULL;
    if (brw->wm.sampler_count == 0)
-      return;
+      return 0;
 
    brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
 					 &key, sizeof(key),
@@ -220,41 +173,29 @@ static void upload_wm_samplers( struct brw_context *brw )
     * cache.
     */
    if (brw->wm.sampler_bo == NULL) {
-      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];
-
-      memset(sampler, 0, sizeof(sampler));
-      for (i = 0; i < key.sampler_count; i++) {
-	 if (brw->wm.sdc_bo[i] == NULL)
-	    continue;
-
-	 brw_update_sampler_state(&key.sampler[i], brw->wm.sdc_bo[i],
-				  &sampler[i]);
-      }
-
       brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
 					    &key, sizeof(key),
 					    brw->wm.sdc_bo, key.sampler_count,
-					    &sampler, sizeof(sampler),
+					    &key.sampler, sizeof(key.sampler),
 					    NULL, NULL);
 
       /* Emit SDC relocations */
-      for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-	 if (!ctx->Texture.Unit[i]._ReallyEnabled)
-	    continue;
-
-	 dri_bo_emit_reloc(brw->wm.sampler_bo,
-			   I915_GEM_DOMAIN_SAMPLER, 0,
-			   0,
-			   i * sizeof(struct brw_sampler_state) +
-			   offsetof(struct brw_sampler_state, ss2),
-			   brw->wm.sdc_bo[i]);
+      for (i = 0; i < key.sampler_count; i++) {
+	 brw->sws->bo_emit_reloc(brw->wm.sampler_bo,
+				 I915_GEM_DOMAIN_SAMPLER, 0,
+				 0,
+				 i * sizeof(struct brw_sampler_state) +
+				 offsetof(struct brw_sampler_state, ss2),
+				 brw->wm.sdc_bo[i]);
       }
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_wm_samplers = {
    .dirty = {
-      .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLER,
+      .mesa = PIPE_NEW_BOUND_TEXTURES | PIPE_NEW_SAMPLERS,
       .brw = 0,
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 1898f38cef..f161de9b40 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -29,12 +29,14 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
                    
-
+#include "util/u_math.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
 /***********************************************************************
  * WM unit - fragment programs and rasterization
@@ -60,8 +62,7 @@ struct brw_wm_unit_key {
 static void
 wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 {
-   const struct gl_fragment_program *fp = brw->fragment_program;
-   const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
+   const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
 
    memset(key, 0, sizeof(*key));
 
@@ -82,7 +83,7 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
    key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
    key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
-   key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+   key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024);
 
    /* BRW_NEW_URB_FENCE */
    key->urb_size = brw->urb.vsize;
@@ -96,39 +97,42 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    /* CACHE_NEW_SAMPLER */
    key->sampler_count = brw->wm.sampler_count;
 
-   /* _NEW_POLYGONSTIPPLE */
-   key->polygon_stipple = ctx->Polygon.StippleFlag;
+   /* PIPE_NEW_RAST */
+   key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+   /* PIPE_NEW_FRAGMENT_PROGRAM */
+   key->uses_depth = fp->uses_depth;
+   key->computes_depth = fp->info.writes_z;
 
-   /* as far as we can tell */
-   key->computes_depth =
-      (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
    /* PIPE_NEW_DEPTH_BUFFER
+    *
     * Override for NULL depthbuffer case, required by the Pixel Shader Computed
     * Depth field.
     */
    if (brw->curr.fb.zsbuf == NULL)
       key->computes_depth = 0;
 
-   /* _NEW_COLOR */
-   key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
-   key->has_flow_control = bfp->has_flow_control;
+   /* PIPE_NEW_DEPTH_STENCIL_ALPHA */
+   key->uses_kill = (fp->info.uses_kill || 
+		     brw->curr.zstencil->cc3.alpha_test);
+
+   key->has_flow_control = fp->has_flow_control;
 
    /* temporary sanity check assertion */
-   ASSERT(bfp->has_flow_control == brw_wm_has_flow_control(fp));
+   assert(fp->has_flow_control == 0);
 
-   /* _NEW_QUERY */
+   /* PIPE_NEW_QUERY */
    key->stats_wm = (brw->query.stats_wm != 0);
 
-   /* _NEW_LINE */
-   key->line_stipple = ctx->Line.StippleFlag;
+   /* PIPE_NEW_RAST */
+   key->line_stipple = brw->curr.rast->templ.line_stipple_enable;
+
 
-   /* _NEW_POLYGON */
-   key->offset_enable = ctx->Polygon.OffsetFill;
-   key->offset_units = ctx->Polygon.OffsetUnits;
-   key->offset_factor = ctx->Polygon.OffsetFactor;
+   key->offset_enable = (brw->curr.rast->templ.offset_cw ||
+			 brw->curr.rast->templ.offset_ccw);
+
+   key->offset_units = brw->curr.rast->templ.offset_units;
+   key->offset_factor = brw->curr.rast->templ.offset_scale;
 }
 
 /**
@@ -143,7 +147,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    memset(&wm, 0, sizeof(wm));
 
-   wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
+   wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
@@ -225,7 +229,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 			 NULL, NULL);
 
    /* Emit WM program relocation */
-   dri_bo_emit_reloc(bo,
+   brw->sws->bo_emit_reloc(bo,
 		     I915_GEM_DOMAIN_INSTRUCTION, 0,
 		     wm.thread0.grf_reg_count << 1,
 		     offsetof(struct brw_wm_unit_state, thread0),
@@ -233,7 +237,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    /* Emit scratch space relocation */
    if (key->total_scratch != 0) {
-      dri_bo_emit_reloc(bo,
+      brw->sws->bo_emit_reloc(bo,
 			0, 0,
 			wm.thread2.per_thread_scratch_space,
 			offsetof(struct brw_wm_unit_state, thread2),
@@ -242,7 +246,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    /* Emit sampler state relocation */
    if (key->sampler_count != 0) {
-      dri_bo_emit_reloc(bo,
+      brw->sws->bo_emit_reloc(bo,
 			I915_GEM_DOMAIN_INSTRUCTION, 0,
 			wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
 			offsetof(struct brw_wm_unit_state, wm4),
@@ -253,7 +257,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 }
 
 
-static void upload_wm_unit( struct brw_context *brw )
+static int upload_wm_unit( struct brw_context *brw )
 {
    struct brw_wm_unit_key key;
    struct brw_winsys_buffer *reloc_bufs[3];
@@ -291,19 +295,19 @@ static void upload_wm_unit( struct brw_context *brw )
    if (brw->wm.state_bo == NULL) {
       brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
    }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_wm_unit = {
    .dirty = {
-      .mesa = (PIPE_NEW_DEPTH_BUFFER |
-	       _NEW_POLYGON | 
-	       _NEW_POLYGONSTIPPLE | 
-	       _NEW_LINE | 
-	       _NEW_COLOR |
-	       _NEW_QUERY),
-
-      .brw = (BRW_NEW_FRAGMENT_PROGRAM | 
-	      BRW_NEW_CURBE_OFFSETS |
+      .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+	       PIPE_NEW_DEPTH_BUFFER |
+	       PIPE_NEW_RAST | 
+	       PIPE_NEW_DEPTH_STENCIL_ALPHA |
+	       PIPE_NEW_QUERY),
+
+      .brw = (BRW_NEW_CURBE_OFFSETS |
 	      BRW_NEW_NR_WM_SURFACES),
 
       .cache = (CACHE_NEW_WM_PROG |
-- 
cgit v1.2.3


From 9b18ca095503eb80f02db55baf9c35aa69ae1cc9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 12:08:14 +0000
Subject: i965g: more work on compilation -- surface management

---
 src/gallium/drivers/i965/brw_context.h          |  11 +-
 src/gallium/drivers/i965/brw_defines.h          |   5 +
 src/gallium/drivers/i965/brw_screen.h           |  22 +-
 src/gallium/drivers/i965/brw_screen_surface.c   | 117 ++++
 src/gallium/drivers/i965/brw_screen_texture.c   | 215 ++++++++
 src/gallium/drivers/i965/brw_state.h            |  16 -
 src/gallium/drivers/i965/brw_state_cache.c      |   3 +-
 src/gallium/drivers/i965/brw_structs.h          |  12 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c | 690 ++++--------------------
 9 files changed, 474 insertions(+), 617 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 8067e20c96..471855ab63 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -146,6 +146,8 @@ struct brw_blend_state {
    struct brw_cc3 cc3;
    struct brw_cc5 cc5;
    struct brw_cc6 cc6;
+
+   struct brw_surf_ss0 ss0;
 };
 
 
@@ -501,15 +503,14 @@ struct brw_context
       const struct brw_rasterizer_state *rast;
       const struct brw_depth_stencil_state *zstencil;
 
-      const struct brw_texture *texture[PIPE_MAX_SAMPLERS];
       const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
-      unsigned num_textures;
+      const struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_elements;
       unsigned num_samplers;
-      
 
-      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      struct brw_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_elements;
+      unsigned num_textures;
       unsigned num_vertex_buffers;
 
       struct pipe_scissor_state scissor;
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 544d36306c..65cd71c939 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -841,4 +841,9 @@
 #define URB_SIZES(brw)                  (BRW_IS_IGDNG(brw) ? 1024 : \
                                          (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
 
+
+#define BRW_TILING_NONE  0
+#define BRW_TILING_Y     1
+#define BRW_TILING_X     2
+
 #endif
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index efa27db1e0..844c6355d5 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -32,6 +32,7 @@
 #include "pipe/p_screen.h"
 
 #include "brw_reg.h"
+#include "brw_structs.h"
 
 struct brw_winsys_screen;
 
@@ -68,10 +69,23 @@ struct brw_texture
 {
    struct pipe_texture base;
 
-   ubyte shader_swizzle;
+   struct brw_winsys_buffer *bo;
+   struct brw_surface_state ss;
+
+   unsigned brw_target;
+   unsigned pitch;
+   unsigned tiling;
+   unsigned cpp;
 };
 
 
+struct brw_surface
+{
+   struct pipe_surface base;
+   struct brw_surface_state ss;
+   struct brw_winsys_buffer *bo;
+};
+
 /*
  * Cast wrappers
  */
@@ -87,6 +101,12 @@ brw_transfer(struct pipe_transfer *transfer)
    return (struct brw_transfer *)transfer;
 }
 
+static INLINE struct brw_surface *
+brw_surface(struct pipe_surface *surface)
+{
+   return (struct brw_surface *)surface;
+}
+
 static INLINE struct brw_buffer *
 brw_buffer(struct pipe_buffer *buffer)
 {
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index e0df6cc629..01d4b2d2b1 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -2,6 +2,123 @@
 #include "pipe/p_screen.h"
 #include "brw_screen.h"
 
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffers support.
+ */
+static void
+brw_update_renderbuffer_surface(struct brw_context *brw,
+				struct gl_renderbuffer *rb,
+				unsigned int unit)
+{
+   struct brw_winsys_buffer *region_bo = NULL;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_region *region = irb ? irb->region : NULL;
+   struct {
+      unsigned int surface_type;
+      unsigned int surface_format;
+      unsigned int width, height, pitch, cpp;
+      GLubyte color_mask[4];
+      GLboolean color_blend;
+      uint32_t tiling;
+      uint32_t draw_offset;
+   } key;
+
+   memset(&key, 0, sizeof(key));
+
+   if (region != NULL) {
+      region_bo = region->buffer;
+
+      key.surface_type = BRW_SURFACE_2D;
+      switch (irb->texformat->MesaFormat) {
+      case PIPE_FORMAT_ARGB8888:
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 break;
+      case PIPE_FORMAT_RGB565:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
+	 break;
+      case PIPE_FORMAT_ARGB1555:
+	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+	 break;
+      case PIPE_FORMAT_ARGB4444:
+	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+	 break;
+      default:
+	 debug_printf("Bad renderbuffer format: %d\n",
+		      irb->texformat->MesaFormat);
+	 assert(0);
+	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+	 return;
+      }
+      key.tiling = region->tiling;
+      if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
+	 key.width = rb->Width;
+	 key.height = rb->Height;
+      } else {
+	 key.width = region->width;
+	 key.height = region->height;
+      }
+      key.pitch = region->pitch;
+      key.cpp = region->cpp;
+      key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
+   } 
+
+   memcpy(key.color_mask, ctx->Color.ColorMask,
+	  sizeof(key.color_mask));
+
+   key.color_blend = (!ctx->Color._LogicOpEnabled &&
+		      ctx->Color.BlendEnabled);
+
+   brw->sws->bo_unreference(brw->wm.surf_bo[unit]);
+   brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
+					    BRW_SS_SURFACE,
+					    &key, sizeof(key),
+					    &region_bo, 1,
+					    NULL);
+
+   if (brw->wm.surf_bo[unit] == NULL) {
+      struct brw_surface_state surf;
+
+      memset(&surf, 0, sizeof(surf));
+
+      surf.ss0.surface_format = key.surface_format;
+      surf.ss0.surface_type = key.surface_type;
+      if (key.tiling == I915_TILING_NONE) {
+	 surf.ss1.base_addr = key.draw_offset;
+      } else {
+	 uint32_t tile_offset = key.draw_offset % 4096;
+
+	 surf.ss1.base_addr = key.draw_offset - tile_offset;
+
+	 assert(BRW_IS_G4X(brw) || tile_offset == 0);
+	 if (BRW_IS_G4X(brw)) {
+	    if (key.tiling == I915_TILING_X) {
+	       /* Note that the low bits of these fields are missing, so
+		* there's the possibility of getting in trouble.
+		*/
+	       surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
+	       surf.ss5.y_offset = tile_offset / 512 / 2;
+	    } else {
+	       surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
+	       surf.ss5.y_offset = tile_offset / 128 / 2;
+	    }
+	 }
+      }
+
+      if (region_bo != NULL)
+	 surf.ss1.base_addr += region_bo->offset; /* reloc */
+
+      surf.ss2.width = key.width - 1;
+      surf.ss2.height = key.height - 1;
+      brw_set_surface_tiling(&surf, key.tiling);
+      surf.ss3.pitch = (key.pitch * key.cpp) - 1;
+
+}
+
+
+
 struct brw_surface_id {
    unsigned face:3;
    unsigned zslice:13;
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 50c30878c6..3d069add6f 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -36,6 +36,166 @@
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
+
+
+static GLuint translate_tex_target( unsigned target )
+{
+   switch (target) {
+   case PIPE_TEXTURE_1D: 
+      return BRW_SURFACE_1D;
+
+   case PIPE_TEXTURE_2D: 
+      return BRW_SURFACE_2D;
+
+   case PIPE_TEXTURE_3D: 
+      return BRW_SURFACE_3D;
+
+   case PIPE_TEXTURE_CUBE:
+      return BRW_SURFACE_CUBE;
+
+   default: 
+      assert(0); 
+      return BRW_SURFACE_1D;
+   }
+}
+
+
+static GLuint translate_tex_format( enum pipe_format pf )
+{
+   switch( pf ) {
+   case PIPE_FORMAT_L8_UNORM:
+      return BRW_SURFACEFORMAT_L8_UNORM;
+
+   case PIPE_FORMAT_I8_UNORM:
+      return BRW_SURFACEFORMAT_I8_UNORM;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return BRW_SURFACEFORMAT_A8_UNORM; 
+
+   case PIPE_FORMAT_A8L8_UNORM:
+      return BRW_SURFACEFORMAT_L8A8_UNORM;
+
+   case PIPE_FORMAT_A8R8G8B8_UNORM: /* XXX */
+   case PIPE_FORMAT_B8G8R8A8_UNORM: /* XXX */
+   case PIPE_FORMAT_R8G8B8A8_UNORM: /* XXX */
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+
+   case PIPE_FORMAT_R8G8B8X8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return BRW_SURFACEFORMAT_B5G6R5_UNORM;
+
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
+
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
+
+
+   case PIPE_FORMAT_L16_UNORM:
+      return BRW_SURFACEFORMAT_L16_UNORM;
+
+      /* XXX: Z texturing: 
+   case PIPE_FORMAT_I16_UNORM:
+      return BRW_SURFACEFORMAT_I16_UNORM;
+       */
+
+      /* XXX: Z texturing:
+   case PIPE_FORMAT_A16_UNORM:
+      return BRW_SURFACEFORMAT_A16_UNORM; 
+      */
+
+   case PIPE_FORMAT_YCBCR_REV:
+      return BRW_SURFACEFORMAT_YCRCB_NORMAL;
+
+   case PIPE_FORMAT_YCBCR:
+      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
+
+      /* XXX: Add FXT to gallium?
+   case PIPE_FORMAT_FXT1_RGBA:
+      return BRW_SURFACEFORMAT_FXT1;
+      */
+
+   case PIPE_FORMAT_DXT1_RGB:
+       return BRW_SURFACEFORMAT_DXT1_RGB;
+
+   case PIPE_FORMAT_DXT1_RGBA:
+       return BRW_SURFACEFORMAT_BC1_UNORM;
+       
+   case PIPE_FORMAT_DXT3_RGBA:
+       return BRW_SURFACEFORMAT_BC2_UNORM;
+       
+   case PIPE_FORMAT_DXT5_RGBA:
+       return BRW_SURFACEFORMAT_BC3_UNORM;
+
+   case PIPE_FORMAT_R8G8B8A8_SRGB:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
+
+   case PIPE_FORMAT_A8L8_SRGB:
+      return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
+
+   case PIPE_FORMAT_L8_SRGB:
+      return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
+
+   case PIPE_FORMAT_DXT1_SRGB:
+      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
+
+      /* XXX: which pipe depth formats does i965 support?
+       */
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+         return BRW_SURFACEFORMAT_I24X8_UNORM;
+
+#if 0
+      /* XXX: these different surface formats don't seem to
+       * make any difference for shadow sampler/compares.
+       */
+      if (depth_mode == GL_INTENSITY) 
+         return BRW_SURFACEFORMAT_I24X8_UNORM;
+      else if (depth_mode == GL_ALPHA)
+         return BRW_SURFACEFORMAT_A24X8_UNORM;
+      else
+         return BRW_SURFACEFORMAT_L24X8_UNORM;
+#endif
+
+      /* XXX: presumably for bump mapping.  Add this to mesa state
+       * tracker?
+       */
+   case PIPE_FORMAT_R8G8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8_SNORM;
+
+   case PIPE_FORMAT_R8G8B8A8_SNORM:
+      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
+
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+static void
+brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
+{
+   switch (tiling) {
+   case BRW_TILING_NONE:
+      surf->ss3.tiled_surface = 0;
+      surf->ss3.tile_walk = 0;
+      break;
+   case BRW_TILING_X:
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case BRW_TILING_Y:
+      surf->ss3.tiled_surface = 1;
+      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
+}
+
+
 GLboolean brw_miptree_layout(struct brw_context *brw,
 			     struct intel_mipmap_tree *mt,
 			     uint32_t tiling)
@@ -216,3 +376,58 @@ GLboolean brw_miptree_layout(struct brw_context *brw,
    return GL_TRUE;
 }
 
+
+static void brw_create_texture( struct pipe_screen *screen,
+				const pipe_texture *templ )
+
+{  
+
+   key.format = tex->base.format;
+   key.pitch = tex->pitch;
+   key.depth = tex->base.depth[0];
+   key.bo = tex->buffer;
+   key.offset = 0;
+
+   key.target = tex->brw_target;	/* translated to BRW enum */
+   //key.depthmode = 0; /* XXX: add this to gallium? or the state tracker? */
+   key.last_level = tex->base.last_level;
+   key.width = tex->base.depth[0];
+   key.height = tex->base.height[0];
+   key.cpp = tex->cpp;
+   key.tiling = tex->tiling;
+
+
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = translate_tex_target(key->target);
+   surf.ss0.surface_format = translate_tex_format(key->format /* , key->depthmode */ );
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+   assert(key->bo);
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
+   surf.ss2.mip_count = key->last_level;
+   surf.ss2.width = key->width - 1;
+   surf.ss2.height = key->height - 1;
+   brw_set_surface_tiling(&surf, key->tiling);
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
+   surf.ss3.depth = key->depth - 1;
+
+   surf.ss4.min_lod = 0;
+ 
+   if (key->target == PIPE_TEXTURE_CUBE) {
+      surf.ss0.cube_pos_x = 1;
+      surf.ss0.cube_pos_y = 1;
+      surf.ss0.cube_pos_z = 1;
+      surf.ss0.cube_neg_x = 1;
+      surf.ss0.cube_neg_y = 1;
+      surf.ss0.cube_neg_z = 1;
+   }
+
+}
+
+
+
+
+
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 2275e9ad69..b47b04fd46 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -94,19 +94,6 @@ const struct brw_tracked_state brw_indices;
 const struct brw_tracked_state brw_vertices;
 const struct brw_tracked_state brw_index_buffer;
 
-/**
- * Use same key for WM and VS surfaces.
- */
-struct brw_surface_key {
-   unsigned target;
-   struct brw_winsys_buffer *bo;
-   GLint format;
-   GLint first_level, last_level;
-   GLint width, height, depth;
-   GLint pitch, cpp;
-   uint32_t tiling;
-   GLuint offset;
-};
 
 /***********************************************************************
  * brw_state.c
@@ -171,9 +158,6 @@ void brw_clear_batch_cache( struct brw_context *brw );
 /***********************************************************************
  * brw_wm_surface_state.c 
  */
-struct brw_winsys_buffer *
-brw_create_constant_surface( struct brw_context *brw,
-                             struct brw_surface_key *key );
 
 /***********************************************************************
  * brw_state_debug.c
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 4310d01ba2..9cf44f7a5c 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -179,7 +179,8 @@ brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key,
                  GLuint key_size,
-                 struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs,
+                 struct brw_winsys_buffer **reloc_bufs, 
+		 GLuint nr_reloc_bufs,
                  void *aux_return)
 {
    struct brw_cache_item *item;
diff --git a/src/gallium/drivers/i965/brw_structs.h b/src/gallium/drivers/i965/brw_structs.h
index f5d6a2599b..bf10bc04de 100644
--- a/src/gallium/drivers/i965/brw_structs.h
+++ b/src/gallium/drivers/i965/brw_structs.h
@@ -1048,7 +1048,7 @@ struct brw_sf_viewport
  */
 struct brw_surface_state
 {
-   struct {
+   struct brw_surf_ss0 {
       GLuint cube_pos_z:1; 
       GLuint cube_neg_z:1; 
       GLuint cube_pos_y:1; 
@@ -1070,18 +1070,18 @@ struct brw_surface_state
       GLuint surface_type:3;       /**< BRW_SURFACE_1D/2D/3D/CUBE */
    } ss0;
    
-   struct {
+   struct brw_surf_ss1 {
       GLuint base_addr;  
    } ss1;
    
-   struct {
+   struct brw_surf_ss2 {
       GLuint pad:2;
       GLuint mip_count:4; 
       GLuint width:13; 
       GLuint height:13; 
    } ss2;
 
-   struct {
+   struct brw_surf_ss3 {
       GLuint tile_walk:1; 
       GLuint tiled_surface:1; 
       GLuint pad:1; 
@@ -1089,7 +1089,7 @@ struct brw_surface_state
       GLuint depth:11; 
    } ss3;
    
-   struct {
+   struct brw_surf_ss4 {
       GLuint multisample_position_palette_index:3;
       GLuint pad1:1;
       GLuint num_multisamples:3;
@@ -1099,7 +1099,7 @@ struct brw_surface_state
       GLuint min_lod:4; 
    } ss4;
 
-   struct {
+   struct brw_surf_ss5 {
       GLuint pad1:16;
       GLuint llc_mapping:1;
       GLuint mlc_mapping:1;
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index 7157feb6f3..88485c76cb 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -29,448 +29,49 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
                    
+#include "pipe/p_format.h"
 
 #include "brw_batchbuffer.h"
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_screen.h"
 
 
-static GLuint translate_tex_target( GLenum target )
-{
-   switch (target) {
-   case GL_TEXTURE_1D: 
-      return BRW_SURFACE_1D;
-
-   case GL_TEXTURE_RECTANGLE_NV: 
-      return BRW_SURFACE_2D;
-
-   case GL_TEXTURE_2D: 
-      return BRW_SURFACE_2D;
-
-   case GL_TEXTURE_3D: 
-      return BRW_SURFACE_3D;
-
-   case GL_TEXTURE_CUBE_MAP: 
-      return BRW_SURFACE_CUBE;
-
-   default: 
-      assert(0); 
-      return 0;
-   }
-}
-
-
-static GLuint translate_tex_format( GLuint mesa_format, 
-				    GLenum depth_mode )
-{
-   switch( pipe_format ) {
-   case PIPE_FORMAT_L8_UNORM:
-      return BRW_SURFACEFORMAT_L8_UNORM;
-
-   case PIPE_FORMAT_I8_UNORM:
-      return BRW_SURFACEFORMAT_I8_UNORM;
-
-   case PIPE_FORMAT_A8_UNORM:
-      return BRW_SURFACEFORMAT_A8_UNORM; 
-
-   case PIPE_FORMAT_A8L8_UNORM:
-      return BRW_SURFACEFORMAT_L8A8_UNORM;
-
-   case PIPE_FORMAT_A8R8G8B8_UNORM:
-   case PIPE_FORMAT_B8G8R8A8_UNORM:
-   case PIPE_FORMAT_R8G8B8A8_UNORM:
-      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
-
-   case PIPE_FORMAT_:
-      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 
-   case PIPE_FORMAT_RGB565:
-      return BRW_SURFACEFORMAT_B5G6R5_UNORM;
-
-   case PIPE_FORMAT_ARGB1555:
-      return BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-
-   case PIPE_FORMAT_ARGB4444:
-      return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-
-
-   case PIPE_FORMAT_L16_UNORM:
-      return BRW_SURFACEFORMAT_L16_UNORM;
-
-   case PIPE_FORMAT_I16_UNORM:
-      return BRW_SURFACEFORMAT_I16_UNORM;
-
-   case PIPE_FORMAT_A16_UNORM:
-      return BRW_SURFACEFORMAT_A16_UNORM; 
-
-   case PIPE_FORMAT_YCBCR_REV:
-      return BRW_SURFACEFORMAT_YCRCB_NORMAL;
-
-   case PIPE_FORMAT_YCBCR:
-      return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
-
-   case PIPE_FORMAT_RGB_FXT1:
-   case PIPE_FORMAT_RGBA_FXT1:
-      return BRW_SURFACEFORMAT_FXT1;
-
-   case PIPE_FORMAT_RGB_DXT1:
-       return BRW_SURFACEFORMAT_DXT1_RGB;
-
-   case PIPE_FORMAT_RGBA_DXT1:
-       return BRW_SURFACEFORMAT_BC1_UNORM;
-       
-   case PIPE_FORMAT_RGBA_DXT3:
-       return BRW_SURFACEFORMAT_BC2_UNORM;
-       
-   case PIPE_FORMAT_RGBA_DXT5:
-       return BRW_SURFACEFORMAT_BC3_UNORM;
-
-   case PIPE_FORMAT_R8G8B8A8_SRGB:
-      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
-
-   case PIPE_FORMAT_A8L8_SRGB:
-      return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB;
-
-   case PIPE_FORMAT_L8_SRGB:
-      return BRW_SURFACEFORMAT_L8_UNORM_SRGB;
-
-   case PIPE_FORMAT_SRGB_DXT1:
-      return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
-
-   case PIPE_FORMAT_S8_Z24:
-      /* XXX: these different surface formats don't seem to
-       * make any difference for shadow sampler/compares.
-       */
-      if (depth_mode == GL_INTENSITY) 
-         return BRW_SURFACEFORMAT_I24X8_UNORM;
-      else if (depth_mode == GL_ALPHA)
-         return BRW_SURFACEFORMAT_A24X8_UNORM;
-      else
-         return BRW_SURFACEFORMAT_L24X8_UNORM;
-
-   case PIPE_FORMAT_DUDV8:
-      return BRW_SURFACEFORMAT_R8G8_SNORM;
-
-   case PIPE_FORMAT_SIGNED_RGBA8888_REV:
-      return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
-
-   default:
-      assert(0);
-      return 0;
-   }
-}
-
-static void
-brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
-{
-   switch (tiling) {
-   case I915_TILING_NONE:
-      surf->ss3.tiled_surface = 0;
-      surf->ss3.tile_walk = 0;
-      break;
-   case I915_TILING_X:
-      surf->ss3.tiled_surface = 1;
-      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-      break;
-   case I915_TILING_Y:
-      surf->ss3.tiled_surface = 1;
-      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
-      break;
-   }
-}
-
-static struct brw_winsys_buffer *
-brw_create_texture_surface( struct brw_context *brw,
-			    struct brw_surface_key *key )
-{
-   struct brw_surface_state surf;
-   struct brw_winsys_buffer *bo;
-
-   memset(&surf, 0, sizeof(surf));
-
-   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   surf.ss0.surface_type = translate_tex_target(key->target);
-   if (key->bo) {
-      surf.ss0.surface_format = translate_tex_format(key->format,
-						     key->internal_format,
-						     key->depthmode);
-   }
-   else {
-      switch (key->depth) {
-      case 32:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-         break;
-      default:
-      case 24:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8X8_UNORM;
-         break;
-      case 16:
-         surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-         break;
-      }
-   }
-
-   /* This is ok for all textures with channel width 8bit or less:
-    */
-/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
-
-   surf.ss2.mip_count = key->last_level;
-   surf.ss2.width = key->width - 1;
-   surf.ss2.height = key->height - 1;
-   brw_set_surface_tiling(&surf, key->tiling);
-   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
-   surf.ss3.depth = key->depth - 1;
-
-   surf.ss4.min_lod = 0;
- 
-   if (key->target == GL_TEXTURE_CUBE_MAP) {
-      surf.ss0.cube_pos_x = 1;
-      surf.ss0.cube_pos_y = 1;
-      surf.ss0.cube_pos_z = 1;
-      surf.ss0.cube_neg_x = 1;
-      surf.ss0.cube_neg_y = 1;
-      surf.ss0.cube_neg_z = 1;
-   }
-
-   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
-			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
-			 &surf, sizeof(surf),
-			 NULL, NULL);
-
-   if (key->bo) {
-      /* Emit relocation to surface contents */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
-   }
-   return bo;
-}
 
 static void
-brw_update_texture_surface( struct brw_context *brw, GLuint unit )
+brw_update_texture_surface( struct brw_context *brw,
+			    struct brw_texture *tex,
+			    GLuint surf )
 {
-   struct pipe_texture *tex = brw->texture[unit];
-   struct brw_surface_key key;
-   const GLuint surf = SURF_INDEX_TEXTURE(unit);
-
-   memset(&key, 0, sizeof(key));
-
-   key.format = tex->base.format;
-   key.pitch = tex->pitch;
-   key.depth = tex->base.depth[0];
-   key.bo = tex->buffer;
-   key.offset = 0;
-
-   key.target = tObj->target;	/* translated to BRW enum */
-   /* key.depthmode = tObj->DepthMode; */ /* XXX: add this to gallium? or the state tracker? */
-   key.first_level = 0;
-   key.last_level = tex->base.last_level;
-   key.width = tex->base.depth[0];
-   key.height = tex->base.height[0];
-   key.cpp = tex->cpp;
-   key.tiling = tex->tiling;
-
-   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
    brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
                                             BRW_SS_SURFACE,
-                                            &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
+                                            &tex->ss, sizeof tex->ss,
+                                            &tex->bo, 1,
                                             NULL);
-   if (brw->wm.surf_bo[surf] == NULL) {
-      brw->wm.surf_bo[surf] = brw_create_texture_surface(brw, &key);
-   }
-}
-
-
-
-/**
- * Create the constant buffer surface.  Vertex/fragment shader constants will be
- * read from this buffer with Data Port Read instructions/messages.
- */
-struct brw_winsys_buffer *
-brw_create_constant_surface( struct brw_context *brw,
-                             struct brw_surface_key *key )
-{
-   const GLint w = key->width - 1;
-   struct brw_surface_state surf;
-   struct brw_winsys_buffer *bo;
-
-   memset(&surf, 0, sizeof(surf));
-
-   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   surf.ss0.surface_type = BRW_SURFACE_BUFFER;
-   surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
-
-   assert(key->bo);
-   if (key->bo)
-      surf.ss1.base_addr = key->bo->offset; /* reloc */
-   else
-      surf.ss1.base_addr = key->offset;
-
-   surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
-   surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
-   surf.ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
-   surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
-   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
- 
-   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
-			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
-			 &surf, sizeof(surf),
-			 NULL, NULL);
 
-   if (key->bo) {
+   if (brw->wm.surf_bo[surf] == NULL) {
+      brw->wm.surf_bo[surf] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+					       &tex->ss, sizeof tex->ss,
+					       &tex->bo, 1,
+					       &tex->ss, sizeof tex->ss,
+					       NULL, NULL);
+      
       /* Emit relocation to surface contents */
-      dri_bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_SAMPLER, 0,
-			0,
-			offsetof(struct brw_surface_state, ss1),
-			key->bo);
+      brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf],
+			      I915_GEM_DOMAIN_SAMPLER, 0,
+			      0,
+			      offsetof(struct brw_surface_state, ss1),
+			      tex->bo);
    }
-
-   return bo;
 }
 
-/* Creates a new WM constant buffer reflecting the current fragment program's
- * constants, if needed by the fragment program.
- *
- * Otherwise, constants go through the CURBEs using the brw_constant_buffer
- * state atom.
- */
-static drm_intel_bo *
-brw_wm_update_constant_buffer(struct brw_context *brw)
-{
-   struct brw_fragment_program *fp =
-      (struct brw_fragment_program *) brw->fragment_program;
-   const struct gl_program_parameter_list *params = fp->program.Base.Parameters;
-   const int size = params->NumParameters * 4 * sizeof(GLfloat);
-   drm_intel_bo *const_buffer;
 
-   /* BRW_NEW_FRAGMENT_PROGRAM */
-   if (!fp->use_const_buffer)
-      return NULL;
 
-   const_buffer = drm_intel_bo_alloc(intel->bufmgr, 
-				     BRW_BUFFER_TYPE_SHADER_CONSTANTS,
-				     size, 64);
 
-   /* _NEW_PROGRAM_CONSTANTS */
-   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
 
-   return const_buffer;
-}
 
-/**
- * Update the surface state for a WM constant buffer.
- * The constant buffer will be (re)allocated here if needed.
- */
-static void
-brw_update_wm_constant_surface( struct brw_context *brw,
-                                GLuint surf)
-{
-   struct brw_context *brw = brw_context(ctx);
-   struct brw_surface_key key;
-   struct brw_fragment_program *fp =
-      (struct brw_fragment_program *) brw->fragment_program;
-   const struct gl_program_parameter_list *params =
-      fp->program.Base.Parameters;
-
-   /* If we're in this state update atom, we need to update WM constants, so
-    * free the old buffer and create a new one for the new contents.
-    */
-   brw->sws->bo_unreference(fp->const_buffer);
-   fp->const_buffer = brw_wm_update_constant_buffer(brw);
-
-   /* If there's no constant buffer, then no surface BO is needed to point at
-    * it.
-    */
-   if (fp->const_buffer == 0) {
-      drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
-      brw->wm.surf_bo[surf] = NULL;
-      return;
-   }
-
-   memset(&key, 0, sizeof(key));
-
-   key.format = PIPE_FORMAT_RGBA_FLOAT32;
-   key.internal_format = GL_RGBA;
-   key.bo = fp->const_buffer;
-   key.depthmode = GL_NONE;
-   key.pitch = params->NumParameters;
-   key.width = params->NumParameters;
-   key.height = 1;
-   key.depth = 1;
-   key.cpp = 16;
-
-   /*
-   printf("%s:\n", __FUNCTION__);
-   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
-          key.width, key.height, key.depth, key.cpp, key.pitch);
-   */
-
-   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
-   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
-                                            BRW_SS_SURFACE,
-                                            &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
-                                            NULL);
-   if (brw->wm.surf_bo[surf] == NULL) {
-      brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
-   }
-   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
-}
-
-/**
- * Updates surface / buffer for fragment shader constant buffer, if
- * one is required.
- *
- * This consumes the state updates for the constant buffer, and produces
- * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
- * inclusion in the binding table.
- */
-static void prepare_wm_constant_surface(struct brw_context *brw )
-{
-   struct brw_fragment_program *fp =
-      (struct brw_fragment_program *) brw->fragment_program;
-   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
-
-   drm_intel_bo_unreference(fp->const_buffer);
-   fp->const_buffer = brw_wm_update_constant_buffer(brw);
-
-   /* If there's no constant buffer, then no surface BO is needed to point at
-    * it.
-    */
-   if (fp->const_buffer == 0) {
-      if (brw->wm.surf_bo[surf] != NULL) {
-	 drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
-	 brw->wm.surf_bo[surf] = NULL;
-	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
-      }
-      return;
-   }
-
-   brw_update_wm_constant_surface(ctx, surf);
-}
-
-const struct brw_tracked_state brw_wm_constant_surface = {
-   .dirty = {
-      .mesa = (_NEW_PROGRAM_CONSTANTS),
-      .brw = (BRW_NEW_FRAGMENT_PROGRAM),
-      .cache = 0
-   },
-   .prepare = prepare_wm_constant_surface,
-};
 
 
 /**
@@ -480,142 +81,46 @@ const struct brw_tracked_state brw_wm_constant_surface = {
  */
 static void
 brw_update_renderbuffer_surface(struct brw_context *brw,
-				struct gl_renderbuffer *rb,
+				struct brw_surface *surface,
 				unsigned int unit)
 {
-   struct brw_winsys_buffer *region_bo = NULL;
-   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-   struct intel_region *region = irb ? irb->region : NULL;
-   struct {
-      unsigned int surface_type;
-      unsigned int surface_format;
-      unsigned int width, height, pitch, cpp;
-      GLubyte color_mask[4];
-      GLboolean color_blend;
-      uint32_t tiling;
-      uint32_t draw_offset;
-   } key;
-
-   memset(&key, 0, sizeof(key));
-
-   if (region != NULL) {
-      region_bo = region->buffer;
-
-      key.surface_type = BRW_SURFACE_2D;
-      switch (irb->texformat->MesaFormat) {
-      case PIPE_FORMAT_ARGB8888:
-	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-	 break;
-      case PIPE_FORMAT_RGB565:
-	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-	 break;
-      case PIPE_FORMAT_ARGB1555:
-	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-	 break;
-      case PIPE_FORMAT_ARGB4444:
-	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-	 break;
-      default:
-	 debug_printf("Bad renderbuffer format: %d\n",
-		      irb->texformat->MesaFormat);
-	 assert(0);
-	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-	 return;
-      }
-      key.tiling = region->tiling;
-      if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
-	 key.width = rb->Width;
-	 key.height = rb->Height;
-      } else {
-	 key.width = region->width;
-	 key.height = region->height;
-      }
-      key.pitch = region->pitch;
-      key.cpp = region->cpp;
-      key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
-   } else {
-      key.surface_type = BRW_SURFACE_NULL;
-      key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-      key.tiling = I915_TILING_X;
-      key.width = 1;
-      key.height = 1;
-      key.cpp = 4;
-      key.draw_offset = 0;
-   }
-   memcpy(key.color_mask, ctx->Color.ColorMask,
-	  sizeof(key.color_mask));
-   key.color_blend = (!ctx->Color._LogicOpEnabled &&
-		      ctx->Color.BlendEnabled);
+   struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0;
+   struct brw_surface_state ss;
+
+   /* Surfaces are potentially shared between contexts, so can't
+    * scribble the in-place ss0 value in the surface.
+    */
+   memcpy(&ss, &surface->ss, sizeof ss);
+
+   ss.ss0.color_blend        = blend_ss0.color_blend;
+   ss.ss0.writedisable_blue  = blend_ss0.writedisable_blue;
+   ss.ss0.writedisable_green = blend_ss0.writedisable_green;
+   ss.ss0.writedisable_red   = blend_ss0.writedisable_red;
+   ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha;
 
    brw->sws->bo_unreference(brw->wm.surf_bo[unit]);
    brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
 					    BRW_SS_SURFACE,
-					    &key, sizeof(key),
-					    &region_bo, 1,
+					    &ss, sizeof(ss),
+					    &surface->bo, 1,
 					    NULL);
 
    if (brw->wm.surf_bo[unit] == NULL) {
-      struct brw_surface_state surf;
-
-      memset(&surf, 0, sizeof(surf));
-
-      surf.ss0.surface_format = key.surface_format;
-      surf.ss0.surface_type = key.surface_type;
-      if (key.tiling == I915_TILING_NONE) {
-	 surf.ss1.base_addr = key.draw_offset;
-      } else {
-	 uint32_t tile_offset = key.draw_offset % 4096;
-
-	 surf.ss1.base_addr = key.draw_offset - tile_offset;
-
-	 assert(BRW_IS_G4X(brw) || tile_offset == 0);
-	 if (BRW_IS_G4X(brw)) {
-	    if (key.tiling == I915_TILING_X) {
-	       /* Note that the low bits of these fields are missing, so
-		* there's the possibility of getting in trouble.
-		*/
-	       surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 512 / 2;
-	    } else {
-	       surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 128 / 2;
-	    }
-	 }
-      }
-      if (region_bo != NULL)
-	 surf.ss1.base_addr += region_bo->offset; /* reloc */
-
-      surf.ss2.width = key.width - 1;
-      surf.ss2.height = key.height - 1;
-      brw_set_surface_tiling(&surf, key.tiling);
-      surf.ss3.pitch = (key.pitch * key.cpp) - 1;
-
-      /* _NEW_COLOR */
-      surf.ss0.color_blend = key.color_blend;
-      surf.ss0.writedisable_red =   !key.color_mask[0];
-      surf.ss0.writedisable_green = !key.color_mask[1];
-      surf.ss0.writedisable_blue =  !key.color_mask[2];
-      surf.ss0.writedisable_alpha = !key.color_mask[3];
-
-      /* Key size will never match key size for textures, so we're safe. */
+
       brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
                                                BRW_SS_SURFACE,
-                                               &key, sizeof(key),
-					       &region_bo, 1,
-					       &surf, sizeof(surf),
+                                               &ss, sizeof ss,
+					       &surface->bo, 1,
+					       &ss, sizeof ss,
 					       NULL, NULL);
-      if (region_bo != NULL) {
-	 /* We might sample from it, and we might render to it, so flag
-	  * them both.  We might be able to figure out from other state
-	  * a more restrictive relocation to emit.
-	  */
-	 drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit],
-				 offsetof(struct brw_surface_state, ss1),
-				 region_bo,
-				 surf.ss1.base_addr - region_bo->offset,
-				 I915_GEM_DOMAIN_RENDER,
-				 I915_GEM_DOMAIN_RENDER);
-      }
+
+      /* XXX: we will only be rendering to this surface:
+       */
+      brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit],
+			      I915_GEM_DOMAIN_RENDER, 0, 
+			      ss.ss1.base_addr - surface->bo->offset, /* XXX */
+			      offsetof(struct brw_surface_state, ss1),
+			      surface->bo);
    }
 }
 
@@ -631,21 +136,21 @@ brw_wm_get_binding_table(struct brw_context *brw)
 
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
 
+   /* Note there is no key for this search beyond the values in the
+    * relocation array:
+    */
    bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
 			      NULL, 0,
 			      brw->wm.surf_bo, brw->wm.nr_surfaces,
 			      NULL);
 
    if (bind_bo == NULL) {
-      GLuint data_size = brw->wm.nr_surfaces * sizeof(GLuint);
       uint32_t data[BRW_WM_MAX_SURF];
+      GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
       int i;
 
       for (i = 0; i < brw->wm.nr_surfaces; i++)
-         if (brw->wm.surf_bo[i])
-            data[i] = brw->wm.surf_bo[i]->offset;
-         else
-            data[i] = 0;
+	 data[i] = brw->wm.surf_bo[i]->offset;
 
       bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
 				  NULL, 0,
@@ -654,70 +159,79 @@ brw_wm_get_binding_table(struct brw_context *brw)
 				  NULL, NULL);
 
       /* Emit binding table relocations to surface state */
-      for (i = 0; i < BRW_WM_MAX_SURF; i++) {
-	 if (brw->wm.surf_bo[i] != NULL) {
-	    dri_bo_emit_reloc(bind_bo,
-			      I915_GEM_DOMAIN_INSTRUCTION, 0,
-			      0,
-			      i * sizeof(GLuint),
-			      brw->wm.surf_bo[i]);
-	 }
+      for (i = 0; i < brw->wm.nr_surfaces; i++) {
+	 brw->sws->bo_emit_reloc(bind_bo,
+				 I915_GEM_DOMAIN_INSTRUCTION, 0,
+				 0,
+				 i * sizeof(GLuint),
+				 brw->wm.surf_bo[i]);
       }
    }
 
    return bind_bo;
 }
 
-static void prepare_wm_surfaces(struct brw_context *brw )
+static int prepare_wm_surfaces(struct brw_context *brw )
 {
    GLuint i;
-   int old_nr_surfaces;
-
-   /* _NEW_BUFFERS */
-   /* Update surfaces for drawing buffers */
-   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
-      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
-         brw_update_renderbuffer_surface(brw,
-					 ctx->DrawBuffer->_ColorDrawBuffers[i],
-					 i);
-      }
-   } else {
-      brw_update_renderbuffer_surface(brw, NULL, 0);
+   int nr_surfaces = 0;
+
+   /* Unreference old buffers
+    */
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      brw->sws->bo_unreference(brw->wm.surf_bo[i]);
+      brw->wm.surf_bo[i] = NULL;
    }
 
-   old_nr_surfaces = brw->wm.nr_surfaces;
-   brw->wm.nr_surfaces = PIPE_MAX_COLOR_BUFS;
 
-   if (brw->wm.surf_bo[SURF_INDEX_FRAG_CONST_BUFFER] != NULL)
-       brw->wm.nr_surfaces = SURF_INDEX_FRAG_CONST_BUFFER + 1;
+   /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND
+    *
+    * Update surfaces for drawing buffers.  Mixes in colormask and
+    * blend state.
+    *
+    * XXX: no color buffer case
+    */
+   for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+      brw_update_renderbuffer_surface(brw, 
+				      brw_surface(brw->curr.fb.cbufs[i]), 
+				      nr_surfaces++);
+   }
 
-   /* Update surfaces for textures */
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
-      const GLuint surf = SURF_INDEX_TEXTURE(i);
+   /* PIPE_NEW_TEXTURE 
+    */
+   for (i = 0; i < brw->curr.num_textures; i++) {
+      brw_update_texture_surface(brw, 
+				 brw->curr.texture[i],
+				 nr_surfaces++);
+   }
 
-      /* _NEW_TEXTURE, BRW_NEW_TEXDATA */
-      if (texUnit->_ReallyEnabled) {
-	 brw_update_texture_surface(ctx, i);
-	 brw->wm.nr_surfaces = surf + 1;
-      } else {
-         brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
-         brw->wm.surf_bo[surf] = NULL;
-      }
+   /* PIPE_NEW_FRAGMENT_CONSTANTS
+    */
+#if 0
+   if (brw->curr.fragment_constants) {
+      brw_update_fragment_constant_surface(brw, 
+					   brw->curr.fragment_constants, 
+					   nr_surfaces++);
    }
+#endif
 
    brw->sws->bo_unreference(brw->wm.bind_bo);
    brw->wm.bind_bo = brw_wm_get_binding_table(brw);
 
-   if (brw->wm.nr_surfaces != old_nr_surfaces)
+   if (brw->wm.nr_surfaces != nr_surfaces) {
+      brw->wm.nr_surfaces = nr_surfaces;
       brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
+   }
+
+   return 0;
 }
 
 const struct brw_tracked_state brw_wm_surfaces = {
    .dirty = {
-      .mesa = (_NEW_COLOR |
-               _NEW_TEXTURE |
-               _NEW_BUFFERS),
+      .mesa = (PIPE_NEW_COLOR_BUFFERS |
+               PIPE_NEW_BOUND_TEXTURES |
+               PIPE_NEW_FRAGMENT_CONSTANTS |
+	       PIPE_NEW_BLEND),
       .brw = (BRW_NEW_CONTEXT |
 	      BRW_NEW_WM_SURFACES),
       .cache = 0
-- 
cgit v1.2.3


From 39448a9aa061291f4253ee2a1a42e2488e14233c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 13:11:56 +0000
Subject: i965g: more files compiling

---
 src/gallium/drivers/i965/Makefile          |   5 +-
 src/gallium/drivers/i965/brw_batchbuffer.c |  56 ++++---
 src/gallium/drivers/i965/brw_bo.c          |  12 --
 src/gallium/drivers/i965/brw_context.h     |   1 +
 src/gallium/drivers/i965/brw_pipe_blend.c  |  12 ++
 src/gallium/drivers/i965/brw_pipe_flush.c  |  25 ++--
 src/gallium/drivers/i965/brw_pipe_shader.c | 226 ++++++++++++++---------------
 7 files changed, 176 insertions(+), 161 deletions(-)
 delete mode 100644 src/gallium/drivers/i965/brw_bo.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 896cb234a6..ae37d2d702 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -29,6 +29,8 @@ C_SOURCES = \
 	brw_pipe_depth.c \
 	brw_pipe_fb.c \
 	brw_pipe_query.c \
+	brw_pipe_shader.c \
+	brw_pipe_flush.c \
 	brw_sf.c \
 	brw_sf_emit.c \
 	brw_sf_state.c \
@@ -56,10 +58,7 @@ C_SOURCES = \
 	brw_wm_surface_state.c \
 	brw_screen_surface.c \
 	brw_screen_texture.c \
-	brw_bo.c \
 	brw_batchbuffer.c \
-	brw_pipe_shader.c \
-	brw_pipe_flush.c \
 	intel_tex_layout.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 45fbd59273..1cffc0ab39 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -109,12 +109,13 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
       debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line,
 	      used);
 
-   /* Emit a flush if the bufmgr doesn't do it for us. */
-   if (intel->always_flush_cache || !intel->ttm) {
+#if 0
+   if (intel->always_flush_cache || 1) {
       *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       batch->ptr += 4;
       used = batch->ptr - batch->map;
    }
+#endif
 
    /* Round batchbuffer usage to 2 DWORDs. */
 
@@ -137,16 +138,25 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
    batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
       
    if (BRW_DEBUG & DEBUG_BATCH) {
-      dri_bo_map(batch->buf, GL_FALSE);
-      intel_decode(batch->buf->virtual, used / 4, batch->buf->offset,
-		   brw->brw_screen->pci_id);
-      dri_bo_unmap(batch->buf);
+      void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE);
+
+      intel_decode(ptr,
+		   used / 4, 
+		   batch->buf->offset,
+		   batch->chipset);
+
+      batch->sws->bo_unmap(batch->buf);
    }
 
    if (BRW_DEBUG & DEBUG_SYNC) {
+      /* Abuse map/unmap to achieve wait-for-fence.
+       *
+       * XXX: hide this inside the winsys and export a fence
+       * interface.
+       */
       debug_printf("waiting for idle\n");
-      dri_bo_map(batch->buf, GL_TRUE);
-      dri_bo_unmap(batch->buf);
+      batch->sws->bo_map(batch->buf, GL_TRUE);
+      batch->sws->bo_unmap(batch->buf);
    }
 
    /* Reset the buffer:
@@ -155,9 +165,10 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
 }
 
 
-/*  This is the only way buffers get added to the validate list.
+/* The OUT_RELOC() macro ends up here, generating a relocation within
+ * the batch buffer.
  */
-GLboolean
+enum pipe_error
 brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
                              struct brw_winsys_buffer *buffer,
                              uint32_t read_domains, uint32_t write_domain,
@@ -165,9 +176,12 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
 {
    int ret;
 
-   if (batch->ptr - batch->map > batch->buf->size)
-      debug_printf ("bad relocation ptr %p map %p offset %d size %d\n",
-		    batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+   if (batch->ptr - batch->map > batch->buf->size) {
+      debug_printf("bad relocation ptr %p map %p offset %d size %d\n",
+		   batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size);
+
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
 
    ret = batch->sws->bo_emit_reloc(batch->buf,
 				   read_domains,
@@ -175,6 +189,8 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
 				   delta, 
 				   batch->ptr - batch->map,
 				   buffer);
+   if (ret != 0)
+      return ret;
 
    /*
     * Using the old buffer offset, write in what the right data would be, in case
@@ -182,17 +198,23 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
     * in the kernel
     */
    brw_batchbuffer_emit_dword (batch, buffer->offset + delta);
-
-   return GL_TRUE;
+   return 0;
 }
 
-void
+enum pipe_error
 brw_batchbuffer_data(struct brw_batchbuffer *batch,
                        const void *data, GLuint bytes,
 		       enum cliprect_mode cliprect_mode)
 {
+   enum pipe_error ret;
+
    assert((bytes & 3) == 0);
-   brw_batchbuffer_require_space(batch, bytes);
+
+   ret = brw_batchbuffer_require_space(batch, bytes);
+   if (ret)
+      return ret;
+
    __memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
+   return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_bo.c b/src/gallium/drivers/i965/brw_bo.c
deleted file mode 100644
index e7a4dac666..0000000000
--- a/src/gallium/drivers/i965/brw_bo.c
+++ /dev/null
@@ -1,12 +0,0 @@
-
-
-void brw_buffer_subdata()
-{
-      if (intel->intelScreen->kernel_exec_fencing) {
-	 drm_intel_gem_bo_map_gtt(bo);
-	 memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size);
-	 drm_intel_gem_bo_unmap_gtt(bo);
-      } else {
-	 dri_bo_subdata(bo, offset, ib_size, index_buffer->ptr);
-      }
-}
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 471855ab63..3e9315c41f 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -169,6 +169,7 @@ struct brw_fragment_shader {
    struct tgsi_shader_info info;
 
    unsigned iz_lookup;
+   //unsigned wm_lookup;
    
    boolean  uses_depth:1;
    boolean  has_flow_control:1;
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index d3bb882b1a..cc9ee2e8db 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -130,6 +130,11 @@ static void *brw_create_blend_state( struct pipe_context *pipe,
 	 (blend->cc6.dest_blend_factor != blend->cc5.ia_dest_blend_factor ||
 	  blend->cc6.src_blend_factor != blend->cc5.ia_src_blend_factor ||
 	  blend->cc6.blend_function != blend->cc5.ia_blend_function);
+
+      /* Per-surface blend enables, currently just follow global
+       * state:
+       */
+      blend->ss0.color_blend = 1;
    }
 
    blend->cc5.dither_enable = templ->dither;
@@ -137,6 +142,13 @@ static void *brw_create_blend_state( struct pipe_context *pipe,
    if (BRW_DEBUG & DEBUG_STATS)
       blend->cc5.statistics_enable = 1;
 
+   /* Per-surface color mask -- just follow global state:
+    */
+   blend->ss0.writedisable_red   = (templ->colormask & PIPE_MASK_R) ? 1 : 0;
+   blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 1 : 0;
+   blend->ss0.writedisable_blue  = (templ->colormask & PIPE_MASK_B) ? 1 : 0;
+   blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 1 : 0;
+
    return (void *)blend;
 }
 
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index fb4a784de9..1b43428760 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -1,11 +1,15 @@
 
+#include "util/u_upload_mgr.h"
+
+#include "brw_context.h"
+
+
 /**
  * called from brw_batchbuffer_flush and children before sending a
  * batchbuffer off.
  */
-static void brw_finish_batch(struct intel_context *intel)
+static void brw_finish_batch(struct brw_context *brw)
 {
-   struct brw_context *brw = brw_context(&intel->ctx);
    brw_emit_query_end(brw);
 }
 
@@ -15,9 +19,6 @@ static void brw_finish_batch(struct intel_context *intel)
  */
 static void brw_new_batch( struct brw_context *brw )
 {
-   /* Check that we didn't just wrap our batchbuffer at a bad time. */
-   assert(!brw->no_batch_wrap);
-
    brw->curbe.need_new_bo = GL_TRUE;
 
    /* Mark all context state as needing to be re-emitted.
@@ -33,17 +34,9 @@ static void brw_new_batch( struct brw_context *brw )
    /* Move to the end of the current upload buffer so that we'll force choosing
     * a new buffer next time.
     */
-   if (brw->vb.upload.bo != NULL) {
-      brw->sws->bo_unreference(brw->vb.upload.bo);
-      brw->vb.upload.bo = NULL;
-      brw->vb.upload.offset = 0;
-   }
-}
-
+   u_upload_flush( brw->vb.upload_vertex );
+   u_upload_flush( brw->vb.upload_index );
 
-static void brw_note_fence( struct brw_context *brw, GLuint fence )
-{
-   brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_FENCE;
 }
 
 /* called from intelWaitForIdle() and intelFlush()
@@ -52,7 +45,7 @@ static void brw_note_fence( struct brw_context *brw, GLuint fence )
  */
 static GLuint brw_flush_cmd( void )
 {
-   return ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+   return ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 6e37eac634..2422f77f34 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -28,151 +28,151 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
+
+#include "util/u_memory.h"
   
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+
 #include "brw_context.h"
 #include "brw_util.h"
 #include "brw_wm.h"
 
 
 /**
- * Determine if the given fragment program uses GLSL features such
- * as flow conditionals, loops, subroutines.
- * Some GLSL shaders may use these features, others might not.
+ * Determine if the given shader uses complex features such as flow
+ * conditionals, loops, subroutines.
  */
 GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp)
 {
-    return (fp->info.insn_count[TGSI_OPCODE_ARL] > 0 ||
-	    fp->info.insn_count[TGSI_OPCODE_IF] > 0 ||
-	    fp->info.insn_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
-	    fp->info.insn_count[TGSI_OPCODE_CAL] > 0 ||
-	    fp->info.insn_count[TGSI_OPCODE_BRK] > 0 ||   /* redundant - BGNLOOP */
-	    fp->info.insn_count[TGSI_OPCODE_RET] > 0 ||	  /* redundant - CAL */
-	    fp->info.insn_count[TGSI_OPCODE_BGNLOOP] > 0);
+    return (fp->info.opcode_count[TGSI_OPCODE_ARL] > 0 ||
+	    fp->info.opcode_count[TGSI_OPCODE_IF] > 0 ||
+	    fp->info.opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
+	    fp->info.opcode_count[TGSI_OPCODE_CAL] > 0 ||
+	    fp->info.opcode_count[TGSI_OPCODE_BRK] > 0 ||   /* redundant - BGNLOOP */
+	    fp->info.opcode_count[TGSI_OPCODE_RET] > 0 ||   /* redundant - CAL */
+	    fp->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0);
 }
 
 
-static void brwBindProgram( struct brw_context *brw,
-			    GLenum target, 
-			    struct gl_program *prog )
+static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
 {
-   struct brw_context *brw = brw_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: 
-      brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-      break;
-   case GL_FRAGMENT_PROGRAM_ARB:
-      brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      break;
-   }
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.fragment_shader = (struct brw_fragment_shader *)prog;
+   brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER;
 }
 
-static struct gl_program *brwNewProgram( structg brw_context *brw,
-				      GLenum target, 
-				      GLuint id )
+static void brw_bind_vs_state( struct pipe_context *pipe, void *prog )
 {
-   struct brw_context *brw = brw_context(ctx);
-
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB: {
-      struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
-      if (prog) {
-	 prog->id = brw->program_id++;
-
-	 return _mesa_init_vertex_program( ctx, &prog->program,
-					     target, id );
-      }
-      else
-	 return NULL;
-   }
-
-   case GL_FRAGMENT_PROGRAM_ARB: {
-      struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
-      if (prog) {
-	 prog->id = brw->program_id++;
-
-	 return _mesa_init_fragment_program( ctx, &prog->program,
-					     target, id );
-      }
-      else
-	 return NULL;
-   }
-
-   default:
-      return _mesa_new_program(ctx, target, id);
-   }
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.vertex_shader = (struct brw_vertex_shader *)prog;
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_SHADER;
 }
 
-static void brwDeleteProgram( struct brw_context *brw,
-			      struct gl_program *prog )
+
+
+static void *brw_create_fs_state( struct pipe_context *pipe,
+				  const struct pipe_shader_state *shader )
 {
-   if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *brw_fprog = brw_fragment_program(fprog);
-      brw->sws->bo_unreference(brw_fprog->const_buffer);
-   }
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *fs;
+   int i;
+
+   fs = CALLOC_STRUCT(brw_fragment_shader);
+   if (fs == NULL)
+      return NULL;
+
+   /* Duplicate tokens, scan shader
+    */
+   fs->id = brw->program_id++;
+   fs->has_flow_control = brw_wm_has_flow_control(fs);
+
+   fs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (fs->tokens == NULL)
+      goto fail;
+
+   tgsi_scan_shader(fs->tokens, &fs->info);
+
+   for (i = 0; i < fs->info.num_inputs; i++)
+      if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
+	 fs->uses_depth = 1;
+
+   if (fs->info.uses_kill)
+      fs->iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+   if (fs->info.writes_z)
+      fs->iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+   return (void *)fs;
 
-   _mesa_delete_program( ctx, prog );
+fail:
+   FREE(fs);
+   return NULL;
 }
 
 
-static GLboolean brwIsProgramNative( struct brw_context *brw,
-				     GLenum target, 
-				     struct gl_program *prog )
+static void *brw_create_vs_state( struct pipe_context *pipe,
+				  const struct pipe_shader_state *shader )
 {
-   return GL_TRUE;
+   struct brw_context *brw = brw_context(pipe);
+
+   struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader);
+   if (vs == NULL)
+      return NULL;
+
+   /* Duplicate tokens, scan shader
+    */
+   vs->id = brw->program_id++;
+   //vs->has_flow_control = brw_wm_has_flow_control(vs);
+
+   /* Tell the draw module about this shader:
+    */
+   
+   /* Done:
+    */
+   return (void *)vs;
 }
 
-static void brwProgramStringNotify( struct brw_context *brw,
-				    GLenum target,
-				    struct gl_program *prog )
+
+static void brw_delete_fs_state( struct pipe_context *pipe, void *prog )
 {
-   struct brw_context *brw = brw_context(ctx);
-
-   if (target == GL_FRAGMENT_PROGRAM_ARB) {
-      struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
-      struct brw_fragment_program *newFP = brw_fragment_program(fprog);
-      const struct brw_fragment_program *curFP =
-         brw_fragment_program_const(brw->fragment_program);
-
-      if (fprog->FogOption) {
-         _mesa_append_fog_code(ctx, fprog);
-         fprog->FogOption = GL_NONE;
-      }
-
-      if (newFP == curFP)
-	 brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      newFP->id = brw->program_id++;      
-      newFP->has_flow_control = brw_wm_has_flow_control(fprog);
-   }
-   else if (target == GL_VERTEX_PROGRAM_ARB) {
-      struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
-      struct brw_vertex_program *newVP = brw_vertex_program(vprog);
-      const struct brw_vertex_program *curVP =
-         brw_vertex_program_const(brw->vertex_program);
-
-      if (newVP == curVP)
-	 brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
-      if (newVP->program.IsPositionInvariant) {
-	 _mesa_insert_mvp_code(ctx, &newVP->program);
-      }
-      newVP->id = brw->program_id++;      
-
-      /* Also tell tnl about it:
-       */
-      _tnl_program_string(ctx, target, prog);
-   }
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
+
+   brw->sws->bo_unreference(fs->const_buffer);
+   FREE( (void *)fs->tokens );
+   FREE( fs );
 }
 
-void brwInitFragProgFuncs( struct dd_function_table *functions )
+
+static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
 {
-   assert(functions->ProgramStringNotify == _tnl_program_string); 
+   struct brw_fragment_shader *vs = (struct brw_fragment_shader *)prog;
 
-   functions->BindProgram = brwBindProgram;
-   functions->NewProgram = brwNewProgram;
-   functions->DeleteProgram = brwDeleteProgram;
-   functions->IsProgramNative = brwIsProgramNative;
-   functions->ProgramStringNotify = brwProgramStringNotify;
+   /* Delete draw shader
+    */
+   FREE( (void *)vs->tokens );
+   FREE( vs );
 }
 
+
+
+
+
+void brw_pipe_shader_init( struct brw_context *brw )
+{
+   brw->base.create_vs_state = brw_create_vs_state;
+   brw->base.bind_vs_state = brw_bind_vs_state;
+   brw->base.delete_vs_state = brw_delete_vs_state;
+
+   brw->base.create_fs_state = brw_create_fs_state;
+   brw->base.bind_fs_state = brw_bind_fs_state;
+   brw->base.delete_fs_state = brw_delete_fs_state;
+}
+
+void brw_pipe_shader_cleanup( struct brw_context *brw )
+{
+}
-- 
cgit v1.2.3


From 99e308a0e0479971fe3a8a0aba586e19456e4b88 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 1 Nov 2009 14:27:35 +0100
Subject: nv50: implement TGSI_OPCODE_AND/OR/XOR

Will use AND for gl_FrontFacing, the face input
is either 0 or 0xffffffff.
---
 src/gallium/drivers/nv50/nv50_program.c | 47 +++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index faf638949f..5944a0b7ff 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -890,6 +890,43 @@ emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	src1->neg ^= 1;
 }
 
+static void
+emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
+	    struct nv50_reg *src1, unsigned op)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0xd0000000;
+	set_long(pc, e);
+
+	check_swap_src_0_1(pc, &src0, &src1);
+	set_dst(pc, dst, e);
+	set_src_0(pc, src0, e);
+
+	if (op != TGSI_OPCODE_AND && op != TGSI_OPCODE_OR &&
+	    op != TGSI_OPCODE_XOR)
+		assert(!"invalid bit op");
+
+	if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
+		set_immd(pc, src1, e);
+		if (op == TGSI_OPCODE_OR)
+			e->inst[0] |= 0x0100;
+		else
+		if (op == TGSI_OPCODE_XOR)
+			e->inst[0] |= 0x8000;
+	} else {
+		set_src_1(pc, src1, e);
+		e->inst[1] |= 0x04000000; /* 32 bit */
+		if (op == TGSI_OPCODE_OR)
+			e->inst[1] |= 0x4000;
+		else
+		if (op == TGSI_OPCODE_XOR)
+			e->inst[1] |= 0x8000;
+	}
+
+	emit(pc, e);
+}
+
 static void
 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
@@ -1838,6 +1875,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_add(pc, dst[c], src[0][c], src[1][c]);
 		}
 		break;
+	case TGSI_OPCODE_AND:
+	case TGSI_OPCODE_XOR:
+	case TGSI_OPCODE_OR:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_bitop2(pc, dst[c], src[0][c], src[1][c],
+				    inst->Instruction.Opcode);
+		}
+		break;
 	case TGSI_OPCODE_ARL:
 		assert(src[0][0]);
 		temp = temp_temp(pc);
-- 
cgit v1.2.3


From 496c9eaacfabc4df4e6fb5ba230e60dc660554c8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 1 Nov 2009 14:04:54 +0100
Subject: nv50: make IF condition safe

Don't assume that a SET that writes to IF's argument
directly precedes the IF.
---
 src/gallium/drivers/nv50/nv50_program.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 5944a0b7ff..66190f070d 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2026,7 +2026,9 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_IF:
 		/* emitting a join_at may not be necessary */
 		assert(pc->if_lvl < MAX_IF_DEPTH);
-		set_pred_wr(pc, 1, 0, pc->if_cond);
+		/* set_pred_wr(pc, 1, 0, pc->if_cond); */
+		emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN,
+			 CVT_F32_F32);
 		emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
 		terminate_mbb(pc);
-- 
cgit v1.2.3


From 5de8f9744015d3645a12dac244ad47daf8481dd2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 1 Nov 2009 14:15:30 +0100
Subject: nv50: handle TGSI_SEMANTIC_FACE

---
 src/gallium/drivers/nv50/nv50_program.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 66190f070d..27827c7ecf 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2463,6 +2463,23 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
 	emit_interp(pc, reg, iv, mode);
 }
 
+/* The face input is always at v[255] (varying space), with a
+ * value of 0 for back-facing, and 0xffffffff for front-facing.
+ */
+static void
+load_frontfacing(struct nv50_pc *pc, struct nv50_reg *a)
+{
+	struct nv50_reg *one = alloc_immd(pc, 1.0f);
+
+	assert(a->rhw == -1);
+	alloc_reg(pc, a); /* do this before rhw is set */
+	a->rhw = 255;
+	load_interpolant(pc, a);
+	emit_bitop2(pc, a, a, one, TGSI_OPCODE_AND);
+
+	FREE(one);
+}
+
 static boolean
 nv50_program_tx_prep(struct nv50_pc *pc)
 {
@@ -2607,6 +2624,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 		int rid, aid;
 		unsigned n = 0, m = pc->attr_nr - flat_nr;
 
+		pc->allow32 = TRUE;
+
 		int base = (TGSI_SEMANTIC_POSITION ==
 			    p->info.input_semantic_name[0]) ? 0 : 1;
 
@@ -2635,6 +2654,12 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			p->cfg.io[n].hw = rid = aid;
 			i = p->cfg.io[n].id_fp;
 
+			if (p->info.input_semantic_name[n] ==
+			    TGSI_SEMANTIC_FACE) {
+				load_frontfacing(pc, &pc->attr[i * 4]);
+				continue;
+			}
+
 			for (c = 0; c < 4; ++c) {
 				if (!pc->attr[i * 4 + c].acc)
 					continue;
-- 
cgit v1.2.3


From 15e7a3b8bb6771d24e5bde7805ea394f9ce0a3ec Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 14:32:50 +0000
Subject: i965g: more files compiling

---
 src/gallium/drivers/i965/Makefile                 |   4 +-
 src/gallium/drivers/i965/brw_defines.h            |   3 -
 src/gallium/drivers/i965/brw_screen.h             |  20 ++
 src/gallium/drivers/i965/brw_screen_tex_layout.c  | 387 ++++++++++++++++++++++
 src/gallium/drivers/i965/brw_screen_texture.c     | 196 +----------
 src/gallium/drivers/i965/brw_wm_constant_buffer.c | 151 +++++++++
 src/gallium/drivers/i965/intel_tex_layout.c       | 137 --------
 7 files changed, 576 insertions(+), 322 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_screen_tex_layout.c
 create mode 100644 src/gallium/drivers/i965/brw_wm_constant_buffer.c
 delete mode 100644 src/gallium/drivers/i965/intel_tex_layout.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index ae37d2d702..d88f34cb7e 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -56,9 +56,9 @@ C_SOURCES = \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
+	brw_screen_tex_layout.c \
 	brw_screen_surface.c \
 	brw_screen_texture.c \
-	brw_batchbuffer.c \
-	intel_tex_layout.c 
+	brw_batchbuffer.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 65cd71c939..92c6b6edc3 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -842,8 +842,5 @@
                                          (BRW_IS_G4X(brw) ? 384 : 256))  /* 512 bit units */
 
 
-#define BRW_TILING_NONE  0
-#define BRW_TILING_Y     1
-#define BRW_TILING_X     2
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 844c6355d5..bd04e689d9 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -65,6 +65,11 @@ struct brw_buffer
    boolean is_user_buffer;
 };
 
+#define BRW_TILING_NONE  0
+#define BRW_TILING_Y     1
+#define BRW_TILING_X     2
+
+
 struct brw_texture
 {
    struct pipe_texture base;
@@ -72,10 +77,17 @@ struct brw_texture
    struct brw_winsys_buffer *bo;
    struct brw_surface_state ss;
 
+   unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];
+   unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
+
+   unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
+
+   boolean compressed;
    unsigned brw_target;
    unsigned pitch;
    unsigned tiling;
    unsigned cpp;
+   unsigned total_height;
 };
 
 
@@ -128,5 +140,13 @@ brw_surface_bo( struct pipe_surface *surface );
 unsigned
 brw_surface_pitch( const struct pipe_surface *surface );
 
+/***********************************************************************
+ * Internal functions 
+ */
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+			     struct brw_texture *tex );
+
+
+
 
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
new file mode 100644
index 0000000000..8377d30564
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -0,0 +1,387 @@
+
+#include "pipe/p_format.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "brw_screen.h"
+#include "brw_debug.h"
+
+static int 
+brw_tex_pitch_align (struct brw_texture *tex,
+		     int pitch)
+{
+   if (!tex->compressed) {
+      int pitch_align;
+
+      switch (tex->tiling) {
+      case BRW_TILING_X:
+	 pitch_align = 512;
+	 break;
+      case BRW_TILING_Y:
+	 pitch_align = 128;
+	 break;
+      default:
+	 /* XXX: Untiled pitch alignment of 64 bytes for now to allow
+	  * render-to-texture to work in all cases. This should
+	  * probably be replaced at some point by some scheme to only
+	  * do this when really necessary, for example standalone
+	  * render target views.
+	  */
+	 pitch_align = 64;
+	 break;
+      }
+
+      pitch = align(pitch * tex->cpp, pitch_align);
+      pitch /= tex->cpp;
+   }
+
+   return pitch;
+}
+
+
+static void 
+brw_tex_alignment_unit(enum pipe_format pf, 
+		       GLuint *w, GLuint *h)
+{
+    switch (pf) {
+    case PIPE_FORMAT_DXT1_RGB:
+    case PIPE_FORMAT_DXT1_RGBA:
+    case PIPE_FORMAT_DXT3_RGBA:
+    case PIPE_FORMAT_DXT5_RGBA:
+    case PIPE_FORMAT_DXT1_SRGB:
+    case PIPE_FORMAT_DXT1_SRGBA:
+    case PIPE_FORMAT_DXT3_SRGBA:
+    case PIPE_FORMAT_DXT5_SRGBA:
+        *w = 4;
+        *h = 4;
+        break;
+
+    default:
+        *w = 4;
+        *h = 2;
+        break;
+    }
+}
+
+/* Record a mip level's offset and image count; allocate its offset table. */
+static void 
+brw_tex_set_level_info(struct brw_texture *tex,
+		       GLuint level,
+		       GLuint nr_images,
+		       GLuint x, GLuint y,
+		       GLuint w, GLuint h, GLuint d)
+{
+   assert(tex->base.width[level] == w);
+   assert(tex->base.height[level] == h);
+   assert(tex->base.depth[level] == d);
+   assert(tex->image_offset[level] == NULL);
+   assert(nr_images >= 1);
+
+   /* Store the offset before logging so the debug line shows the new value. */
+   tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
+   tex->nr_images[level] = nr_images;
+
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+		   level, w, h, d, x, y, tex->level_offset[level]);
+
+   tex->image_offset[level] = MALLOC(nr_images * sizeof(GLuint));
+   tex->image_offset[level][0] = 0;
+}
+
+/* Set image 'img' of 'level' to (x + y * pitch) * cpp + offset. */
+static void
+brw_tex_set_image_offset(struct brw_texture *tex,
+			 GLuint level, GLuint img,
+			 GLuint x, GLuint y, 
+			 GLuint offset)
+{
+   assert((x == 0 && y == 0) || img != 0 || level != 0);
+   assert(img < tex->nr_images[level]);
+
+   /* Assign before logging so the debug line shows the new offset. */
+   tex->image_offset[level][img] = (x + y * tex->pitch) * tex->cpp + offset;
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s level %d img %d pos %d,%d image_offset %x\n",
+		   __FUNCTION__, level, img, x, y, 
+		   tex->image_offset[level][img]);
+}
+
+
+
+static void brw_layout_2d( struct brw_texture *tex )
+{
+   GLuint align_h = 2, align_w = 4;
+   GLuint level;
+   GLuint x = 0;
+   GLuint y = 0;
+   GLuint width = tex->base.width[0];
+   GLuint height = tex->base.height[0];
+
+   tex->pitch = tex->base.width[0];
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+   if (tex->compressed) {
+       tex->pitch = align(tex->base.width[0], align_w);
+   }
+
+   /* May need to adjust pitch to accommodate the placement of
+    * the 2nd mipmap.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap out past the width of its parent.
+    */
+   if (tex->base.last_level > 0) {
+       GLuint mip1_width;
+
+       if (tex->compressed) {
+           mip1_width = align(minify(tex->base.width[0]), align_w)
+               + align(minify(minify(tex->base.width[0])), align_w);
+       } else {
+           mip1_width = align(minify(tex->base.width[0]), align_w)
+               + minify(minify(tex->base.width[0]));
+       }
+
+       if (mip1_width > tex->pitch) {
+           tex->pitch = mip1_width;
+       }
+   }
+
+   /* Pitch must be a whole number of dwords, even though we
+    * express it in texels.
+    */
+   tex->pitch = brw_tex_pitch_align (tex, tex->pitch);
+   tex->total_height = 0;
+
+   for ( level = 0 ; level <= tex->base.last_level ; level++ ) {
+      GLuint img_height;
+
+      brw_tex_set_level_info(tex, level, 1, x, y, width, height, 1);
+
+      if (tex->compressed)
+	 img_height = MAX2(1, height/4);
+      else
+	 img_height = align(height, align_h);
+
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one:
+       */
+      tex->total_height = MAX2(tex->total_height, y + img_height);
+
+      /* Layout_below: step right after second mipmap.
+       */
+      if (level == 1) {
+	 x += align(width, align_w);
+      }
+      else {
+	 y += img_height;
+      }
+
+      width  = minify(width);
+      height = minify(height);
+   }
+}
+
+
+static boolean 
+brw_layout_cubemap_idgng( struct brw_texture *tex )
+{
+   GLuint align_h = 2, align_w = 4;
+   GLuint level;
+   GLuint x = 0;
+   GLuint y = 0;
+   GLuint width = tex->base.width[0];
+   GLuint height = tex->base.height[0];
+   GLuint qpitch = 0;
+   GLuint y_pitch = 0;
+
+   tex->pitch = tex->base.width[0];
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+   y_pitch = align(height, align_h);
+
+   if (tex->compressed) {
+      tex->pitch = align(tex->base.width[0], align_w);
+   }
+
+   if (tex->base.last_level != 0) {
+      GLuint mip1_width;
+
+      if (tex->compressed) {
+	 mip1_width = (align(minify(tex->base.width[0]), align_w) +
+		       align(minify(minify(tex->base.width[0])), align_w));
+      } else {
+	 mip1_width = (align(minify(tex->base.width[0]), align_w) +
+		       minify(minify(tex->base.width[0])));
+      }
+
+      if (mip1_width > tex->pitch) {
+	 tex->pitch = mip1_width;
+      }
+   }
+
+   tex->pitch = brw_tex_pitch_align(tex, tex->pitch);
+
+   if (tex->compressed) {
+      qpitch = ((y_pitch + 
+		 align(minify(y_pitch), align_h) +
+		 11 * align_h) / 4) * tex->pitch * tex->cpp;
+
+      tex->total_height = ((y_pitch + 
+			    align(minify(y_pitch), align_h) + 
+			    11 * align_h) / 4) * 6;
+   } else {
+      qpitch = (y_pitch + 
+		align(minify(y_pitch), align_h) + 
+		11 * align_h) * tex->pitch * tex->cpp;
+
+      tex->total_height = (y_pitch +
+			   align(minify(y_pitch), align_h) +
+			   11 * align_h) * 6;
+   }
+
+   for (level = 0; level <= tex->base.last_level; level++) {
+      GLuint img_height;
+      GLuint nr_images = 6;
+      GLuint q = 0;
+
+      brw_tex_set_level_info(tex, level, nr_images, x, y, width, height, 1);
+
+      for (q = 0; q < nr_images; q++)
+	 brw_tex_set_image_offset(tex, level, q, x, y, q * qpitch);
+
+      if (tex->compressed)
+	 img_height = MAX2(1, height/4);
+      else
+	 img_height = align(height, align_h);
+
+      if (level == 1) {
+	 x += align(width, align_w);
+      }
+      else {
+	 y += img_height;
+      }
+
+      width  = minify(width);
+      height = minify(height);
+   }
+
+   return TRUE;
+}
+
+
+static boolean
+brw_layout_3d_cube( struct brw_texture *tex )
+{
+   GLuint width  = tex->base.width[0];
+   GLuint height = tex->base.height[0];
+   GLuint depth = tex->base.depth[0];
+   GLuint pack_x_pitch, pack_x_nr;
+   GLuint pack_y_pitch;
+   GLuint level;
+   GLuint align_h = 2;
+   GLuint align_w = 4;
+
+   tex->total_height = 0;
+   brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
+
+   if (tex->compressed) {
+      tex->pitch = align(width, align_w);
+      pack_y_pitch = (height + 3) / 4;
+   } else {
+      tex->pitch = brw_tex_pitch_align(tex, tex->base.width[0]);
+      pack_y_pitch = align(tex->base.height[0], align_h);
+   }
+
+   pack_x_pitch = width;
+   pack_x_nr = 1;
+
+   for (level = 0 ; level <= tex->base.last_level ; level++) {
+      GLuint nr_images = tex->base.target == PIPE_TEXTURE_3D ? depth : 6;
+      GLint x = 0;
+      GLint y = 0;
+      GLint q, j;
+
+      brw_tex_set_level_info(tex, level, nr_images,
+				   0, tex->total_height,
+				   width, height, depth);
+
+      for (q = 0; q < nr_images;) {
+	 for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+	    brw_tex_set_image_offset(tex, level, q, x, y, 0);
+	    x += pack_x_pitch;
+	 }
+
+	 x = 0;
+	 y += pack_y_pitch;
+      }
+
+
+      tex->total_height += y;
+      width  = minify(width);
+      height = minify(height);
+      depth  = minify(depth);
+
+      if (tex->compressed) {
+	 pack_y_pitch = (height + 3) / 4;
+
+	 if (pack_x_pitch > align(width, align_w)) {
+	    pack_x_pitch = align(width, align_w);
+	    pack_x_nr <<= 1;
+	 }
+      } else {
+	 if (pack_x_pitch > 4) {
+	    pack_x_pitch >>= 1;
+	    pack_x_nr <<= 1;
+	    assert(pack_x_pitch * pack_x_nr <= tex->pitch);
+	 }
+
+	 if (pack_y_pitch > 2) {
+	    pack_y_pitch >>= 1;
+	    pack_y_pitch = align(pack_y_pitch, align_h);
+	 }
+      }
+   }
+
+   /* The 965's sampler lays cachelines out according to how accesses
+    * in the texture surfaces run, so they may be "vertical" through
+    * memory.  As a result, the docs say in Surface Padding Requirements:
+    * Sampling Engine Surfaces that two extra rows of padding are required.
+    */
+   if (tex->base.target == PIPE_TEXTURE_CUBE)
+      tex->total_height += 2;
+
+   return TRUE;
+}
+
+
+
+GLboolean brw_texture_layout(struct brw_screen *brw_screen,
+			     struct brw_texture *tex )
+{
+   switch (tex->base.target) {
+   case PIPE_TEXTURE_CUBE:
+      if (brw_screen->chipset.is_igdng)
+	 brw_layout_cubemap_idgng( tex );
+      else
+	 brw_layout_3d_cube( tex );
+      break;
+	    
+   case PIPE_TEXTURE_3D:
+      brw_layout_3d_cube( tex );
+      break;
+
+   default:
+      brw_layout_2d( tex );
+      break;
+   }
+
+   if (BRW_DEBUG & DEBUG_TEXTURE)
+      debug_printf("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+		   tex->pitch,
+		   tex->total_height,
+		   tex->cpp,
+		   tex->pitch * tex->total_height * tex->cpp );
+
+   return GL_TRUE;
+}
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 3d069add6f..d527f22a8d 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -196,185 +196,6 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
 }
 
 
-GLboolean brw_miptree_layout(struct brw_context *brw,
-			     struct intel_mipmap_tree *mt,
-			     uint32_t tiling)
-{
-   /* XXX: these vary depending on image format: */
-   /* GLint align_w = 4; */
-
-   switch (mt->target) {
-   case GL_TEXTURE_CUBE_MAP:
-      if (IS_IGDNG(brw->brw_screen->pci_id)) {
-          GLuint align_h = 2, align_w = 4;
-          GLuint level;
-          GLuint x = 0;
-          GLuint y = 0;
-          GLuint width = mt->width0;
-          GLuint height = mt->height0;
-          GLuint qpitch = 0;
-          GLuint y_pitch = 0;
-
-          mt->pitch = mt->width0;
-          intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
-          y_pitch = ALIGN(height, align_h);
-
-          if (mt->compressed) {
-              mt->pitch = ALIGN(mt->width0, align_w);
-          }
-
-          if (mt->last_level != 0) {
-              GLuint mip1_width;
-
-              if (mt->compressed) {
-                  mip1_width = ALIGN(minify(mt->width0), align_w)
-                      + ALIGN(minify(minify(mt->width0)), align_w);
-              } else {
-                  mip1_width = ALIGN(minify(mt->width0), align_w)
-                      + minify(minify(mt->width0));
-              }
-
-              if (mip1_width > mt->pitch) {
-                  mt->pitch = mip1_width;
-              }
-          }
-
-          mt->pitch = intel_miptree_pitch_align(intel, mt, tiling, mt->pitch);
-
-          if (mt->compressed) {
-              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * mt->pitch * mt->cpp;
-              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) / 4 * 6;
-          } else {
-              qpitch = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * mt->pitch * mt->cpp;
-              mt->total_height = (y_pitch + ALIGN(minify(y_pitch), align_h) + 11 * align_h) * 6;
-          }
-
-          for (level = 0; level <= mt->last_level; level++) {
-              GLuint img_height;
-              GLuint nr_images = 6;
-              GLuint q = 0;
-
-              intel_miptree_set_level_info(mt, level, nr_images, x, y, width, 
-                                           height, 1);
-
-              for (q = 0; q < nr_images; q++)
-                  intel_miptree_set_image_offset_ex(mt, level, q, x, y, q * qpitch);
-
-              if (mt->compressed)
-                  img_height = MAX2(1, height/4);
-              else
-                  img_height = ALIGN(height, align_h);
-
-              if (level == 1) {
-                  x += ALIGN(width, align_w);
-              }
-              else {
-                  y += img_height;
-              }
-
-              width  = minify(width);
-              height = minify(height);
-          }
-
-          break;
-      }
-
-   case GL_TEXTURE_3D: {
-      GLuint width  = mt->width0;
-      GLuint height = mt->height0;
-      GLuint depth = mt->depth0;
-      GLuint pack_x_pitch, pack_x_nr;
-      GLuint pack_y_pitch;
-      GLuint level;
-      GLuint align_h = 2;
-      GLuint align_w = 4;
-
-      mt->total_height = 0;
-      intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
-
-      if (mt->compressed) {
-          mt->pitch = ALIGN(width, align_w);
-          pack_y_pitch = (height + 3) / 4;
-      } else {
-	 mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->width0);
-	 pack_y_pitch = ALIGN(mt->height0, align_h);
-      }
-
-      pack_x_pitch = width;
-      pack_x_nr = 1;
-
-      for (level = 0 ; level <= mt->last_level ; level++) {
-	 GLuint nr_images = mt->target == GL_TEXTURE_3D ? depth : 6;
-	 GLint x = 0;
-	 GLint y = 0;
-	 GLint q, j;
-
-	 intel_miptree_set_level_info(mt, level, nr_images,
-				      0, mt->total_height,
-				      width, height, depth);
-
-	 for (q = 0; q < nr_images;) {
-	    for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
-	       intel_miptree_set_image_offset(mt, level, q, x, y);
-	       x += pack_x_pitch;
-	    }
-
-	    x = 0;
-	    y += pack_y_pitch;
-	 }
-
-
-	 mt->total_height += y;
-	 width  = minify(width);
-	 height = minify(height);
-	 depth  = minify(depth);
-
-	 if (mt->compressed) {
-	    pack_y_pitch = (height + 3) / 4;
-
-	    if (pack_x_pitch > ALIGN(width, align_w)) {
-	       pack_x_pitch = ALIGN(width, align_w);
-	       pack_x_nr <<= 1;
-	    }
-	 } else {
-	    if (pack_x_pitch > 4) {
-	       pack_x_pitch >>= 1;
-	       pack_x_nr <<= 1;
-	       assert(pack_x_pitch * pack_x_nr <= mt->pitch);
-	    }
-
-	    if (pack_y_pitch > 2) {
-	       pack_y_pitch >>= 1;
-	       pack_y_pitch = ALIGN(pack_y_pitch, align_h);
-	    }
-	 }
-
-      }
-      /* The 965's sampler lays cachelines out according to how accesses
-       * in the texture surfaces run, so they may be "vertical" through
-       * memory.  As a result, the docs say in Surface Padding Requirements:
-       * Sampling Engine Surfaces that two extra rows of padding are required.
-       * We don't know of similar requirements for pre-965, but given that
-       * those docs are silent on padding requirements in general, let's play
-       * it safe.
-       */
-      if (mt->target == GL_TEXTURE_CUBE_MAP)
-	 mt->total_height += 2;
-      break;
-   }
-
-   default:
-      i945_miptree_layout_2d(intel, mt, tiling);
-      break;
-   }
-   DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
-		mt->pitch,
-		mt->total_height,
-		mt->cpp,
-		mt->pitch * mt->total_height * mt->cpp );
-
-   return GL_TRUE;
-}
 
 
 static void brw_create_texture( struct pipe_screen *screen,
@@ -382,6 +203,21 @@ static void brw_create_texture( struct pipe_screen *screen,
 
 {  
 
+   tex->compressed = pf_is_compressed(tex->base.format);
+
+   if (intel->use_texture_tiling && compress_byte == 0 &&
+       intel->intelScreen->kernel_exec_fencing) {
+      if (IS_965(intel->intelScreen->deviceID) &&
+	  (base_format == GL_DEPTH_COMPONENT ||
+	   base_format == GL_DEPTH_STENCIL_EXT))
+	 tiling = I915_TILING_Y;
+      else
+	 tiling = I915_TILING_X;
+   } else
+      tiling = I915_TILING_NONE;
+
+
+
    key.format = tex->base.format;
    key.pitch = tex->pitch;
    key.depth = tex->base.depth[0];
@@ -389,7 +225,7 @@ static void brw_create_texture( struct pipe_screen *screen,
    key.offset = 0;
 
    key.target = tex->brw_target;	/* translated to BRW enum */
-   //key.depthmode = 0; /* XXX: add this to gallium? or the state tracker? */
+   //key.depthmode = 0; /* XXX: add this to gallium? or handle in the state tracker? */
    key.last_level = tex->base.last_level;
    key.width = tex->base.depth[0];
    key.height = tex->base.height[0];
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
new file mode 100644
index 0000000000..7d2533b104
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -0,0 +1,151 @@
+/* XXX: Constant buffers disabled
+ */
+
+
+/**
+ * Create the constant buffer surface.  Vertex/fragment shader constants will be
+ * read from this buffer with Data Port Read instructions/messages.
+ */
+struct brw_winsys_buffer *
+brw_create_constant_surface( struct brw_context *brw,
+                             struct brw_surface_key *key )
+{
+   const GLint w = key->width - 1;
+   struct brw_winsys_buffer *bo;
+
+   memset(&surf, 0, sizeof(surf));
+
+   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   surf.ss0.surface_type = BRW_SURFACE_BUFFER;
+   surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+
+   assert(key->bo);
+   surf.ss1.base_addr = key->bo->offset; /* reloc */
+
+   surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
+   surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   surf.ss3.depth = (w >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
+ 
+   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+			 key, sizeof(*key),
+			 &key->bo, key->bo ? 1 : 0,
+			 &surf, sizeof(surf),
+			 NULL, NULL);
+
+   if (key->bo) {
+      /* Emit relocation to surface contents */
+      brw->sws->bo_emit_reloc(bo,
+			      I915_GEM_DOMAIN_SAMPLER, 0,
+			      0,
+			      offsetof(struct brw_surface_state, ss1),
+			      key->bo);
+   }
+
+   return bo;
+}
+
+
+
+/**
+ * Update the surface state for a WM constant buffer.
+ * The constant buffer will be (re)allocated here if needed.
+ */
+static void
+brw_update_wm_constant_surface( struct brw_context *brw,
+                                GLuint surf)
+{
+   struct brw_surface_key key;
+   struct brw_fragment_shader *fp = brw->curr.fragment_shader;
+   struct pipe_buffer *cbuf = brw->curr.fragment_constants;
+   int pitch;
+
+   /* If we're in this state update atom, we need to update WM constants, so
+    * free the old buffer and create a new one for the new contents.
+    */
+   brw->sws->bo_unreference(fp->const_buffer);
+   fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (cbuf == NULL) {
+      brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
+      brw->wm.surf_bo[surf] = NULL;
+      return;
+   }
+
+   /* cbuf is non-NULL from here on, so its size can be read safely. */
+   pitch = cbuf->size / (4 * sizeof(float));
+   memset(&key, 0, sizeof(key));
+
+   key.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   key.ss0.surface_type = BRW_SURFACE_BUFFER;
+   key.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
+   key.bo = brw_buffer(cbuf)->bo;
+
+   key.ss2.width = (pitch-1) & 0x7f;            /* bits 6:0 of size or width */
+   key.ss2.height = ((pitch-1) >> 7) & 0x1fff;  /* bits 19:7 of size or width */
+   key.ss3.depth = ((pitch-1) >> 20) & 0x7f;    /* bits 26:20 of size or width */
+   key.ss3.pitch = (pitch * 4 * sizeof(float)) - 1; /* ignored?? */
+   brw_set_surface_tiling(&surf, key->tiling); /* XXX: 'surf' is a GLuint and 'key' is not a pointer here -- needs fixing */
+
+   /*
+   printf("%s:\n", __FUNCTION__);
+   printf("  width %d  height %d  depth %d  cpp %d  pitch %d\n",
+          key.width, key.height, key.depth, key.cpp, key.pitch);
+   */
+
+   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
+   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
+                                            BRW_SS_SURFACE,
+                                            &key, sizeof(key),
+                                            &key.bo, 1,
+                                            NULL);
+   if (brw->wm.surf_bo[surf] == NULL) {
+      brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
+   }
+   brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+}
+
+/**
+ * Updates surface / buffer for fragment shader constant buffer, if
+ * one is required.
+ *
+ * This consumes the state updates for the constant buffer, and produces
+ * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
+ * inclusion in the binding table.
+ */
+static void prepare_wm_constant_surface(struct brw_context *brw )
+{
+   struct brw_fragment_program *fp =
+      (struct brw_fragment_program *) brw->fragment_program;
+   GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
+
+   drm_intel_bo_unreference(fp->const_buffer);
+   fp->const_buffer = brw_wm_update_constant_buffer(brw);
+
+   /* If there's no constant buffer, then no surface BO is needed to point at
+    * it.
+    */
+   if (fp->const_buffer == 0) {
+      if (brw->wm.surf_bo[surf] != NULL) {
+	 drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
+	 brw->wm.surf_bo[surf] = NULL;
+	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+      }
+      return;
+   }
+
+   brw_update_wm_constant_surface(brw, surf);
+}
+
+const struct brw_tracked_state brw_wm_constant_surface = {
+   .dirty = {
+      .mesa = (_NEW_PROGRAM_CONSTANTS),
+      .brw = (BRW_NEW_FRAGMENT_PROGRAM),
+      .cache = 0
+   },
+   .prepare = prepare_wm_constant_surface,
+};
diff --git a/src/gallium/drivers/i965/intel_tex_layout.c b/src/gallium/drivers/i965/intel_tex_layout.c
deleted file mode 100644
index 7e0ca553f2..0000000000
--- a/src/gallium/drivers/i965/intel_tex_layout.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keith@tungstengraphics.com>
-  *   Michel Dänzer <michel@tungstengraphics.com>
-  */
-
-#include "intel_tex_layout.h"
-
-void intel_get_texture_alignment_unit(GLenum internalFormat, GLuint *w, GLuint *h)
-{
-    switch (internalFormat) {
-    case GL_COMPRESSED_RGB_FXT1_3DFX:
-    case GL_COMPRESSED_RGBA_FXT1_3DFX:
-        *w = 8;
-        *h = 4;
-        break;
-
-    case GL_RGB_S3TC:
-    case GL_RGB4_S3TC:
-    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
-    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
-    case GL_RGBA_S3TC:
-    case GL_RGBA4_S3TC:
-    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
-    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
-        *w = 4;
-        *h = 4;
-        break;
-
-    default:
-        *w = 4;
-        *h = 2;
-        break;
-    }
-}
-
-void i945_miptree_layout_2d( struct intel_context *intel,
-			     struct intel_mipmap_tree *mt,
-			     uint32_t tiling )
-{
-   GLuint align_h = 2, align_w = 4;
-   GLuint level;
-   GLuint x = 0;
-   GLuint y = 0;
-   GLuint width = mt->width0;
-   GLuint height = mt->height0;
-
-   mt->pitch = mt->width0;
-   intel_get_texture_alignment_unit(mt->internal_format, &align_w, &align_h);
-
-   if (mt->compressed) {
-       mt->pitch = ALIGN(mt->width0, align_w);
-   }
-
-   /* May need to adjust pitch to accomodate the placement of
-    * the 2nd mipmap.  This occurs when the alignment
-    * constraints of mipmap placement push the right edge of the
-    * 2nd mipmap out past the width of its parent.
-    */
-   if (mt->last_level) {
-       GLuint mip1_width;
-
-       if (mt->compressed) {
-           mip1_width = ALIGN(minify(mt->width0), align_w)
-               + ALIGN(minify(minify(mt->width0)), align_w);
-       } else {
-           mip1_width = ALIGN(minify(mt->width0), align_w)
-               + minify(minify(mt->width0));
-       }
-
-       if (mip1_width > mt->pitch) {
-           mt->pitch = mip1_width;
-       }
-   }
-
-   /* Pitch must be a whole number of dwords, even though we
-    * express it in texels.
-    */
-   mt->pitch = intel_miptree_pitch_align (intel, mt, tiling, mt->pitch);
-   mt->total_height = 0;
-
-   for ( level = 0 ; level <= mt->last_level ; level++ ) {
-      GLuint img_height;
-
-      intel_miptree_set_level_info(mt, level, 1, x, y, width, 
-				   height, 1);
-
-      if (mt->compressed)
-	 img_height = MAX2(1, height/4);
-      else
-	 img_height = ALIGN(height, align_h);
-
-
-      /* Because the images are packed better, the final offset
-       * might not be the maximal one:
-       */
-      mt->total_height = MAX2(mt->total_height, y + img_height);
-
-      /* Layout_below: step right after second mipmap.
-       */
-      if (level == 1) {
-	 x += ALIGN(width, align_w);
-      }
-      else {
-	 y += img_height;
-      }
-
-      width  = minify(width);
-      height = minify(height);
-   }
-}
-- 
cgit v1.2.3


From 6981bbfabeeaf60111d737dd0d5a93496fd16758 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 15:59:21 +0000
Subject: i965g: more files compiling

---
 src/gallium/drivers/i965/Makefile                |   3 +-
 src/gallium/drivers/i965/brw_context.h           |   8 +-
 src/gallium/drivers/i965/brw_pipe_blend.c        |   2 +
 src/gallium/drivers/i965/brw_pipe_rast.c         | 199 ++++++++++++++++-------
 src/gallium/drivers/i965/brw_screen_tex_layout.c |  29 ++++
 src/gallium/drivers/i965/brw_screen_texture.c    | 157 ++++++++++--------
 src/gallium/drivers/i965/brw_winsys.h            |   8 +-
 src/gallium/drivers/i965/brw_wm_sampler_state.c  |  10 +-
 8 files changed, 269 insertions(+), 147 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index d88f34cb7e..48950544c9 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -31,6 +31,7 @@ C_SOURCES = \
 	brw_pipe_query.c \
 	brw_pipe_shader.c \
 	brw_pipe_flush.c \
+	brw_pipe_rast.c \
 	brw_sf.c \
 	brw_sf_emit.c \
 	brw_sf_state.c \
@@ -57,8 +58,8 @@ C_SOURCES = \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
 	brw_screen_tex_layout.c \
-	brw_screen_surface.c \
 	brw_screen_texture.c \
+	brw_screen_surface.c \
 	brw_batchbuffer.c 
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 3e9315c41f..b94c511499 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -102,7 +102,7 @@
  *
  * CS - Clipper.  Mesa's clipping algorithms are imported to run on
  * this unit.  The fixed function part performs cliptesting against
- * the 6 fixed clipplanes and makes descisions on whether or not the
+ * the 6 fixed clipplanes and makes decisions on whether or not the
  * incoming primitive needs to be passed to a thread for clipping.
  * User clip planes are handled via cooperation with the VS thread.
  *
@@ -123,8 +123,6 @@
 struct brw_context;
 
 struct brw_depth_stencil_state {
-   //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
-
    /* Precalculated hardware state:
     */
    struct brw_cc0 cc0;
@@ -138,8 +136,6 @@ struct brw_depth_stencil_state {
 
 
 struct brw_blend_state {
-   //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
-
    /* Precalculated hardware state:
     */
    struct brw_cc2 cc2;
@@ -181,7 +177,7 @@ struct brw_fragment_shader {
 
 
 struct brw_sampler {
-   struct pipe_sampler_state templ;
+   float border_color[4];
    struct brw_ss0 ss0;
    struct brw_ss1 ss1;
    struct brw_ss3 ss3;
diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index cc9ee2e8db..f6da9254ef 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -111,6 +111,8 @@ static void *brw_create_blend_state( struct pipe_context *pipe,
 				     const struct pipe_blend_state *templ )
 {
    struct brw_blend_state *blend = CALLOC_STRUCT(brw_blend_state);
+   if (blend == NULL)
+      return NULL;
 
    if (templ->logicop_enable) {
       blend->cc2.logicop_enable = 1;
diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 51159bf147..27c568de0a 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -1,84 +1,159 @@
 
-static void
-calculate_clip_key_rast()
-{
-   if (BRW_IS_IGDNG(brw))
-       key.clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
-   else
-       key.clip_mode = BRW_CLIPMODE_NORMAL;
+#include "util/u_memory.h"
+#include "pipe/p_defines.h"
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_pipe_rast.h"
+#include "brw_wm.h"
 
-   key.do_flat_shading = brw->rast->templ.flatshade;
 
-   if (key.primitive == PIPE_PRIM_TRIANGLES) {
-      if (brw->rast->templ.cull_mode = PIPE_WINDING_BOTH)
-	 key.clip_mode = BRW_CLIPMODE_REJECT_ALL;
-      else {
-	 key.fill_ccw = CLIP_CULL;
-	 key.fill_cw = CLIP_CULL;
-
-	 if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CCW)) {
-	    key.fill_ccw = translate_fill(brw->rast.fill_ccw);
-	 }
-
-	 if (!(brw->rast->templ.cull_mode & PIPE_WINDING_CW)) {
-	    key.fill_cw = translate_fill(brw->rast.fill_cw);
-	 }
-
-	 if (key.fill_cw != CLIP_FILL ||
-	     key.fill_ccw != CLIP_FILL) {
-	    key.do_unfilled = 1;
-	    key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
-	 }
-
-	 key.offset_ccw = brw->rast.templ.offset_ccw;
-	 key.offset_cw = brw->rast.templ.offset_cw;
-
-	 if (brw->rast.templ.light_twoside &&
-	     key.fill_cw != CLIP_CULL) 
-	    key.copy_bfc_cw = 1;
-
-	 if (brw->rast.templ.light_twoside &&
-	     key.fill_ccw != CLIP_CULL) 
-	    key.copy_bfc_ccw = 1;
-	 }
-      }
+static unsigned translate_fill( unsigned fill )
+{
+   switch (fill) {
+   case PIPE_POLYGON_MODE_FILL:
+      return CLIP_FILL;
+   case PIPE_POLYGON_MODE_LINE:
+      return CLIP_LINE;
+   case PIPE_POLYGON_MODE_POINT:
+      return CLIP_POINT;
+   default:
+      assert(0);
+      return CLIP_FILL;
    }
 }
 
 
+/* Calculates the key for triangle-mode clipping.  Non-triangle
+ * clipping keys use much less information and are computed on the
+ * fly.
+ */
 static void
-calculate_line_stipple_rast()
+calculate_clip_key_rast( const struct brw_context *brw,
+			 const struct pipe_rasterizer_state *templ,
+			 const struct brw_rasterizer_state *rast,
+			 struct brw_clip_prog_key *key)
 {
-   GLfloat tmp;
-   GLint tmpi;
+   memset(key, 0, sizeof *key);
 
-   memset(&bls, 0, sizeof(bls));
-   bls.header.opcode = CMD_LINE_STIPPLE_PATTERN;
-   bls.header.length = sizeof(bls)/4 - 2;
-   bls.bits0.pattern = brw->curr.rast.line_stipple_pattern;
-   bls.bits1.repeat_count = brw->curr.rast.line_stipple_factor + 1;
+   if (brw->chipset.is_igdng)
+       key->clip_mode = BRW_CLIPMODE_KERNEL_CLIP;
+   else
+       key->clip_mode = BRW_CLIPMODE_NORMAL;
 
-   tmp = 1.0 / (GLfloat) bls.bits1.repeat_count;
-   tmpi = tmp * (1<<13);
+   key->do_flat_shading = templ->flatshade;
 
-   bls.bits1.inverse_repeat_count = tmpi;
+   if (templ->cull_mode == PIPE_WINDING_BOTH) {
+      key->clip_mode = BRW_CLIPMODE_REJECT_ALL;
+      return;
+   }
 
-}
+   key->fill_ccw = CLIP_CULL;
+   key->fill_cw = CLIP_CULL;
 
+   if (!(templ->cull_mode & PIPE_WINDING_CCW)) {
+      key->fill_ccw = translate_fill(templ->fill_ccw);
+   }
+
+   if (!(templ->cull_mode & PIPE_WINDING_CW)) {
+      key->fill_cw = translate_fill(templ->fill_cw);
+   }
+
+   if (key->fill_cw != CLIP_FILL ||
+       key->fill_ccw != CLIP_FILL) {
+      key->do_unfilled = 1;
+      key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
+   }
+
+   key->offset_ccw = templ->offset_ccw;
+   key->offset_cw = templ->offset_cw;
+
+   if (templ->light_twoside && key->fill_cw != CLIP_CULL) 
+      key->copy_bfc_cw = 1;
+   
+   if (templ->light_twoside && key->fill_ccw != CLIP_CULL) 
+      key->copy_bfc_ccw = 1;
+}
 
 
 static void
-calculate_wm_lookup()
+calculate_line_stipple_rast( const struct pipe_rasterizer_state *templ,
+			     struct brw_line_stipple *bls )
 {
-   if (rast->fill_cw == PIPE_POLYGON_MODE_LINE &&
-       rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
-      line_aa = AA_ALWAYS;
-   }
-   else if (rast->fill_cw == PIPE_POLYGON_MODE_LINE ||
-	    rast->fill_ccw == PIPE_POLYGON_MODE_LINE) {
-      line_aa = AA_SOMETIMES;
+   GLfloat tmp = 1.0f / (templ->line_stipple_factor + 1);
+   GLint tmpi = tmp * (1<<13);
+
+   bls->header.opcode = CMD_LINE_STIPPLE_PATTERN;
+   bls->header.length = sizeof(*bls)/4 - 2;
+   bls->bits0.pattern = templ->line_stipple_pattern;
+   bls->bits1.repeat_count = templ->line_stipple_factor + 1;
+   bls->bits1.inverse_repeat_count = tmpi;
+}
+
+static void *brw_create_rasterizer_state( struct pipe_context *pipe,
+					  const struct pipe_rasterizer_state *templ )
+{
+   struct brw_context *brw = brw_context(pipe);
+   struct brw_rasterizer_state *rast;
+
+   rast = CALLOC_STRUCT(brw_rasterizer_state);
+   if (rast == NULL)
+      return NULL;
+
+   rast->templ = *templ;
+
+   calculate_clip_key_rast( brw, templ, rast, &rast->clip_key );
+   
+   if (templ->line_stipple_enable)
+      calculate_line_stipple_rast( templ, &rast->bls );
+
+   /* Calculate lookup value for WM IZ table.
+    */
+   if (templ->line_smooth) {
+      if (templ->fill_cw == PIPE_POLYGON_MODE_LINE &&
+	  templ->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+	 rast->unfilled_aa_line = AA_ALWAYS;
+      }
+      else if (templ->fill_cw == PIPE_POLYGON_MODE_LINE ||
+	       templ->fill_ccw == PIPE_POLYGON_MODE_LINE) {
+	 rast->unfilled_aa_line = AA_SOMETIMES;
+      }
+      else {
+	 rast->unfilled_aa_line = AA_NEVER;
+      }
    }
    else {
-      line_aa = AA_NEVER;
+      rast->unfilled_aa_line = AA_NEVER;
    }
+
+   return (void *)rast;
+}
+
+
+static void brw_bind_rasterizer_state(struct pipe_context *pipe,
+				 void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   brw->curr.rast = (const struct brw_rasterizer_state *)cso;
+   brw->state.dirty.mesa |= PIPE_NEW_RAST;
+}
+
+static void brw_delete_rasterizer_state(struct pipe_context *pipe,
+				  void *cso)
+{
+   struct brw_context *brw = brw_context(pipe);
+   assert((const void *)cso != (const void *)brw->curr.rast);
+   FREE(cso);
+}
+
+
+
+void brw_pipe_rast_init( struct brw_context *brw )
+{
+   brw->base.create_rasterizer_state = brw_create_rasterizer_state;
+   brw->base.bind_rasterizer_state = brw_bind_rasterizer_state;
+   brw->base.delete_rasterizer_state = brw_delete_rasterizer_state;
+}
+
+void brw_pipe_rast_cleanup( struct brw_context *brw )
+{
 }
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
index 8377d30564..bcdf8d8074 100644
--- a/src/gallium/drivers/i965/brw_screen_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -1,3 +1,29 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
 
 #include "pipe/p_format.h"
 
@@ -7,6 +33,9 @@
 #include "brw_screen.h"
 #include "brw_debug.h"
 
+/* Code to layout images in a mipmap tree for i965.
+ */
+
 static int 
 brw_tex_pitch_align (struct brw_texture *tex,
 		     int pitch)
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index d527f22a8d..989013953b 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -29,12 +29,12 @@
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
 
-/* Code to layout images in a mipmap tree for i965.
- */
+#include "util/u_memory.h"
 
-#include "brw_tex_layout.h"
-
-#define FILE_DEBUG_FLAG DEBUG_MIPTREE
+#include "brw_screen.h"
+#include "brw_defines.h"
+#include "brw_structs.h"
+#include "brw_winsys.h"
 
 
@@ -176,94 +176,113 @@ static GLuint translate_tex_format( enum pipe_format pf )
    }
 }
 
-static void
-brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling)
-{
-   switch (tiling) {
-   case BRW_TILING_NONE:
-      surf->ss3.tiled_surface = 0;
-      surf->ss3.tile_walk = 0;
-      break;
-   case BRW_TILING_X:
-      surf->ss3.tiled_surface = 1;
-      surf->ss3.tile_walk = BRW_TILEWALK_XMAJOR;
-      break;
-   case BRW_TILING_Y:
-      surf->ss3.tiled_surface = 1;
-      surf->ss3.tile_walk = BRW_TILEWALK_YMAJOR;
-      break;
-   }
-}
 
 
-static void brw_create_texture( struct pipe_screen *screen,
-				const pipe_texture *templ )
+static struct pipe_texture *brw_create_texture( struct pipe_screen *screen,
+						const struct pipe_texture *templ )
 
 {  
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_texture *tex;
+   
+   tex = CALLOC_STRUCT(brw_texture);
+   if (tex == NULL)
+      return NULL;
 
    tex->compressed = pf_is_compressed(tex->base.format);
 
-   if (intel->use_texture_tiling && compress_byte == 0 &&
-       intel->intelScreen->kernel_exec_fencing) {
-      if (IS_965(intel->intelScreen->deviceID) &&
-	  (base_format == GL_DEPTH_COMPONENT ||
-	   base_format == GL_DEPTH_STENCIL_EXT))
-	 tiling = I915_TILING_Y;
+   /* XXX: No tiling with compressed textures??
+    */
+   if (tex->compressed == 0 
+       /* && bscreen->use_texture_tiling */
+       /* && bscreen->kernel_exec_fencing */) 
+   {
+      if (bscreen->chipset.is_965 &&
+	  pf_is_depth_or_stencil(templ->format))
+	 tex->tiling = BRW_TILING_Y;
       else
-	 tiling = I915_TILING_X;
-   } else
-      tiling = I915_TILING_NONE;
+	 tex->tiling = BRW_TILING_X;
+   } 
+   else {
+      tex->tiling = BRW_TILING_NONE;
+   }
 
 
+   memcpy(&tex->base, templ, sizeof *templ);
 
-   key.format = tex->base.format;
-   key.pitch = tex->pitch;
-   key.depth = tex->base.depth[0];
-   key.bo = tex->buffer;
-   key.offset = 0;
+   if (!brw_texture_layout( bscreen, tex ))
+      goto fail;
 
-   key.target = tex->brw_target;	/* translated to BRW enum */
-   //key.depthmode = 0; /* XXX: add this to gallium? or handle in the state tracker? */
-   key.last_level = tex->base.last_level;
-   key.width = tex->base.depth[0];
-   key.height = tex->base.height[0];
-   key.cpp = tex->cpp;
-   key.tiling = tex->tiling;
+   tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+   tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
 
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+   tex->ss.ss1.base_addr = tex->bo->offset; /* reloc */
+   tex->ss.ss2.mip_count = tex->base.last_level;
+   tex->ss.ss2.width = tex->base.width[0] - 1;
+   tex->ss.ss2.height = tex->base.height[0] - 1;
 
+   switch (tex->tiling) {
+   case BRW_TILING_NONE:
+      tex->ss.ss3.tiled_surface = 0;
+      tex->ss.ss3.tile_walk = 0;
+      break;
+   case BRW_TILING_X:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case BRW_TILING_Y:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
 
-   surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
-   surf.ss0.surface_type = translate_tex_target(key->target);
-   surf.ss0.surface_format = translate_tex_format(key->format /* , key->depthmode */ );
+   tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+   tex->ss.ss3.depth = tex->base.depth[0] - 1;
 
-   /* This is ok for all textures with channel width 8bit or less:
-    */
-/*    surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   assert(key->bo);
-   surf.ss1.base_addr = key->bo->offset; /* reloc */
-   surf.ss2.mip_count = key->last_level;
-   surf.ss2.width = key->width - 1;
-   surf.ss2.height = key->height - 1;
-   brw_set_surface_tiling(&surf, key->tiling);
-   surf.ss3.pitch = (key->pitch * key->cpp) - 1;
-   surf.ss3.depth = key->depth - 1;
-
-   surf.ss4.min_lod = 0;
+   tex->ss.ss4.min_lod = 0;
  
-   if (key->target == PIPE_TEXTURE_CUBE) {
-      surf.ss0.cube_pos_x = 1;
-      surf.ss0.cube_pos_y = 1;
-      surf.ss0.cube_pos_z = 1;
-      surf.ss0.cube_neg_x = 1;
-      surf.ss0.cube_neg_y = 1;
-      surf.ss0.cube_neg_z = 1;
+   if (tex->base.target == PIPE_TEXTURE_CUBE) {
+      tex->ss.ss0.cube_pos_x = 1;
+      tex->ss.ss0.cube_pos_y = 1;
+      tex->ss.ss0.cube_pos_z = 1;
+      tex->ss.ss0.cube_neg_x = 1;
+      tex->ss.ss0.cube_neg_y = 1;
+      tex->ss.ss0.cube_neg_z = 1;
    }
 
+   return &tex->base;
+
+fail:
+   bscreen->sws->bo_unreference(tex->bo);
+   FREE(tex);
+   return NULL;
 }
 
 
+static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
+						const struct pipe_texture *templ,
+						const unsigned *stride,
+						struct pipe_buffer *buffer)
+{
+   return NULL;
+}
+
+static void brw_texture_destroy(struct pipe_texture *pt)
+{
+   //bscreen->sws->bo_unreference(tex->bo);
+   FREE(pt);
+}
+
+
+
+
+
 
 
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 33032276bc..66a94b4b00 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -243,10 +243,10 @@ boolean brw_get_texture_buffer_brw(struct pipe_texture *texture,
  *
  * TODO UGLY
  */
-struct pipe_texture * brw_texture_blanket(struct pipe_screen *screen,
-					  struct pipe_texture *tmplt,
-					  unsigned pitch,
-					  struct brw_winsys_buffer *buffer);
+struct pipe_texture * brw_texture_blanket_ws(struct pipe_screen *screen,
+					     const struct pipe_texture *tmplt,
+					     const unsigned *stride,
+					     struct brw_winsys_buffer *buffer);
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 55698a58bb..ddd88d6e22 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -126,10 +126,10 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
 
       if (pf_is_depth_or_stencil(tex->base.format)) {
 	 float bordercolor[4] = {
-	    sampler->templ.border_color[0],
-	    sampler->templ.border_color[0],
-	    sampler->templ.border_color[0],
-	    sampler->templ.border_color[0]
+	    sampler->border_color[0],
+	    sampler->border_color[0],
+	    sampler->border_color[0],
+	    sampler->border_color[0]
 	 };
 	 /* GL specs that border color for depth textures is taken from the
 	  * R channel, while the hardware uses A.  Spam R into all the
@@ -137,7 +137,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
 	  */
 	 brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor);
       } else {
-	 brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->templ.border_color);
+	 brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->border_color);
       }
    }
 }
-- 
cgit v1.2.3


From 4a3e002cf9bad3e7314653abbc740624c2c8b31a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 17:18:56 +0000
Subject: i965g: more files compiling

---
 src/gallium/drivers/i965/brw_defines.h        |   1 +
 src/gallium/drivers/i965/brw_screen.h         |  42 +++-
 src/gallium/drivers/i965/brw_screen_surface.c | 304 +++++++++++++-------------
 src/gallium/drivers/i965/brw_screen_texture.c |  24 +-
 4 files changed, 205 insertions(+), 166 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_defines.h b/src/gallium/drivers/i965/brw_defines.h
index 92c6b6edc3..e201ce4d7c 100644
--- a/src/gallium/drivers/i965/brw_defines.h
+++ b/src/gallium/drivers/i965/brw_defines.h
@@ -417,6 +417,7 @@
 #define BRW_SURFACEFORMAT_R16G16B16_SNORM                0x19D 
 #define BRW_SURFACEFORMAT_R16G16B16_SSCALED              0x19E 
 #define BRW_SURFACEFORMAT_R16G16B16_USCALED              0x19F
+#define BRW_SURFACEFORMAT_INVALID                        0xFFF
 
 #define BRW_SURFACERETURNFORMAT_FLOAT32  0
 #define BRW_SURFACERETURNFORMAT_S1       1
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index bd04e689d9..11b480b1ac 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -69,17 +69,35 @@ struct brw_buffer
 #define BRW_TILING_Y     1
 #define BRW_TILING_X     2
 
+union brw_surface_id {
+   struct {
+      unsigned face:3;
+      unsigned zslice:13;
+      unsigned level:16;
+   } bits;
+   unsigned value;
+};
+
+
+struct brw_surface
+{
+   struct pipe_surface base;
+   union brw_surface_id id;
+   struct brw_surface_state ss;
+   struct brw_winsys_buffer *bo;
+   struct brw_surface *next, *prev;
+};
+
+
 
 struct brw_texture
 {
    struct pipe_texture base;
-
    struct brw_winsys_buffer *bo;
    struct brw_surface_state ss;
 
    unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];
    unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];
-
    unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];
 
    boolean compressed;
@@ -88,15 +106,11 @@ struct brw_texture
    unsigned tiling;
    unsigned cpp;
    unsigned total_height;
+
+   struct brw_surface views[2];
 };
 
 
-struct brw_surface
-{
-   struct pipe_surface base;
-   struct brw_surface_state ss;
-   struct brw_winsys_buffer *bo;
-};
 
 /*
  * Cast wrappers
@@ -125,6 +139,12 @@ brw_buffer(struct pipe_buffer *buffer)
    return (struct brw_buffer *)buffer;
 }
 
+static INLINE struct brw_texture *
+brw_texture(struct pipe_texture *texture)
+{
+   return (struct brw_texture *)texture;
+}
+
 
 /* Pipe buffer helpers
  */
@@ -146,6 +166,12 @@ brw_surface_pitch( const struct pipe_surface *surface );
 GLboolean brw_texture_layout(struct brw_screen *brw_screen,
 			     struct brw_texture *tex );
 
+void brw_update_texture( struct brw_screen *brw_screen,
+			 struct brw_texture *tex );
+
+
+void brw_screen_tex_init( struct brw_screen *brw_screen );
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
 
 
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 01d4b2d2b1..b4ad91278b 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -1,135 +1,51 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ 
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+ 
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+ 
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ 
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
 
 #include "pipe/p_screen.h"
 #include "brw_screen.h"
+#include "brw_defines.h"
 
-
-/**
- * Sets up a surface state structure to point at the given region.
- * While it is only used for the front/back buffer currently, it should be
- * usable for further buffers when doing ARB_draw_buffer support.
- */
-static void
-brw_update_renderbuffer_surface(struct brw_context *brw,
-				struct gl_renderbuffer *rb,
-				unsigned int unit)
-{
-   struct brw_winsys_buffer *region_bo = NULL;
-   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
-   struct intel_region *region = irb ? irb->region : NULL;
-   struct {
-      unsigned int surface_type;
-      unsigned int surface_format;
-      unsigned int width, height, pitch, cpp;
-      GLubyte color_mask[4];
-      GLboolean color_blend;
-      uint32_t tiling;
-      uint32_t draw_offset;
-   } key;
-
-   memset(&key, 0, sizeof(key));
-
-   if (region != NULL) {
-      region_bo = region->buffer;
-
-      key.surface_type = BRW_SURFACE_2D;
-      switch (irb->texformat->MesaFormat) {
-      case PIPE_FORMAT_ARGB8888:
-	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-	 break;
-      case PIPE_FORMAT_RGB565:
-	 key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM;
-	 break;
-      case PIPE_FORMAT_ARGB1555:
-	 key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM;
-	 break;
-      case PIPE_FORMAT_ARGB4444:
-	 key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
-	 break;
-      default:
-	 debug_printf("Bad renderbuffer format: %d\n",
-		      irb->texformat->MesaFormat);
-	 assert(0);
-	 key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
-	 return;
-      }
-      key.tiling = region->tiling;
-      if (brw->intel.intelScreen->driScrnPriv->dri2.enabled) {
-	 key.width = rb->Width;
-	 key.height = rb->Height;
-      } else {
-	 key.width = region->width;
-	 key.height = region->height;
-      }
-      key.pitch = region->pitch;
-      key.cpp = region->cpp;
-      key.draw_offset = region->draw_offset; /* cur 3d or cube face offset */
-   } 
-
-   memcpy(key.color_mask, ctx->Color.ColorMask,
-	  sizeof(key.color_mask));
-
-   key.color_blend = (!ctx->Color._LogicOpEnabled &&
-		      ctx->Color.BlendEnabled);
-
-   brw->sws->bo_unreference(brw->wm.surf_bo[unit]);
-   brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
-					    BRW_SS_SURFACE,
-					    &key, sizeof(key),
-					    &region_bo, 1,
-					    NULL);
-
-   if (brw->wm.surf_bo[unit] == NULL) {
-      struct brw_surface_state surf;
-
-      memset(&surf, 0, sizeof(surf));
-
-      surf.ss0.surface_format = key.surface_format;
-      surf.ss0.surface_type = key.surface_type;
-      if (key.tiling == I915_TILING_NONE) {
-	 surf.ss1.base_addr = key.draw_offset;
-      } else {
-	 uint32_t tile_offset = key.draw_offset % 4096;
-
-	 surf.ss1.base_addr = key.draw_offset - tile_offset;
-
-	 assert(BRW_IS_G4X(brw) || tile_offset == 0);
-	 if (BRW_IS_G4X(brw)) {
-	    if (key.tiling == I915_TILING_X) {
-	       /* Note that the low bits of these fields are missing, so
-		* there's the possibility of getting in trouble.
-		*/
-	       surf.ss5.x_offset = (tile_offset % 512) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 512 / 2;
-	    } else {
-	       surf.ss5.x_offset = (tile_offset % 128) / key.cpp / 4;
-	       surf.ss5.y_offset = tile_offset / 128 / 2;
-	    }
-	 }
-      }
-
-      if (region_bo != NULL)
-	 surf.ss1.base_addr += region_bo->offset; /* reloc */
-
-      surf.ss2.width = key.width - 1;
-      surf.ss2.height = key.height - 1;
-      brw_set_surface_tiling(&surf, key.tiling);
-      surf.ss3.pitch = (key.pitch * key.cpp) - 1;
-
-}
-
-
-
-struct brw_surface_id {
-   unsigned face:3;
-   unsigned zslice:13;
-   unsigned level:16;
+enum {
+   BRW_VIEW_LINEAR,
+   BRW_VIEW_IN_PLACE
 };
 
+
 static boolean need_linear_view( struct brw_screen *brw_screen,
 				 struct brw_texture *brw_texture,
-				 unsigned face,
-				 unsigned level,
-				 unsigned zslice )
+				 union brw_surface_id id,
+				 unsigned usage )
 {
 #if 0
    /* XXX: what about IDGNG?
@@ -178,71 +94,155 @@ static boolean need_linear_view( struct brw_screen *brw_screen,
 /* Look at all texture views and figure out if any of them need to be
  * back-copied into the texture for sampling
  */
-void brw_update_texture( struct pipe_screen *screen,
-			 struct pipe_texture *texture )
+void brw_update_texture( struct brw_screen *brw_screen,
+			 struct brw_texture *tex )
 {
    /* currently nothing to do */
 }
 
 
-static struct pipe_surface *create_linear_view( struct brw_screen *brw_screen,
-						struct brw_texture *brw_tex,
-						struct brw_surface_id id )
+/* Create a new surface with linear layout to serve as a render-target
+ * where it would be illegal (perhaps due to tiling constraints) to do
+ * this in-place.
+ * 
+ * Currently not implemented, not sure if it's needed.
+ */
+static struct brw_surface *create_linear_view( struct brw_screen *brw_screen,
+					       struct brw_texture *tex,
+					       union brw_surface_id id,
+					       unsigned usage )
 {
-   
+   return NULL;
 }
 
-static struct pipe_surface *create_in_place_view( struct brw_screen *brw_screen,
-						  struct brw_texture *brw_tex,
-						  struct brw_surface_id id )
+
+/* Create a pipe_surface that just points directly into the existing
+ * texture's storage.
+ */
+static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
+						  struct brw_texture *tex,
+						  union brw_surface_id id,
+						  unsigned usage )
 {
-   struct brw_surface *surface = CALLOC_STRUCT(brw_surface);
+   struct brw_surface *surface;
+
+   surface = CALLOC_STRUCT(brw_surface);
+   if (surface == NULL)
+      return NULL;
+
+   /* XXX: ignoring render-to-slice-of-3d-texture
+    */
+   assert(id.bits.zslice == 0);
+
+   surface->base.format = tex->base.format;
+   surface->base.width = tex->base.width[id.bits.level];
+   surface->base.height = tex->base.height[id.bits.level];
+   surface->base.offset = tex->image_offset[id.bits.level][id.bits.face];
+   surface->base.usage = usage;
+   surface->base.zslice = id.bits.zslice;
+   surface->base.face = id.bits.face;
+   surface->base.level = id.bits.level;
    surface->id = id;
-   
+
+   pipe_texture_reference( &surface->base.texture, &tex->base );
+
+   surface->ss.ss0.surface_format = tex->ss.ss0.surface_format;
+   surface->ss.ss0.surface_type = BRW_SURFACE_2D;
+
+   if (tex->tiling == BRW_TILING_NONE) {
+      surface->ss.ss1.base_addr = surface->base.offset;
+   } else {
+      uint32_t tile_offset = surface->base.offset % 4096;
+
+      surface->ss.ss1.base_addr = surface->base.offset - tile_offset;
+
+      if (brw_screen->chipset.is_g4x) {
+	 if (tex->tiling == BRW_TILING_X) {
+	    /* Note that the low bits of these fields are missing, so
+	     * there's the possibility of getting in trouble.
+	     */
+	    surface->ss.ss5.x_offset = (tile_offset % 512) / tex->cpp / 4;
+	    surface->ss.ss5.y_offset = tile_offset / 512 / 2;
+	 } else {
+	    surface->ss.ss5.x_offset = (tile_offset % 128) / tex->cpp / 4;
+	    surface->ss.ss5.y_offset = tile_offset / 128 / 2;
+	 }
+      }
+      else {
+	 assert(tile_offset == 0);
+      }
+   }
+
+#if 0
+   if (region_bo != NULL)
+      surface->ss.ss1.base_addr += region_bo->offset; /* reloc */
+#endif
+
+   surface->ss.ss2.width = surface->base.width - 1;
+   surface->ss.ss2.height = surface->base.height - 1;
+   surface->ss.ss3.tiled_surface = tex->ss.ss3.tiled_surface;
+   surface->ss.ss3.tile_walk = tex->ss.ss3.tile_walk;
+   surface->ss.ss3.pitch = tex->ss.ss3.pitch;
+
+   return surface;
 }
 
 /* Get a surface which is view into a texture 
  */
-struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
-					 struct pipe_texture *texture,
-					 unsigned face, unsigned level,
-					 unsigned zslice,
-					 unsigned usage )
+static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
+						struct pipe_texture *pt,
+						unsigned face, unsigned level,
+						unsigned zslice,
+						unsigned usage )
 {
+   struct brw_texture *tex = brw_texture(pt);
    struct brw_screen *bscreen = brw_screen(screen);
-   struct brw_surface_id id;
+   struct brw_surface *surface;
+   union brw_surface_id id;
+   int type;
 
-   id.face = face;
-   id.level = level;
-   id.zslice = zslice;
+   id.bits.face = face;
+   id.bits.level = level;
+   id.bits.zslice = zslice;
 
-   if (need_linear_view(brw_screen, brw_tex, id)) 
+   if (need_linear_view(bscreen, tex, id, usage)) 
       type = BRW_VIEW_LINEAR;
    else
       type = BRW_VIEW_IN_PLACE;
 
    
-   foreach (surface, texture->views[type]) {
+   foreach (surface, &tex->views[type]) {
       if (id.value == surface->id.value)
-	 return surface;
+	 return &surface->base;
    }
 
    switch (type) {
    case BRW_VIEW_LINEAR:
-      surface = create_linear_view( texture, id, type );
+      surface = create_linear_view( bscreen, tex, id, usage );
       break;
    case BRW_VIEW_IN_PLACE:
-      surface = create_in_place_view( texture, id, type );
+      surface = create_in_place_view( bscreen, tex, id, usage );
       break;
    default:
       return NULL;
    }
 
-   insert_at_head( texture->views[type], surface );
-   return surface;
+   insert_at_head( &tex->views[type], surface );
+   return &surface->base;
+}
+
+
+static void brw_tex_surface_destroy( struct pipe_surface *surface )
+{
+   /* Unreference texture, shared buffer:
+    */
+
+   FREE(surface);
 }
 
 
-void brw_tex_surface_destroy( struct pipe_surface *surface )
+void brw_screen_tex_surface_init( struct brw_screen *brw_screen )
 {
+   brw_screen->base.get_tex_surface = brw_get_tex_surface;
+   brw_screen->base.tex_surface_destroy = brw_tex_surface_destroy;
 }
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 989013953b..3fd486986f 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -171,8 +171,7 @@ static GLuint translate_tex_format( enum pipe_format pf )
       return BRW_SURFACEFORMAT_R8G8B8A8_SNORM;
 
    default:
-      assert(0);
-      return 0;
+      return BRW_SURFACEFORMAT_INVALID;
    }
 }
 
@@ -180,7 +179,7 @@ static GLuint translate_tex_format( enum pipe_format pf )
 
 
-static struct pipe_texture *brw_create_texture( struct pipe_screen *screen,
+static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
 						const struct pipe_texture *templ )
 
 {  
@@ -218,6 +217,7 @@ static struct pipe_texture *brw_create_texture( struct pipe_screen *screen,
    tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
    tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+   assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
 
    /* This is ok for all textures with channel width 8bit or less:
     */
@@ -281,8 +281,20 @@ static void brw_texture_destroy(struct pipe_texture *pt)
 }
 
 
+static boolean brw_is_format_supported( struct pipe_screen *screen,
+					enum pipe_format format,
+					enum pipe_texture_target target,
+					unsigned tex_usage, 
+					unsigned geom_flags )
+{
+   return translate_tex_format(format) != BRW_SURFACEFORMAT_INVALID;
+}
 
 
-
-
-
+void brw_screen_tex_init( struct brw_screen *brw_screen )
+{
+   brw_screen->base.is_format_supported = brw_is_format_supported;
+   brw_screen->base.texture_create = brw_texture_create;
+   brw_screen->base.texture_destroy = brw_texture_destroy;
+   brw_screen->base.texture_blanket = brw_texture_blanket;
+}
-- 
cgit v1.2.3


From cab749a1d0046f59ca10f96d2e6343404e5f2616 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 1 Nov 2009 09:24:02 -0800
Subject: r300g: Adopt osiris' PSC data and swizzle.

A fair amount more flexible and easier to maintain.
---
 src/gallium/drivers/r300/r300_state_inlines.h | 128 ++++++++++++++++++--------
 1 file changed, 88 insertions(+), 40 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index ec11a41253..176e59f281 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -402,58 +402,106 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
     return 0;
 }
 
+/* Utility function to count the number of components in RGBAZS formats.
+ * XXX should go to util or p_format.h */
+static INLINE unsigned pf_component_count(enum pipe_format format) {
+    unsigned count = 0;
+
+    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
+        return count;
+    }
+
+    if (pf_size_x(format)) {
+        count++;
+    }
+    if (pf_size_y(format)) {
+        count++;
+    }
+    if (pf_size_z(format)) {
+        count++;
+    }
+    if (pf_size_w(format)) {
+        count++;
+    }
+
+    return count;
+}
+
 /* Translate pipe_formats into PSC vertex types. */
 static INLINE uint16_t
 r300_translate_vertex_data_type(enum pipe_format format) {
-    switch (format) {
-        case PIPE_FORMAT_R32_FLOAT:
-            return R300_DATA_TYPE_FLOAT_1;
-            break;
-        case PIPE_FORMAT_R32G32_FLOAT:
-            return R300_DATA_TYPE_FLOAT_2;
-            break;
-        case PIPE_FORMAT_R32G32B32_FLOAT:
-            return R300_DATA_TYPE_FLOAT_3;
-            break;
-        case PIPE_FORMAT_R32G32B32A32_FLOAT:
-            return R300_DATA_TYPE_FLOAT_4;
-            break;
-        case PIPE_FORMAT_R8G8B8A8_UNORM:
-            return R300_DATA_TYPE_BYTE |
-                R300_NORMALIZE;
+    uint32_t result = 0;
+    unsigned components = pf_component_count(format);
+
+    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
+        debug_printf("r300: Bad format %s in %s\n", pf_name(format),
+            __FUNCTION__);
+        return 0;
+    }
+
+    switch (pf_type(format)) {
+        /* Half-floats, floats, doubles */
+        case PIPE_FORMAT_TYPE_FLOAT:
+            switch (pf_size_x(format)) {
+                case 4:
+                    result = R300_DATA_TYPE_FLOAT_1 + (components - 1);
+                    break;
+                default:
+                    assert(0);
+            }
+            break;
+        /* Normalized unsigned ints */
+        case PIPE_FORMAT_TYPE_UNORM:
+        /* Normalized signed ints */
+        case PIPE_FORMAT_TYPE_SNORM:
+        /* Non-normalized unsigned ints */
+        case PIPE_FORMAT_TYPE_USCALED:
+        /* Non-normalized signed ints */
+        case PIPE_FORMAT_TYPE_SSCALED:
+            switch (pf_size_x(format)) {
+                case 1:
+                    result = R300_DATA_TYPE_BYTE;
+                    break;
+                case 2:
+                    if (components > 2) {
+                        result = R300_DATA_TYPE_SHORT_4;
+                    } else {
+                        result = R300_DATA_TYPE_SHORT_2;
+                    }
+                    break;
+                default:
+                    assert(0);
+            }
             break;
         default:
-            debug_printf("r300: Implementation error: "
-                    "Bad vertex data format %s!\n", pf_name(format));
             assert(0);
-            break;
     }
-    return 0;
+
+    if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) {
+        result |= R300_SIGNED;
+    } else if (pf_type(format) == PIPE_FORMAT_TYPE_UNORM) {
+        result |= R300_NORMALIZE;
+    } else if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) {
+        result |= (R300_SIGNED | R300_NORMALIZE);
+    }
+
+    return result;
 }
 
 static INLINE uint16_t
 r300_translate_vertex_data_swizzle(enum pipe_format format) {
-    switch (format) {
-        case PIPE_FORMAT_R32_FLOAT:
-            return R300_VAP_SWIZZLE_X001;
-            break;
-        case PIPE_FORMAT_R32G32_FLOAT:
-            return R300_VAP_SWIZZLE_XY01;
-            break;
-        case PIPE_FORMAT_R32G32B32_FLOAT:
-            return R300_VAP_SWIZZLE_XYZ1;
-            break;
-        case PIPE_FORMAT_R32G32B32A32_FLOAT:
-        case PIPE_FORMAT_R8G8B8A8_UNORM:
-            return R300_VAP_SWIZZLE_XYZW;
-            break;
-        default:
-            debug_printf("r300: Implementation error: "
-                    "Bad vertex data format %s!\n", pf_name(format));
-            assert(0);
-            break;
+
+    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
+        debug_printf("r300: Bad format %s in %s\n", pf_name(format),
+            __FUNCTION__);
+        return 0;
     }
-    return 0;
+
+    return ((pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) |
+        (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) |
+        (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) |
+        (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) |
+        (0xf << R300_WRITE_ENA_SHIFT));
 }
 
 #endif /* R300_STATE_INLINES_H */
-- 
cgit v1.2.3


From efda453d646c767fbf0f8e85aa09178095ab04d6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 17:55:16 +0000
Subject: i965g: the whole drivers/i965 directory is compiling

That was a lot more work than I expected.  Still the winsys to go,
then the small matter of making it work and re-enabling the
missing functionality.
---
 src/gallium/drivers/i965/brw_batchbuffer.c |  85 ++++++++++++------------
 src/gallium/drivers/i965/brw_batchbuffer.h |  64 ++++++++++++++----
 src/gallium/drivers/i965/brw_winsys.h      | 102 +++++------------------------
 3 files changed, 108 insertions(+), 143 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 1cffc0ab39..080c92046b 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -25,38 +25,42 @@
  * 
  **************************************************************************/
 
+#include "util/u_memory.h"
+
+#include "pipe/p_error.h"
+
 #include "brw_batchbuffer.h"
-#include "brw_decode.h"
+//#include "brw_decode.h"
 #include "brw_reg.h"
 #include "brw_winsys.h"
+#include "brw_debug.h"
+#include "brw_structs.h"
 
+#define BATCH_SIZE (32*1024)
+#define USE_LOCAL_BUFFER 1
+#define ALWAYS_EMIT_MI_FLUSH 1
 
 void
 brw_batchbuffer_reset(struct brw_batchbuffer *batch)
 {
-   struct intel_context *intel = batch->intel;
-
    if (batch->buf != NULL) {
-      brw->sws->bo_unreference(batch->buf);
+      batch->sws->bo_unreference(batch->buf);
       batch->buf = NULL;
    }
 
-   if (!batch->buffer && intel->ttm == GL_TRUE)
-      batch->buffer = malloc (intel->maxBatchSize);
+   if (USE_LOCAL_BUFFER && !batch->buffer)
+      batch->buffer = MALLOC(BATCH_SIZE);
 
    batch->buf = batch->sws->bo_alloc(batch->sws,
 				     BRW_BUFFER_TYPE_BATCH,
-				     intel->maxBatchSize, 4096);
+				     BATCH_SIZE, 4096);
    if (batch->buffer)
       batch->map = batch->buffer;
-   else {
-      batch->sws->bo_map(batch->buf, GL_TRUE);
-      batch->map = batch->buf->virtual;
-   }
-   batch->size = intel->maxBatchSize;
+   else 
+      batch->map = batch->sws->bo_map(batch->buf, GL_TRUE);
+
+   batch->size = BATCH_SIZE;
    batch->ptr = batch->map;
-   batch->dirty_state = ~0;
-   batch->cliprect_mode = IGNORE_CLIPRECTS;
 }
 
 struct brw_batchbuffer *
@@ -74,79 +78,74 @@ void
 brw_batchbuffer_free(struct brw_batchbuffer *batch)
 {
    if (batch->map) {
-      dri_bo_unmap(batch->buf);
+      batch->sws->bo_unmap(batch->buf);
       batch->map = NULL;
    }
 
-   brw->sws->bo_unreference(batch->buf);
+
+   batch->sws->bo_unreference(batch->buf);
    batch->buf = NULL;
+
+   FREE(batch->buffer);
    FREE(batch);
 }
 
 
 void
-_brw_batchbuffer_flush(struct brw_batchbuffer *batch, const char *file,
-			 int line)
+_brw_batchbuffer_flush(struct brw_batchbuffer *batch, 
+		       const char *file,
+		       int line)
 {
-   struct intel_context *intel = batch->intel;
    GLuint used = batch->ptr - batch->map;
 
    if (used == 0)
       return;
 
-   if (intel->first_post_swapbuffers_batch == NULL) {
-      intel->first_post_swapbuffers_batch = intel->batch->buf;
-      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
-   }
-
-   if (intel->first_post_swapbuffers_batch == NULL) {
-      intel->first_post_swapbuffers_batch = intel->batch->buf;
-      batch->sws->bo_reference(intel->first_post_swapbuffers_batch);
-   }
-
+   /* Post-swap throttling done by the state tracker.
+    */
 
    if (BRW_DEBUG & DEBUG_BATCH)
-      debug_printf("%s:%d: Batchbuffer flush with %db used\n", file, line,
-	      used);
+      debug_printf("%s:%d: Batchbuffer flush with %db used\n", 
+		   file, line, used);
 
-#if 0
-   if (intel->always_flush_cache || 1) {
-      *(GLuint *) (batch->ptr) = ((CMD_MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+   if (ALWAYS_EMIT_MI_FLUSH) {
+      *(GLuint *) (batch->ptr) = ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
       batch->ptr += 4;
       used = batch->ptr - batch->map;
    }
-#endif
-
-   /* Round batchbuffer usage to 2 DWORDs. */
 
+   /* Round batchbuffer usage to 2 DWORDs. 
+    */
    if ((used & 4) == 0) {
       *(GLuint *) (batch->ptr) = 0; /* noop */
       batch->ptr += 4;
       used = batch->ptr - batch->map;
    }
 
-   /* Mark the end of the buffer. */
-   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
+   /* Mark the end of the buffer. 
+    */
+   *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END;
    batch->ptr += 4;
    used = batch->ptr - batch->map;
 
    batch->sws->bo_unmap(batch->buf);
-
    batch->map = NULL;
    batch->ptr = NULL;
       
    batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
-      
+
+#if 0      
    if (BRW_DEBUG & DEBUG_BATCH) {
       void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE);
 
       intel_decode(ptr,
 		   used / 4, 
 		   batch->buf->offset,
-		   batch->chipset);
+		   batch->chipset.pci_id);
 
       batch->sws->bo_unmap(batch->buf);
    }
+#endif
 
    if (BRW_DEBUG & DEBUG_SYNC) {
       /* Abuse map/unmap to achieve wait-for-fence.
@@ -214,7 +213,7 @@ brw_batchbuffer_data(struct brw_batchbuffer *batch,
    if (ret)
       return ret;
 
-   __memcpy(batch->ptr, data, bytes);
+   memcpy(batch->ptr, data, bytes);
    batch->ptr += bytes;
    return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 25bb9cefca..d687b79f93 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -18,6 +18,43 @@ enum cliprect_mode {
    REFERENCES_CLIPRECTS
 };
 
+
+
+
+struct brw_batchbuffer {
+
+   struct brw_winsys_screen *sws;
+   struct brw_winsys_buffer *buf;
+
+   /* Main-memory copy of the batch-buffer, built up incrementally &
+    * then copied as one to the true buffer.
+    *
+    * XXX: is this still necessary?
+    * XXX: if so, can this be hidden inside the GEM-specific winsys code?
+    */
+   uint8_t *buffer;
+
+   /**
+    * Values exported to speed up writing to the batchbuffer,
+    * instead of having to go through an accessor function for
+    * each dword written.
+    */
+   /*{@*/
+   uint8_t *map;
+   uint8_t *ptr;
+   size_t size;
+   struct {
+      uint8_t *end_ptr;
+   } emit;
+
+
+   size_t relocs;
+   size_t max_relocs;
+   /*@}*/
+};
+
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws );
+
 void brw_batchbuffer_free(struct brw_batchbuffer *batch);
 
 void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
@@ -83,26 +120,27 @@ brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
 /* Here are the crusty old macros, to be removed:
  */
 #define BEGIN_BATCH(n, cliprect_mode) do {				\
-   brw_batchbuffer_require_space(brw->batch, (n)*4); \
-} while (0)
+      brw_batchbuffer_require_space(brw->batch, (n)*4);			\
+   } while (0)
 
 #define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d)
 
 #define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
-   assert((unsigned) (delta) < buf->size);				\
-   brw_batchbuffer_emit_reloc(brw->batch, buf,			\
-				read_domains, write_domain, delta);	\
-} while (0)
+      assert((unsigned) (delta) < buf->size);				\
+      brw_batchbuffer_emit_reloc(brw->batch, buf,			\
+				 read_domains, write_domain, delta);	\
+   } while (0)
 
 #ifdef DEBUG
 #define ADVANCE_BATCH() do {						\
-   unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr;	\
-   if (_n != 0) {							\
-      debug_printf("%s: %d too many bytes emitted to batch\n", __FUNCTION__, _n); \
-      abort();								\
-   }									\
-   brw->batch->emit.end_ptr = NULL;					\
-} while(0)
+      unsigned int _n = brw->batch->ptr - brw->batch->emit.end_ptr;	\
+      if (_n != 0) {							\
+	 debug_printf("%s: %d too many bytes emitted to batch\n",	\
+		      __FUNCTION__, _n);				\
+	 abort();							\
+      }									\
+      brw->batch->emit.end_ptr = NULL;					\
+   } while(0)
 #else
 #define ADVANCE_BATCH()
 #endif
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 66a94b4b00..bc3d31196c 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -72,66 +72,8 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_STATE_CACHE,
 };
 
-
-/* AKA winsys context:
- */
-struct brw_batchbuffer {
-
-   struct brw_winsys *iws;
-   struct brw_winsys_buffer *buf;
-
-   /**
-    * Values exported to speed up the writing the batchbuffer,
-    * instead of having to go trough a accesor function for
-    * each dword written.
-    */
-   /*{@*/
-   uint8_t *map;
-   uint8_t *ptr;
-   size_t size;
-   struct {
-      uint8_t *end_ptr;
-   } emit;
-
-
-   size_t relocs;
-   size_t max_relocs;
-   /*@}*/
-};
-
 struct brw_winsys_screen {
 
-   /**
-    * Batchbuffer functions.
-    */
-   /*@{*/
-   /**
-    * Create a new batchbuffer.
-    */
-   struct brw_batchbuffer *(*batchbuffer_create)(struct brw_winsys_screen *iws);
-
-   /**
-    * Emit a relocation to a buffer.
-    * Target position in batchbuffer is the same as ptr.
-    */
-   int (*batchbuffer_reloc)(struct brw_batchbuffer *batch,
-			    unsigned offset,
-                            struct brw_winsys_buffer *reloc,
-			    unsigned pre_add,
-                            enum brw_buffer_usage usage);
-
-   /**
-    * Flush a bufferbatch.
-    */
-   void (*batchbuffer_flush)(struct brw_batchbuffer *batch,
-                             struct pipe_fence_handle **fence);
-
-   /**
-    * Destroy a batchbuffer.
-    */
-   void (*batchbuffer_destroy)(struct brw_batchbuffer *batch);
-   /*@}*/
-
 
    /**
     * Buffer functions.
@@ -150,12 +92,21 @@ struct brw_winsys_screen {
     */
    void (*bo_reference)( struct brw_winsys_buffer *buffer );
    void (*bo_unreference)( struct brw_winsys_buffer *buffer );
-   void (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
-			  unsigned domain,
-			  unsigned a,
-			  unsigned b,
-			  unsigned offset,
-			  struct brw_winsys_buffer *b2);
+
+   /* XXX: parameter names!!
+    */
+   int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
+			 unsigned domain,
+			 unsigned a,
+			 unsigned b,
+			 unsigned offset,
+			 struct brw_winsys_buffer *b2);
+
+   int (*bo_exec)( struct brw_winsys_buffer *buffer,
+		   unsigned bytes_used,
+		   void *foo,
+		   int a,
+		   int b );
 
    void (*bo_subdata)(struct brw_winsys_buffer *buffer,
 		      size_t offset,
@@ -186,29 +137,6 @@ struct brw_winsys_screen {
    /*@}*/
 
 
-   /**
-    * Fence functions.
-    */
-   /*@{*/
-   /**
-    * Reference fence and set ptr to fence.
-    */
-   void (*fence_reference)(struct brw_winsys *iws,
-                           struct pipe_fence_handle **ptr,
-                           struct pipe_fence_handle *fence);
-
-   /**
-    * Check if a fence has finished.
-    */
-   int (*fence_signalled)(struct brw_winsys *iws,
-                          struct pipe_fence_handle *fence);
-
-   /**
-    * Wait on a fence to finish.
-    */
-   int (*fence_finish)(struct brw_winsys *iws,
-                       struct pipe_fence_handle *fence);
-   /*@}*/
 
 
    /**
-- 
cgit v1.2.3


From 2db46af8758bf77a2748460f617d0ead5b08a454 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Wed, 21 Oct 2009 21:17:43 +0200
Subject: r300g: split constant buffer and shader emission

---
 src/gallium/drivers/r300/r300_context.c |   2 -
 src/gallium/drivers/r300/r300_context.h |  16 ++--
 src/gallium/drivers/r300/r300_emit.c    | 157 ++++++++++++++++++++------------
 src/gallium/drivers/r300/r300_emit.h    |  21 +++--
 src/gallium/drivers/r300/r300_state.c   |  54 +++++------
 5 files changed, 152 insertions(+), 98 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 02f201b49a..f974147ea4 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -22,8 +22,6 @@
 
 #include "draw/draw_context.h"
 
-#include "pipe/p_inlines.h"
-
 #include "tgsi/tgsi_scan.h"
 
 #include "util/u_hash_table.h"
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index cee0734d21..b1738452de 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -26,6 +26,7 @@
 #include "draw/draw_vertex.h"
 
 #include "pipe/p_context.h"
+#include "pipe/p_inlines.h"
 
 struct r300_fragment_shader;
 struct r300_vertex_shader;
@@ -119,10 +120,10 @@ struct r300_ztop_state {
 #define R300_NEW_BLEND           0x00000001
 #define R300_NEW_BLEND_COLOR     0x00000002
 #define R300_NEW_CLIP            0x00000004
-#define R300_NEW_CONSTANTS       0x00000008
-#define R300_NEW_DSA             0x00000010
-#define R300_NEW_FRAMEBUFFERS    0x00000020
-#define R300_NEW_FRAGMENT_SHADER 0x00000040
+#define R300_NEW_DSA             0x00000008
+#define R300_NEW_FRAMEBUFFERS    0x00000010
+#define R300_NEW_FRAGMENT_SHADER 0x00000020
+#define R300_NEW_FRAGMENT_SHADER_CONSTANTS    0x00000040
 #define R300_NEW_RASTERIZER      0x00000080
 #define R300_NEW_RS_BLOCK        0x00000100
 #define R300_NEW_SAMPLER         0x00000200
@@ -132,9 +133,10 @@ struct r300_ztop_state {
 #define R300_ANY_NEW_TEXTURES    0x03fc0000
 #define R300_NEW_VERTEX_FORMAT   0x04000000
 #define R300_NEW_VERTEX_SHADER   0x08000000
-#define R300_NEW_VIEWPORT        0x10000000
-#define R300_NEW_QUERY           0x20000000
-#define R300_NEW_KITCHEN_SINK    0x3fffffff
+#define R300_NEW_VERTEX_SHADER_CONSTANTS    0x10000000
+#define R300_NEW_VIEWPORT        0x20000000
+#define R300_NEW_QUERY           0x40000000
+#define R300_NEW_KITCHEN_SINK    0x7fffffff
 
 /* The next several objects are not pure Radeon state; they inherit from
  * various Gallium classes. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 22cf9cac2a..de27f0939b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -178,18 +178,15 @@ static uint32_t pack_float24(float f)
 }
 
 void r300_emit_fragment_program_code(struct r300_context* r300,
-                                     struct rX00_fragment_program_code* generic_code,
-                                     struct r300_constant_buffer* externals)
+                                     struct rX00_fragment_program_code* generic_code)
 {
     struct r300_fragment_program_code * code = &generic_code->code.r300;
-    struct rc_constant_list * constants = &generic_code->constants;
     int i;
     CS_LOCALS(r300);
 
     BEGIN_CS(15 +
              code->alu.length * 4 +
-             (code->tex.length ? (1 + code->tex.length) : 0) +
-             (constants->Count ? (1 + constants->Count * 4) : 0));
+             (code->tex.length ? (1 + code->tex.length) : 0));
 
     OUT_CS_REG(R300_US_CONFIG, code->config);
     OUT_CS_REG(R300_US_PIXSIZE, code->pixsize);
@@ -221,32 +218,41 @@ void r300_emit_fragment_program_code(struct r300_context* r300,
             OUT_CS(code->tex.inst[i]);
     }
 
-    if (constants->Count) {
-        OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4);
-        for(i = 0; i < constants->Count; ++i) {
-            const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
-            OUT_CS(pack_float24(data[0]));
-            OUT_CS(pack_float24(data[1]));
-            OUT_CS(pack_float24(data[2]));
-            OUT_CS(pack_float24(data[3]));
-        }
-    }
+    END_CS;
+}
+
+void r300_emit_fs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants)
+{
+    int i;
+    CS_LOCALS(r300);
+
+    if (constants->Count == 0)
+        return;
 
+    BEGIN_CS(constants->Count * 4 + 1);
+    OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, constants->Count * 4);
+    for(i = 0; i < constants->Count; ++i) {
+        const float * data = get_shader_constant(r300,
+                                                 &constants->Constants[i],
+                                                 &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+        OUT_CS(pack_float24(data[0]));
+        OUT_CS(pack_float24(data[1]));
+        OUT_CS(pack_float24(data[2]));
+        OUT_CS(pack_float24(data[3]));
+    }
     END_CS;
 }
 
 void r500_emit_fragment_program_code(struct r300_context* r300,
-                                     struct rX00_fragment_program_code* generic_code,
-                                     struct r300_constant_buffer* externals)
+                                     struct rX00_fragment_program_code* generic_code)
 {
     struct r500_fragment_program_code * code = &generic_code->code.r500;
-    struct rc_constant_list * constants = &generic_code->constants;
     int i;
     CS_LOCALS(r300);
 
     BEGIN_CS(13 +
-             ((code->inst_end + 1) * 6) +
-             (constants->Count ? (3 + (constants->Count * 4)) : 0));
+             ((code->inst_end + 1) * 6));
     OUT_CS_REG(R500_US_CONFIG, 0);
     OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx);
     OUT_CS_REG(R500_US_CODE_RANGE,
@@ -266,18 +272,30 @@ void r500_emit_fragment_program_code(struct r300_context* r300,
         OUT_CS(code->inst[i].inst5);
     }
 
-    if (constants->Count) {
-        OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
-        OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
-        for (i = 0; i < constants->Count; i++) {
-            const float * data = get_shader_constant(r300, &constants->Constants[i], externals);
-            OUT_CS_32F(data[0]);
-            OUT_CS_32F(data[1]);
-            OUT_CS_32F(data[2]);
-            OUT_CS_32F(data[3]);
-        }
-    }
+    END_CS;
+}
+
+void r500_emit_fs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants)
+{
+    int i;
+    CS_LOCALS(r300);
+
+    if (constants->Count == 0)
+        return;
 
+    BEGIN_CS(constants->Count * 4 + 2);
+    OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
+    OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
+    for (i = 0; i < constants->Count; i++) {
+        const float * data = get_shader_constant(r300,
+                                                 &constants->Constants[i],
+                                                 &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+        OUT_CS_32F(data[0]);
+        OUT_CS_32F(data[1]);
+        OUT_CS_32F(data[2]);
+        OUT_CS_32F(data[3]);
+    }
     END_CS;
 }
 
@@ -621,8 +639,7 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
 }
 
 void r300_emit_vertex_program_code(struct r300_context* r300,
-                                   struct r300_vertex_program_code* code,
-                                   struct r300_constant_buffer* constants)
+                                   struct r300_vertex_program_code* code)
 {
     int i;
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
@@ -635,12 +652,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
         return;
     }
 
-    if (code->constants.Count) {
-        BEGIN_CS(14 + code->length + (code->constants.Count * 4));
-    } else {
-        BEGIN_CS(11 + code->length);
-    }
-
+    BEGIN_CS(11 + code->length);
     /* R300_VAP_PVS_CODE_CNTL_0
      * R300_VAP_PVS_CONST_CNTL
      * R300_VAP_PVS_CODE_CNTL_1
@@ -658,20 +670,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     for (i = 0; i < code->length; i++)
         OUT_CS(code->body.d[i]);
 
-    if (code->constants.Count) {
-        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
-                (r300screen->caps->is_r500 ?
-                 R500_PVS_CONST_START : R300_PVS_CONST_START));
-        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->constants.Count * 4);
-        for (i = 0; i < code->constants.Count; i++) {
-            const float * data = get_shader_constant(r300, &code->constants.Constants[i], constants);
-            OUT_CS_32F(data[0]);
-            OUT_CS_32F(data[1]);
-            OUT_CS_32F(data[2]);
-            OUT_CS_32F(data[3]);
-        }
-    }
-
     OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
             R300_PVS_NUM_CNTLRS(5) |
             R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) |
@@ -683,7 +681,40 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
 void r300_emit_vertex_shader(struct r300_context* r300,
                              struct r300_vertex_shader* vs)
 {
-    r300_emit_vertex_program_code(r300, &vs->code, &r300->shader_constants[PIPE_SHADER_VERTEX]);
+    r300_emit_vertex_program_code(r300, &vs->code);
+}
+
+void r300_emit_vs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants)
+{
+    int i;
+    struct r300_screen* r300screen = r300_screen(r300->context.screen);
+    CS_LOCALS(r300);
+
+    if (!r300screen->caps->has_tcl) {
+        debug_printf("r300: Implementation error: emit_vertex_shader called,"
+        " but has_tcl is FALSE!\n");
+        return;
+    }
+
+    if (constants->Count == 0)
+        return;
+
+    BEGIN_CS(constants->Count * 4 + 3);
+    OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+               (r300screen->caps->is_r500 ?
+               R500_PVS_CONST_START : R300_PVS_CONST_START));
+    OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, constants->Count * 4);
+    for (i = 0; i < constants->Count; i++) {
+        const float * data = get_shader_constant(r300,
+                                                 &constants->Constants[i],
+                                                 &r300->shader_constants[PIPE_SHADER_VERTEX]);
+        OUT_CS_32F(data[0]);
+        OUT_CS_32F(data[1]);
+        OUT_CS_32F(data[2]);
+        OUT_CS_32F(data[3]);
+    }
+    END_CS;
 }
 
 void r300_emit_viewport_state(struct r300_context* r300,
@@ -822,13 +853,22 @@ validate:
 
     if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
         if (r300screen->caps->is_r500) {
-            r500_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+            r500_emit_fragment_program_code(r300, &r300->fs->code);
         } else {
-            r300_emit_fragment_program_code(r300, &r300->fs->code, &r300->shader_constants[PIPE_SHADER_FRAGMENT]);
+            r300_emit_fragment_program_code(r300, &r300->fs->code);
         }
         r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
     }
 
+    if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) {
+        if (r300screen->caps->is_r500) {
+            r500_emit_fs_constant_buffer(r300, &r300->fs->code.constants);
+        } else {
+            r300_emit_fs_constant_buffer(r300, &r300->fs->code.constants);
+        }
+        r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+    }
+
     if (r300->dirty_state & R300_NEW_FRAMEBUFFERS) {
         r300_emit_fb_state(r300, &r300->framebuffer_state);
         r300->dirty_state &= ~R300_NEW_FRAMEBUFFERS;
@@ -887,6 +927,11 @@ validate:
         r300->dirty_state &= ~R300_NEW_VERTEX_SHADER;
     }
 
+    if (r300->dirty_state & R300_NEW_VERTEX_SHADER_CONSTANTS) {
+        r300_emit_vs_constant_buffer(r300, &r300->vs->code.constants);
+        r300->dirty_state &= ~R300_NEW_VERTEX_SHADER_CONSTANTS;
+    }
+
     /* XXX
     assert(r300->dirty_state == 0);
     */
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 02ac5bebbd..6befca72ce 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -23,6 +23,9 @@
 #ifndef R300_EMIT_H
 #define R300_EMIT_H
 
+#include "r300_context.h"
+#include "radeon_code.h"
+
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
 
@@ -39,12 +42,16 @@ void r300_emit_dsa_state(struct r300_context* r300,
                          struct r300_dsa_state* dsa);
 
 void r300_emit_fragment_program_code(struct r300_context* r300,
-                                     struct rX00_fragment_program_code* generic_code,
-                                     struct r300_constant_buffer* externals);
+                                     struct rX00_fragment_program_code* generic_code);
+
+void r300_emit_fs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants);
 
 void r500_emit_fragment_program_code(struct r300_context* r300,
-                                     struct rX00_fragment_program_code* generic_code,
-                                     struct r300_constant_buffer* externals);
+                                     struct rX00_fragment_program_code* generic_code);
+
+void r500_emit_fs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants);
 
 void r300_emit_fb_state(struct r300_context* r300,
                         struct pipe_framebuffer_state* fb);
@@ -72,8 +79,10 @@ void r300_emit_vertex_buffer(struct r300_context* r300);
 void r300_emit_vertex_format_state(struct r300_context* r300);
 
 void r300_emit_vertex_program_code(struct r300_context* r300,
-                                   struct r300_vertex_program_code* code,
-                                   struct r300_constant_buffer* constants);
+                                   struct r300_vertex_program_code* code);
+
+void r300_emit_vs_constant_buffer(struct r300_context* r300,
+                                  struct rc_constant_list* constants);
 
 void r300_emit_vertex_shader(struct r300_context* r300,
                              struct r300_vertex_shader* vs);
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 3ac627e959..4cf01389d2 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -166,31 +166,6 @@ static void r300_set_clip_state(struct pipe_context* pipe,
     }
 }
 
-static void
-    r300_set_constant_buffer(struct pipe_context* pipe,
-                             uint shader, uint index,
-                             const struct pipe_constant_buffer* buffer)
-{
-    struct r300_context* r300 = r300_context(pipe);
-
-    /* This entire chunk of code seems ever-so-slightly baked.
-     * It's as if I've got pipe_buffer* matryoshkas... */
-    if (buffer && buffer->buffer && buffer->buffer->size) {
-        void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer,
-                                             PIPE_BUFFER_USAGE_CPU_READ);
-        memcpy(r300->shader_constants[shader].constants,
-            map, buffer->buffer->size);
-        pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer);
-
-        r300->shader_constants[shader].count =
-            buffer->buffer->size / (sizeof(float) * 4);
-    } else {
-        r300->shader_constants[shader].count = 0;
-    }
-
-    r300->dirty_state |= R300_NEW_CONSTANTS;
-}
-
 /* Create a new depth, stencil, and alpha state based on the CSO dsa state.
  *
  * This contains the depth buffer, stencil buffer, alpha test, and such.
@@ -345,7 +320,7 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
 
     r300->fs = fs;
 
-    r300->dirty_state |= R300_NEW_FRAGMENT_SHADER;
+    r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
 }
 
 /* Delete fragment shader state. */
@@ -702,7 +677,7 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
 
         draw_bind_vertex_shader(r300->draw, vs->draw);
         r300->vs = vs;
-        r300->dirty_state |= R300_NEW_VERTEX_SHADER;
+        r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
     } else {
         draw_bind_vertex_shader(r300->draw,
                 (struct draw_vertex_shader*)shader);
@@ -726,6 +701,47 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
     }
 }
 
+/* Copy a constant buffer into the context's CPU-side copy for one shader
+ * stage and mark that stage's constants dirty.
+ *
+ * If buf is NULL, has no backing buffer, is empty, or cannot be mapped, the
+ * stage's constant count is reset to zero and dirty_state is left untouched.
+ *
+ * pipe:   context the constants belong to.
+ * shader: stage index into r300->shader_constants; only PIPE_SHADER_VERTEX
+ *         and PIPE_SHADER_FRAGMENT set a dirty flag.
+ * index:  not used by this function.
+ * buf:    constant buffer to read; may be NULL.
+ */
+
+static void r300_set_constant_buffer(struct pipe_context *pipe,
+                                     uint shader, uint index,
+                                     const struct pipe_constant_buffer *buf)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    void *mapped;
+
+    if (buf == NULL || buf->buffer == NULL || buf->buffer->size == 0 ||
+        (mapped = pipe_buffer_map(pipe->screen, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
+    {
+        r300->shader_constants[shader].count = 0;
+        return;
+    }
+
+    /* The buffer must hold a whole number of vec4 (4 x float) constants. */
+    assert((buf->buffer->size % (4 * sizeof(float))) == 0);
+    /* NOTE(review): the copy is not bounded by the capacity of the
+     * destination array, which is declared outside this patch -- confirm. */
+    memcpy(r300->shader_constants[shader].constants, mapped, buf->buffer->size);
+    r300->shader_constants[shader].count = buf->buffer->size / (4 * sizeof(float));
+    pipe_buffer_unmap(pipe->screen, buf->buffer);
+
+    if (shader == PIPE_SHADER_VERTEX)
+        r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
+    else if (shader == PIPE_SHADER_FRAGMENT)
+        r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+}
+
 void r300_init_state_functions(struct r300_context* r300)
 {
     r300->context.create_blend_state = r300_create_blend_state;
-- 
cgit v1.2.3


From 3d73852121f13832f6bc87918798ff96589d0349 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <maciej@osiris.(none)>
Date: Sun, 1 Nov 2009 18:50:52 +0100
Subject: r300g: fix geometry corruptions

PVS flush is needed before changing the vertex shader or vertex shader constants.
---
 src/gallium/drivers/r300/r300_emit.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index de27f0939b..5b03c1aa6c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -652,7 +652,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
         return;
     }
 
-    BEGIN_CS(11 + code->length);
+    BEGIN_CS(9 + code->length);
     /* R300_VAP_PVS_CODE_CNTL_0
      * R300_VAP_PVS_CONST_CNTL
      * R300_VAP_PVS_CODE_CNTL_1
@@ -674,7 +674,6 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
             R300_PVS_NUM_CNTLRS(5) |
             R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) |
             R300_PVS_VF_MAX_VTX_NUM(12));
-    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
     END_CS;
 }
 
@@ -749,6 +748,15 @@ void r300_flush_textures(struct r300_context* r300)
     END_CS;
 }
 
+static void r300_flush_pvs(struct r300_context* r300) /* Emit a PVS state flush; done before the vertex shader or its constants change. */
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(2); /* NOTE(review): presumably the dword count of the single register write below -- confirm */
+    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); /* write 0 to the PVS state-flush register */
+    END_CS;
+}
+
 /* Emit all dirty state. */
 void r300_emit_dirty_state(struct r300_context* r300)
 {
@@ -922,6 +930,10 @@ validate:
         r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT;
     }
 
+    if (r300->dirty_state & (R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS)) {
+        r300_flush_pvs(r300);
+    }
+
     if (r300->dirty_state & R300_NEW_VERTEX_SHADER) {
         r300_emit_vertex_shader(r300, r300->vs);
         r300->dirty_state &= ~R300_NEW_VERTEX_SHADER;
-- 
cgit v1.2.3


From 15a8ac2c9d6ed13468ef88f3f3bd3ccf4ee2fd0e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 1 Nov 2009 19:30:53 +0000
Subject: i965g: driver and winsys compile

A milestone of sorts.  Still a long way from something working --
the old one compiled too, at least some of the time...
---
 src/gallium/drivers/i965/brw_batchbuffer.c         |  11 +-
 src/gallium/drivers/i965/brw_cc.c                  |   2 +-
 src/gallium/drivers/i965/brw_clip_state.c          |   2 +-
 src/gallium/drivers/i965/brw_gs_state.c            |   2 +-
 src/gallium/drivers/i965/brw_screen_texture.c      |   6 +-
 src/gallium/drivers/i965/brw_sf_state.c            |   4 +-
 src/gallium/drivers/i965/brw_state_dump.c          |   6 +-
 src/gallium/drivers/i965/brw_vs_state.c            |   2 +-
 src/gallium/drivers/i965/brw_winsys.h              |  39 +--
 src/gallium/drivers/i965/brw_wm_sampler_state.c    |   2 +-
 src/gallium/drivers/i965/brw_wm_state.c            |   6 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c    |   4 +-
 src/gallium/winsys/drm/i965/gem/Makefile           |   4 +-
 src/gallium/winsys/drm/i965/gem/SConscript         |   2 -
 src/gallium/winsys/drm/i965/gem/i965_drm_api.c     | 105 ++++----
 .../winsys/drm/i965/gem/i965_drm_batchbuffer.c     | 244 ------------------
 src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c  | 287 ++++++++++++++-------
 src/gallium/winsys/drm/i965/gem/i965_drm_fence.c   |  81 ------
 src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h  |  50 ++--
 19 files changed, 308 insertions(+), 551 deletions(-)
 delete mode 100644 src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c
 delete mode 100644 src/gallium/winsys/drm/i965/gem/i965_drm_fence.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 080c92046b..72650cdb5d 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -36,7 +36,6 @@
 #include "brw_debug.h"
 #include "brw_structs.h"
 
-#define BATCH_SIZE (32*1024)
 #define USE_LOCAL_BUFFER 1
 #define ALWAYS_EMIT_MI_FLUSH 1
 
@@ -49,17 +48,17 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
    }
 
    if (USE_LOCAL_BUFFER && !batch->buffer)
-      batch->buffer = MALLOC(BATCH_SIZE);
+      batch->buffer = MALLOC(BRW_BATCH_SIZE);
 
    batch->buf = batch->sws->bo_alloc(batch->sws,
 				     BRW_BUFFER_TYPE_BATCH,
-				     BATCH_SIZE, 4096);
+				     BRW_BATCH_SIZE, 4096);
    if (batch->buffer)
       batch->map = batch->buffer;
    else 
       batch->map = batch->sws->bo_map(batch->buf, GL_TRUE);
 
-   batch->size = BATCH_SIZE;
+   batch->size = BRW_BATCH_SIZE;
    batch->ptr = batch->map;
 }
 
@@ -132,7 +131,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
    batch->map = NULL;
    batch->ptr = NULL;
       
-   batch->sws->bo_exec(batch->buf, used, NULL, 0, 0 );
+   batch->sws->bo_exec(batch->buf, used );
 
 #if 0      
    if (BRW_DEBUG & DEBUG_BATCH) {
@@ -196,7 +195,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
     * the buffer doesn't move and we can short-circuit the relocation processing
     * in the kernel
     */
-   brw_batchbuffer_emit_dword (batch, buffer->offset + delta);
+   brw_batchbuffer_emit_dword (batch, buffer->offset[0] + delta);
    return 0;
 }
 
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index bdd6418ae1..cf3791e11e 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -137,7 +137,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    cc.cc3 = key->cc3;
 
    /* CACHE_NEW_CC_VP */
-   cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */
+   cc.cc4.cc_viewport_state_offset = *(brw->cc.vp_bo->offset) >> 5; /* reloc */
 
    cc.cc5 = key->cc5;
    cc.cc6 = key->cc6;
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index bf4e6f5103..31e2e0bc17 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -83,7 +83,7 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
-   clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6;
+   clip.thread0.kernel_start_pointer = *(brw->clip.prog_bo->offset) >> 6;
 
    clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    clip.thread1.single_program_flow = 1;
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 15a66c9741..9046969394 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -79,7 +79,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 
    gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    if (key->prog_active) /* reloc */
-      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6;
+      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset[0] >> 6;
 
    gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    gs.thread1.single_program_flow = 1;
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 3fd486986f..48b3451bfc 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -222,7 +222,11 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    /* This is ok for all textures with channel width 8bit or less:
     */
 /*    tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
-   tex->ss.ss1.base_addr = tex->bo->offset; /* reloc */
+
+
+   /* XXX: what happens when tex->bo->offset changes???
+    */
+   tex->ss.ss1.base_addr = tex->bo->offset[0]; /* reloc */
    tex->ss.ss2.mip_count = tex->base.last_level;
    tex->ss.ss2.width = tex->base.width[0] - 1;
    tex->ss.ss2.height = tex->base.height[0] - 1;
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index fbc9f15eb4..4ab5709d53 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -138,7 +138,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    memset(&sf, 0, sizeof(sf));
 
    sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset[0] >> 6; /* reloc */
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -171,7 +171,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
       sf.thread4.stats_enable = 1;
 
    /* CACHE_NEW_SF_VP */
-   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset[0] >> 5; /* reloc */
 
    sf.sf5.viewport_transform = 1;
 
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
index 72604304d4..345e42a6b2 100644
--- a/src/gallium/drivers/i965/brw_state_dump.c
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -67,7 +67,7 @@ state_struct_out(struct brw_winsys_screen *sws,
 
    data = sws->bo_map(buffer, GL_FALSE);
    for (i = 0; i < state_size / 4; i++) {
-      state_out(name, data, buffer->offset, i,
+      state_out(name, data, buffer->offset[0], i,
 		"dword %d\n", i);
    }
    sws->bo_unmap(buffer);
@@ -115,7 +115,7 @@ static void dump_wm_surface_state(struct brw_context *brw)
 	 continue;
       }
       surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE);
-      surfoff = surf_bo->offset;
+      surfoff = surf_bo->offset[0];
 
       sprintf(name, "WM SS%d", i);
       state_out(name, surf, surfoff, 0, "%s %s\n",
@@ -145,7 +145,7 @@ static void dump_sf_viewport_state(struct brw_context *brw)
       return;
 
    vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE);
-   vp_off = brw->sf.vp_bo->offset;
+   vp_off = brw->sf.vp_bo->offset[0];
 
    state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
    state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 549696f7ae..6a2395dd96 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -87,7 +87,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 
    memset(&vs, 0, sizeof(vs));
 
-   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */
+   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset[0] >> 6; /* reloc */
    vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    /* Choosing multiple program flow means that we may get 2-vertex threads,
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index bc3d31196c..d19cd5d248 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -31,12 +31,15 @@
 struct brw_winsys;
 struct pipe_fence_handle;
 
-/* This currently just wraps dri_bo:
+/* Not sure why the winsys needs this:
+ */
+#define BRW_BATCH_SIZE (32*1024)
+
+
+/* Need a tiny bit of information inside the abstract buffer struct:
  */
 struct brw_winsys_buffer {
-   struct brw_winsys_screen *sws;
-   void *bo;
-   unsigned offset;
+   unsigned *offset;
    unsigned size;
 };
 
@@ -70,6 +73,8 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_WM_SCRATCH,
    BRW_BUFFER_TYPE_BATCH,
    BRW_BUFFER_TYPE_STATE_CACHE,
+   
+   BRW_BUFFER_TYPE_MAX		/* Count of possible values */
 };
 
 struct brw_winsys_screen {
@@ -103,12 +108,9 @@ struct brw_winsys_screen {
 			 struct brw_winsys_buffer *b2);
 
    int (*bo_exec)( struct brw_winsys_buffer *buffer,
-		   unsigned bytes_used,
-		   void *foo,
-		   int a,
-		   int b );
+		   unsigned bytes_used );
 
-   void (*bo_subdata)(struct brw_winsys_buffer *buffer,
+   int (*bo_subdata)(struct brw_winsys_buffer *buffer,
 		      size_t offset,
 		      size_t size,
 		      const void *data);
@@ -142,14 +144,14 @@ struct brw_winsys_screen {
    /**
     * Destroy the winsys.
     */
-   void (*destroy)(struct brw_winsys *iws);
+   void (*destroy)(struct brw_winsys_screen *iws);
 };
 
 
 /**
  * Create brw pipe_screen.
  */
-struct pipe_screen *brw_create_screen(struct brw_winsys *iws, unsigned pci_id);
+struct pipe_screen *brw_create_screen(struct brw_winsys_screen *iws, unsigned pci_id);
 
 /**
  * Create a brw pipe_context.
@@ -162,19 +164,20 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen);
  * TODO UGLY
  */
 struct pipe_texture;
-boolean brw_get_texture_buffer_brw(struct pipe_texture *texture,
-				    struct brw_winsys_buffer **buffer,
-				    unsigned *stride);
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+				      struct brw_winsys_buffer **buffer,
+				      unsigned *stride);
 
 /**
  * Wrap a brw_winsys buffer with a texture blanket.
  *
  * TODO UGLY
  */
-struct pipe_texture * brw_texture_blanket_ws(struct pipe_screen *screen,
-					     const struct pipe_texture *tmplt,
-					     const unsigned *stride,
-					     struct brw_winsys_buffer *buffer);
+struct pipe_texture * 
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+				  const struct pipe_texture *template,
+				  const unsigned pitch,
+				  struct brw_winsys_buffer *buffer);
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index ddd88d6e22..d43968c85a 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -81,7 +81,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 
       entry->ss0 = sampler->ss0;
       entry->ss1 = sampler->ss1;
-      entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset >> 5; /* reloc */
+      entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset[0] >> 5; /* reloc */
       entry->ss3 = sampler->ss3;
 
       /* Cube-maps on 965 and later must use the same wrap mode for all 3
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index f161de9b40..5cfa8fe2d1 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -148,7 +148,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    memset(&wm, 0, sizeof(wm));
 
    wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset[0] >> 6; /* reloc */
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -159,7 +159,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    if (key->total_scratch != 0) {
       wm.thread2.scratch_space_base_pointer =
-	 brw->wm.scratch_bo->offset >> 10; /* reloc */
+	 brw->wm.scratch_bo->offset[0] >> 10; /* reloc */
       wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
    } else {
       wm.thread2.scratch_space_base_pointer = 0;
@@ -179,7 +179,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    if (brw->wm.sampler_bo != NULL) {
       /* reloc */
-      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
+      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset[0] >> 5;
    } else {
       wm.wm4.sampler_state_pointer = 0;
    }
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index 88485c76cb..f55a6c4af2 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -118,7 +118,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
        */
       brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit],
 			      I915_GEM_DOMAIN_RENDER, 0, 
-			      ss.ss1.base_addr - surface->bo->offset, /* XXX */
+			      ss.ss1.base_addr - surface->bo->offset[0], /* XXX */
 			      offsetof(struct brw_surface_state, ss1),
 			      surface->bo);
    }
@@ -150,7 +150,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
       int i;
 
       for (i = 0; i < brw->wm.nr_surfaces; i++)
-	 data[i] = brw->wm.surf_bo[i]->offset;
+	 data[i] = brw->wm.surf_bo[i]->offset[0];
 
       bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
 				  NULL, 0,
diff --git a/src/gallium/winsys/drm/i965/gem/Makefile b/src/gallium/winsys/drm/i965/gem/Makefile
index 74d81b4bc8..6a7497b6be 100644
--- a/src/gallium/winsys/drm/i965/gem/Makefile
+++ b/src/gallium/winsys/drm/i965/gem/Makefile
@@ -1,12 +1,10 @@
 TOP = ../../../../../..
 include $(TOP)/configs/current
 
-LIBNAME = inteldrm
+LIBNAME = i965drm
 
 C_SOURCES = \
-	i965_drm_batchbuffer.c \
 	i965_drm_buffer.c \
-	i965_drm_fence.c \
 	i965_drm_api.c
 
 LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I)
diff --git a/src/gallium/winsys/drm/i965/gem/SConscript b/src/gallium/winsys/drm/i965/gem/SConscript
index 9f1391caff..6256ec6eaf 100644
--- a/src/gallium/winsys/drm/i965/gem/SConscript
+++ b/src/gallium/winsys/drm/i965/gem/SConscript
@@ -4,9 +4,7 @@ env = drienv.Clone()
 
 i965drm_sources = [
     'i965_drm_api.c',
-    'i965_drm_batchbuffer.c',
     'i965_drm_buffer.c',
-    'i965_drm_fence.c',
 ]
 
 i965drm = env.ConvenienceLibrary(
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
index de68cb3551..8b9c777a6f 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
@@ -1,11 +1,12 @@
 
+#include <stdio.h>
 #include "state_tracker/drm_api.h"
 
 #include "i965_drm_winsys.h"
 #include "util/u_memory.h"
 
-#include "brw/brw_context.h"	/* XXX: shouldn't be doing this */
-#include "brw/brw_screen.h"	/* XXX: shouldn't be doing this */
+#include "i965/brw_context.h"	/* XXX: shouldn't be doing this */
+#include "i965/brw_screen.h"	/* XXX: shouldn't be doing this */
 
 #include "trace/tr_drm.h"
 
@@ -15,7 +16,7 @@
 
 
 static void
-i965_drm_get_device_id(unsigned int *device_id)
+i965_libdrm_get_device_id(unsigned int *device_id)
 {
    char path[512];
    FILE *file;
@@ -36,29 +37,28 @@ i965_drm_get_device_id(unsigned int *device_id)
    fclose(file);
 }
 
-static struct i965_buffer *
-i965_drm_buffer_from_handle(struct i965_drm_winsys *idws,
+static struct i965_libdrm_buffer *
+i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws,
                              const char* name, unsigned handle)
 {
-   struct i965_drm_buffer *buf = CALLOC_STRUCT(i965_drm_buffer);
+   struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer);
    uint32_t tile = 0, swizzle = 0;
 
    if (!buf)
       return NULL;
 
-   buf->magic = 0xDEAD1337;
-   buf->bo = drm_i965_bo_gem_create_from_name(idws->pools.gem, name, handle);
+   buf->bo = drm_intel_bo_gem_create_from_name(idws->gem, name, handle);
    buf->flinked = TRUE;
    buf->flink = handle;
 
    if (!buf->bo)
       goto err;
 
-   drm_i965_bo_get_tiling(buf->bo, &tile, &swizzle);
-   if (tile != I965_TILE_NONE)
+   drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle);
+   if (tile != 0)
       buf->map_gtt = TRUE;
 
-   return (struct i965_buffer *)buf;
+   return buf;
 
 err:
    FREE(buf);
@@ -72,38 +72,43 @@ err:
 
 
 static struct pipe_texture *
-i965_drm_texture_from_shared_handle(struct drm_api *api,
+i965_libdrm_texture_from_shared_handle(struct drm_api *api,
                                      struct pipe_screen *screen,
-                                     struct pipe_texture *templ,
+                                     struct pipe_texture *template,
                                      const char* name,
                                      unsigned pitch,
                                      unsigned handle)
 {
-   struct i965_drm_winsys *idws = i965_drm_winsys(i965_screen(screen)->iws);
-   struct i965_buffer *buffer;
+   /* XXX: this is silly -- there should be a way to get directly from
+    * the "drm_api" struct to ourselves, without peering into
+    * unrelated code:
+    */
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(brw_screen(screen)->sws);
+   struct i965_libdrm_buffer *buffer;
 
-   buffer = i965_drm_buffer_from_handle(idws, name, handle);
+   buffer = i965_libdrm_buffer_from_handle(idws, name, handle);
    if (!buffer)
       return NULL;
 
-   return i965_texture_blanket_i965(screen, templ, pitch, buffer);
+   return brw_texture_blanket_winsys_buffer(screen, template, pitch, &buffer->base);
 }
 
+
 static boolean
-i965_drm_shared_handle_from_texture(struct drm_api *api,
+i965_libdrm_shared_handle_from_texture(struct drm_api *api,
                                      struct pipe_screen *screen,
                                      struct pipe_texture *texture,
                                      unsigned *pitch,
                                      unsigned *handle)
 {
-   struct i965_drm_buffer *buf = NULL;
-   struct i965_buffer *buffer = NULL;
-   if (!i965_get_texture_buffer_i965(texture, &buffer, pitch))
+   struct i965_libdrm_buffer *buf = NULL;
+   struct brw_winsys_buffer *buffer = NULL;
+   if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch))
       return FALSE;
 
-   buf = i965_drm_buffer(buffer);
+   buf = i965_libdrm_buffer(buffer);
    if (!buf->flinked) {
-      if (drm_i965_bo_flink(buf->bo, &buf->flink))
+      if (drm_intel_bo_flink(buf->bo, &buf->flink))
          return FALSE;
       buf->flinked = TRUE;
    }
@@ -114,36 +119,36 @@ i965_drm_shared_handle_from_texture(struct drm_api *api,
 }
 
 static boolean
-i965_drm_local_handle_from_texture(struct drm_api *api,
+i965_libdrm_local_handle_from_texture(struct drm_api *api,
                                     struct pipe_screen *screen,
                                     struct pipe_texture *texture,
                                     unsigned *pitch,
                                     unsigned *handle)
 {
-   struct i965_buffer *buffer = NULL;
-   if (!i965_get_texture_buffer_i965(texture, &buffer, pitch))
+   struct brw_winsys_buffer *buffer = NULL;
+   if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch))
       return FALSE;
 
-   *handle = i965_drm_buffer(buffer)->bo->handle;
+   *handle = i965_libdrm_buffer(buffer)->bo->handle;
 
    return TRUE;
 }
 
 static void
-i965_drm_winsys_destroy(struct i965_winsys *iws)
+i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws)
 {
-   struct i965_drm_winsys *idws = i965_drm_winsys(iws);
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws);
 
-   drm_i965_bufmgr_destroy(idws->pools.gem);
+   drm_intel_bufmgr_destroy(idws->gem);
 
    FREE(idws);
 }
 
 static struct pipe_screen *
-i965_drm_create_screen(struct drm_api *api, int drmFD,
+i965_libdrm_create_screen(struct drm_api *api, int drmFD,
 		      struct drm_create_screen_arg *arg)
 {
-   struct i965_drm_winsys *idws;
+   struct i965_libdrm_winsys *idws;
    unsigned int deviceID;
 
    if (arg != NULL) {
@@ -155,35 +160,31 @@ i965_drm_create_screen(struct drm_api *api, int drmFD,
       }
    }
 
-   idws = CALLOC_STRUCT(i965_drm_winsys);
+   idws = CALLOC_STRUCT(i965_libdrm_winsys);
    if (!idws)
       return NULL;
 
-   i965_drm_get_device_id(&deviceID);
+   i965_libdrm_get_device_id(&deviceID);
 
-   i965_drm_winsys_init_batchbuffer_functions(idws);
-   i965_drm_winsys_init_buffer_functions(idws);
-   i965_drm_winsys_init_fence_functions(idws);
+   i965_libdrm_winsys_init_buffer_functions(idws);
 
    idws->fd = drmFD;
    idws->id = deviceID;
-   idws->max_batch_size = 16 * 4096;
 
-   idws->base.destroy = i965_drm_winsys_destroy;
+   idws->base.destroy = i965_libdrm_winsys_destroy;
 
-   idws->pools.gem = drm_i965_bufmgr_gem_init(idws->fd, idws->max_batch_size);
-   drm_i965_bufmgr_gem_enable_reuse(idws->pools.gem);
+   idws->gem = drm_intel_bufmgr_gem_init(idws->fd, BRW_BATCH_SIZE);
+   drm_intel_bufmgr_gem_enable_reuse(idws->gem);
 
-   idws->softpipe = FALSE;
    idws->dump_cmd = debug_get_bool_option("I965_DUMP_CMD", FALSE);
 
-   return i965_create_screen(&idws->base, deviceID);
+   return brw_create_screen(&idws->base, deviceID);
 }
 
 static struct pipe_context *
-i965_drm_create_context(struct drm_api *api, struct pipe_screen *screen)
+i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen)
 {
-   return i965_create_context(screen);
+   return brw_create_context(screen);
 }
 
 static void
@@ -192,18 +193,18 @@ destroy(struct drm_api *api)
 
 }
 
-struct drm_api i965_drm_api =
+struct drm_api i965_libdrm_api =
 {
-   .create_context = i965_drm_create_context,
-   .create_screen = i965_drm_create_screen,
-   .texture_from_shared_handle = i965_drm_texture_from_shared_handle,
-   .shared_handle_from_texture = i965_drm_shared_handle_from_texture,
-   .local_handle_from_texture = i965_drm_local_handle_from_texture,
+   .create_context = i965_libdrm_create_context,
+   .create_screen = i965_libdrm_create_screen,
+   .texture_from_shared_handle = i965_libdrm_texture_from_shared_handle,
+   .shared_handle_from_texture = i965_libdrm_shared_handle_from_texture,
+   .local_handle_from_texture = i965_libdrm_local_handle_from_texture,
    .destroy = destroy,
 };
 
 struct drm_api *
 drm_api_create()
 {
-   return trace_drm_create(&i965_drm_api);
+   return trace_drm_create(&i965_libdrm_api);
 }
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c
deleted file mode 100644
index 5b4dafc8e4..0000000000
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_batchbuffer.c
+++ /dev/null
@@ -1,244 +0,0 @@
-
-#include "intel_drm_winsys.h"
-#include "util/u_memory.h"
-
-#include "i915_drm.h"
-
-#define BATCH_RESERVED 16
-
-#define INTEL_DEFAULT_RELOCS 100
-#define INTEL_MAX_RELOCS 400
-
-#define INTEL_BATCH_NO_CLIPRECTS 0x1
-#define INTEL_BATCH_CLIPRECTS    0x2
-
-#undef INTEL_RUN_SYNC
-#undef INTEL_MAP_BATCHBUFFER
-#undef INTEL_MAP_GTT
-#define INTEL_ALWAYS_FLUSH
-
-struct intel_drm_batchbuffer
-{
-   struct intel_batchbuffer base;
-
-   size_t actual_size;
-
-   drm_intel_bo *bo;
-};
-
-static INLINE struct intel_drm_batchbuffer *
-intel_drm_batchbuffer(struct intel_batchbuffer *batch)
-{
-   return (struct intel_drm_batchbuffer *)batch;
-}
-
-static void
-intel_drm_batchbuffer_reset(struct intel_drm_batchbuffer *batch)
-{
-   struct intel_drm_winsys *idws = intel_drm_winsys(batch->base.iws);
-   int ret;
-
-   if (batch->bo)
-      drm_intel_bo_unreference(batch->bo);
-   batch->bo = drm_intel_bo_alloc(idws->pools.gem,
-                                  "gallium3d_batchbuffer",
-                                  batch->actual_size,
-                                  4096);
-
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
-   ret = drm_intel_gem_bo_map_gtt(batch->bo);
-#else
-   ret = drm_intel_bo_map(batch->bo, TRUE);
-#endif
-   assert(ret == 0);
-   batch->base.map = batch->bo->virtual;
-#else
-   (void)ret;
-#endif
-
-   memset(batch->base.map, 0, batch->actual_size);
-   batch->base.ptr = batch->base.map;
-   batch->base.size = batch->actual_size - BATCH_RESERVED;
-   batch->base.relocs = 0;
-}
-
-static struct intel_batchbuffer *
-intel_drm_batchbuffer_create(struct intel_winsys *iws)
-{
-   struct intel_drm_winsys *idws = intel_drm_winsys(iws);
-   struct intel_drm_batchbuffer *batch = CALLOC_STRUCT(intel_drm_batchbuffer);
-
-   batch->actual_size = idws->max_batch_size;
-
-#ifdef INTEL_MAP_BATCHBUFFER
-   batch->base.map = NULL;
-#else
-   batch->base.map = MALLOC(batch->actual_size);
-#endif
-   batch->base.ptr = NULL;
-   batch->base.size = 0;
-
-   batch->base.relocs = 0;
-   batch->base.max_relocs = 300;/*INTEL_DEFAULT_RELOCS;*/
-
-   batch->base.iws = iws;
-
-   intel_drm_batchbuffer_reset(batch);
-
-   return &batch->base;
-}
-
-static int
-intel_drm_batchbuffer_reloc(struct intel_batchbuffer *ibatch,
-                            struct intel_buffer *buffer,
-                            enum intel_buffer_usage usage,
-                            unsigned pre_add)
-{
-   struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch);
-   unsigned write_domain = 0;
-   unsigned read_domain = 0;
-   unsigned offset;
-   int ret = 0;
-
-   assert(batch->base.relocs < batch->base.max_relocs);
-
-   if (usage == INTEL_USAGE_SAMPLER) {
-      write_domain = 0;
-      read_domain = I915_GEM_DOMAIN_SAMPLER;
-
-   } else if (usage == INTEL_USAGE_RENDER) {
-      write_domain = I915_GEM_DOMAIN_RENDER;
-      read_domain = I915_GEM_DOMAIN_RENDER;
-
-   } else if (usage == INTEL_USAGE_2D_TARGET) {
-      write_domain = I915_GEM_DOMAIN_RENDER;
-      read_domain = I915_GEM_DOMAIN_RENDER;
-
-   } else if (usage == INTEL_USAGE_2D_SOURCE) {
-      write_domain = 0;
-      read_domain = I915_GEM_DOMAIN_RENDER;
-
-   } else if (usage == INTEL_USAGE_VERTEX) {
-      write_domain = 0;
-      read_domain = I915_GEM_DOMAIN_VERTEX;
-
-   } else {
-      assert(0);
-      return -1;
-   }
-
-   offset = (unsigned)(batch->base.ptr - batch->base.map);
-
-   ret = drm_intel_bo_emit_reloc(batch->bo, offset,
-                                 intel_bo(buffer), pre_add,
-                                 read_domain,
-                                 write_domain);
-
-   ((uint32_t*)batch->base.ptr)[0] = intel_bo(buffer)->offset + pre_add;
-   batch->base.ptr += 4;
-
-   if (!ret)
-      batch->base.relocs++;
-
-   return ret;
-}
-
-static void
-intel_drm_batchbuffer_flush(struct intel_batchbuffer *ibatch,
-                            struct pipe_fence_handle **fence)
-{
-   struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch);
-   unsigned used = 0;
-   int ret = 0;
-   int i;
-
-   assert(intel_batchbuffer_space(ibatch) >= 0);
-
-   used = batch->base.ptr - batch->base.map;
-   assert((used & 3) == 0);
-
-
-#ifdef INTEL_ALWAYS_FLUSH
-   /* MI_FLUSH | FLUSH_MAP_CACHE */
-   intel_batchbuffer_dword(ibatch, (0x4<<23)|(1<<0));
-   used += 4;
-#endif
-
-   if ((used & 4) == 0) {
-      /* MI_NOOP */
-      intel_batchbuffer_dword(ibatch, 0);
-   }
-   /* MI_BATCH_BUFFER_END */
-   intel_batchbuffer_dword(ibatch, (0xA<<23));
-
-   used = batch->base.ptr - batch->base.map;
-   assert((used & 4) == 0);
-
-#ifdef INTEL_MAP_BATCHBUFFER
-#ifdef INTEL_MAP_GTT
-   drm_intel_gem_bo_unmap_gtt(batch->bo);
-#else
-   drm_intel_bo_unmap(batch->bo);
-#endif
-#else
-   drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map);
-#endif
-
-   /* Do the sending to HW */
-   ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
-   assert(ret == 0);
-
-   if (intel_drm_winsys(ibatch->iws)->dump_cmd) {
-      unsigned *ptr;
-      drm_intel_bo_map(batch->bo, FALSE);
-      ptr = (unsigned*)batch->bo->virtual;
-
-      debug_printf("%s:\n", __func__);
-      for (i = 0; i < used / 4; i++, ptr++) {
-         debug_printf("\t%08x:    %08x\n", i*4, *ptr);
-      }
-
-      drm_intel_bo_unmap(batch->bo);
-   } else {
-#ifdef INTEL_RUN_SYNC
-      drm_intel_bo_map(batch->bo, FALSE);
-      drm_intel_bo_unmap(batch->bo);
-#endif
-   }
-
-   if (fence) {
-      ibatch->iws->fence_reference(ibatch->iws, fence, NULL);
-
-#ifdef INTEL_RUN_SYNC
-      /* we run synced to GPU so just pass null */
-      (*fence) = intel_drm_fence_create(NULL);
-#else
-      (*fence) = intel_drm_fence_create(batch->bo);
-#endif
-   }
-
-   intel_drm_batchbuffer_reset(batch);
-}
-
-static void
-intel_drm_batchbuffer_destroy(struct intel_batchbuffer *ibatch)
-{
-   struct intel_drm_batchbuffer *batch = intel_drm_batchbuffer(ibatch);
-
-   if (batch->bo)
-      drm_intel_bo_unreference(batch->bo);
-
-#ifndef INTEL_MAP_BATCHBUFFER
-   FREE(batch->base.map);
-#endif
-   FREE(batch);
-}
-
-void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws)
-{
-   idws->base.batchbuffer_create = intel_drm_batchbuffer_create;
-   idws->base.batchbuffer_reloc = intel_drm_batchbuffer_reloc;
-   idws->base.batchbuffer_flush = intel_drm_batchbuffer_flush;
-   idws->base.batchbuffer_destroy = intel_drm_batchbuffer_destroy;
-}
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
index 4f123bae05..5dbfd2e6b0 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
@@ -3,48 +3,58 @@
 #include "util/u_memory.h"
 
 #include "i915_drm.h"
-
-static struct intel_buffer *
-intel_drm_buffer_create(struct intel_winsys *iws,
-                        unsigned size, unsigned alignment,
-                        enum intel_buffer_type type)
+#include "intel_bufmgr.h"
+
+const char *names[BRW_BUFFER_TYPE_MAX] = {
+   "texture",
+   "scanout",
+   "vertex",
+   "curbe",
+   "query",
+   "shader_constants",
+   "wm_scratch",
+   "batch",
+   "state_cache",
+};
+
+static struct brw_winsys_buffer *
+i965_libdrm_bo_alloc( struct brw_winsys_screen *sws,
+		      enum brw_buffer_type type,
+		      unsigned size,
+		      unsigned alignment )
 {
-   struct intel_drm_buffer *buf = CALLOC_STRUCT(intel_drm_buffer);
-   struct intel_drm_winsys *idws = intel_drm_winsys(iws);
-   drm_intel_bufmgr *pool;
-   char *name;
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(sws);
+   struct i965_libdrm_buffer *buf;
 
+   buf = CALLOC_STRUCT(i965_libdrm_buffer);
    if (!buf)
       return NULL;
 
-   buf->magic = 0xDEAD1337;
-   buf->flinked = FALSE;
-   buf->flink = 0;
-   buf->map_gtt = FALSE;
-
-   if (type == INTEL_NEW_TEXTURE) {
-      name = "gallium3d_texture";
-      pool = idws->pools.gem;
-   } else if (type == INTEL_NEW_VERTEX) {
-      name = "gallium3d_vertex";
-      pool = idws->pools.gem;
+   switch (type) {
+   case BRW_BUFFER_TYPE_TEXTURE:
+      break;
+   case BRW_BUFFER_TYPE_VERTEX:
       buf->map_gtt = TRUE;
-   } else if (type == INTEL_NEW_SCANOUT) {
-      name = "gallium3d_scanout";
-      pool = idws->pools.gem;
+      break;
+   case BRW_BUFFER_TYPE_SCANOUT:
       buf->map_gtt = TRUE;
-   } else {
-      assert(0);
-      name = "gallium3d_unknown";
-      pool = idws->pools.gem;
+      break;
+   default:
+      break;
    }
 
-   buf->bo = drm_intel_bo_alloc(pool, name, size, alignment);
+   buf->bo = drm_intel_bo_alloc(idws->gem, 
+				names[type], 
+				size, 
+				alignment);
 
    if (!buf->bo)
       goto err;
 
-   return (struct intel_buffer *)buf;
+   buf->base.offset = &buf->bo->offset;
+   buf->base.size = size;
+
+   return &buf->base;
 
 err:
    assert(0);
@@ -52,103 +62,186 @@ err:
    return NULL;
 }
 
-static int
-intel_drm_buffer_set_fence_reg(struct intel_winsys *iws,
-                               struct intel_buffer *buffer,
-                               unsigned stride,
-                               enum intel_buffer_tile tile)
+
+
+
+/* Reference and unreference buffers:
+ */
+static void 
+i965_libdrm_bo_reference( struct brw_winsys_buffer *buffer )
 {
-   struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
-   assert(I915_TILING_NONE == INTEL_TILE_NONE);
-   assert(I915_TILING_X == INTEL_TILE_X);
-   assert(I915_TILING_Y == INTEL_TILE_Y);
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
 
-   if (tile != INTEL_TILE_NONE) {
-      assert(buf->map_count == 0);
-      buf->map_gtt = TRUE;
-   }
+   /* I think we have to refcount ourselves and then just pass through
+    * the final dereference to the bo on destruction.
+    */
+   buf->cheesy_refcount++;
+}
 
-   return drm_intel_bo_set_tiling(buf->bo, &tile, stride);
+static void 
+i965_libdrm_bo_unreference( struct brw_winsys_buffer *buffer )
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+
+   if (--buf->cheesy_refcount == 0) {
+      drm_intel_bo_unreference(buf->bo);
+      FREE(buffer);
+   }
 }
 
-static void *
-intel_drm_buffer_map(struct intel_winsys *iws,
-                     struct intel_buffer *buffer,
-                     boolean write)
+   /* XXX: parameter names!!
+    */
+static int 
+i965_libdrm_bo_emit_reloc( struct brw_winsys_buffer *buffer,
+			   unsigned domain,
+			   unsigned a,
+			   unsigned b,
+			   unsigned offset,
+			   struct brw_winsys_buffer *buffer2)
 {
-   struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
-   drm_intel_bo *bo = intel_bo(buffer);
-   int ret = 0;
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2);
+   int ret;
 
-   assert(bo);
+   ret = dri_bo_emit_reloc( buf->bo, domain, a, b, offset, buf2->bo );
+   if (ret)
+      return -1;
 
-   if (buf->map_count)
-      goto out;
+   return 0;
+}
 
-   if (buf->map_gtt)
-      ret = drm_intel_gem_bo_map_gtt(bo);
-   else
-      ret = drm_intel_bo_map(bo, write);
+static int 
+i965_libdrm_bo_exec( struct brw_winsys_buffer *buffer,
+		     unsigned bytes_used )
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   int ret;
 
-   buf->ptr = bo->virtual;
+   ret = dri_bo_exec(buf->bo, bytes_used, NULL, 0, 0);
+   if (ret)
+      return -1;
+
+   return 0;
+}
+
+static int
+i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer,
+		       size_t offset,
+		       size_t size,
+		       const void *data)
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   int ret;
 
-   assert(ret == 0);
-out:
+   /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances???
+    */
+   ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data);
    if (ret)
-      return NULL;
+      return -1;
+   
+   return 0;
+}
 
-   buf->map_count++;
-   return buf->ptr;
+
+static boolean 
+i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer)
+{
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+
+   return drm_intel_bo_busy(buf->bo);
 }
 
-static void
-intel_drm_buffer_unmap(struct intel_winsys *iws,
-                       struct intel_buffer *buffer)
+static boolean 
+i965_libdrm_bo_references(struct brw_winsys_buffer *a,
+			  struct brw_winsys_buffer *b)
 {
-   struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
+   struct i965_libdrm_buffer *bufa = i965_libdrm_buffer(a);
+   struct i965_libdrm_buffer *bufb = i965_libdrm_buffer(b);
 
-   if (--buf->map_count)
-      return;
+   /* XXX: can't find this func:
+    */
+   return drm_intel_bo_references(bufa->bo, bufb->bo);
+}
 
-   if (buf->map_gtt)
-      drm_intel_gem_bo_unmap_gtt(intel_bo(buffer));
-   else
-      drm_intel_bo_unmap(intel_bo(buffer));
+/* XXX: couldn't this be handled by returning true/false on
+ * bo_emit_reloc?
+ */
+static boolean 
+i965_libdrm_check_aperture_space( struct brw_winsys_screen *iws,
+				  struct brw_winsys_buffer **buffers,
+				  unsigned count )
+{
+   static drm_intel_bo *bos[128];
+   int i;
+
+   if (count > Elements(bos)) {
+      assert(0);
+      return FALSE;
+   }
+
+   for (i = 0; i < count; i++)
+      bos[i] = i965_libdrm_buffer(buffers[i])->bo;
+
+   return dri_bufmgr_check_aperture_space(bos, count);
 }
 
-static int
-intel_drm_buffer_write(struct intel_winsys *iws,
-                       struct intel_buffer *buffer,
-                       size_t offset,
-                       size_t size,
-                       const void *data)
+/**
+ * Map a buffer.
+ */
+static void *
+i965_libdrm_bo_map(struct brw_winsys_buffer *buffer,
+		   boolean write)
 {
-   struct intel_drm_buffer *buf = intel_drm_buffer(buffer);
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   int ret;
+
+   if (!buf->map_count) {
+      if (buf->map_gtt) {
+	 ret = drm_intel_gem_bo_map_gtt(buf->bo);
+	 if (ret)
+	    return NULL;
+      }
+      else {
+	 ret = drm_intel_bo_map(buf->bo, write);
+	 if (ret)
+	    return NULL;
+      }
+   }
 
-   return drm_intel_bo_subdata(buf->bo, offset, size, (void*)data);
+   buf->map_count++;
+   return buf->bo->virtual;
 }
 
-static void
-intel_drm_buffer_destroy(struct intel_winsys *iws,
-                         struct intel_buffer *buffer)
+/**
+ * Unmap a buffer.
+ */
+static void 
+i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer)
 {
-   drm_intel_bo_unreference(intel_bo(buffer));
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
 
-#ifdef DEBUG
-   intel_drm_buffer(buffer)->magic = 0;
-   intel_drm_buffer(buffer)->bo = NULL;
-#endif
+   if (--buf->map_count > 0)
+      return;
 
-   FREE(buffer);
+   if (buf->map_gtt)
+      drm_intel_gem_bo_unmap_gtt(buf->bo);
+   else
+      drm_intel_bo_unmap(buf->bo);
 }
 
+
 void
-intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws)
+i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws)
 {
-   idws->base.buffer_create = intel_drm_buffer_create;
-   idws->base.buffer_set_fence_reg = intel_drm_buffer_set_fence_reg;
-   idws->base.buffer_map = intel_drm_buffer_map;
-   idws->base.buffer_unmap = intel_drm_buffer_unmap;
-   idws->base.buffer_write = intel_drm_buffer_write;
-   idws->base.buffer_destroy = intel_drm_buffer_destroy;
+   idws->base.bo_alloc             = i965_libdrm_bo_alloc;
+   idws->base.bo_reference         = i965_libdrm_bo_reference;
+   idws->base.bo_unreference       = i965_libdrm_bo_unreference;
+   idws->base.bo_emit_reloc        = i965_libdrm_bo_emit_reloc;
+   idws->base.bo_exec              = i965_libdrm_bo_exec;
+   idws->base.bo_subdata           = i965_libdrm_bo_subdata;
+   idws->base.bo_is_busy           = i965_libdrm_bo_is_busy;
+   idws->base.bo_references        = i965_libdrm_bo_references;
+   idws->base.check_aperture_space = i965_libdrm_check_aperture_space;
+   idws->base.bo_map               = i965_libdrm_bo_map;
+   idws->base.bo_unmap             = i965_libdrm_bo_unmap;
 }
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c b/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c
deleted file mode 100644
index e70bfe7b44..0000000000
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_fence.c
+++ /dev/null
@@ -1,81 +0,0 @@
-
-#include "intel_drm_winsys.h"
-#include "util/u_memory.h"
-#include "pipe/p_refcnt.h"
-
-/**
- * Because gem does not have fence's we have to create our own fences.
- *
- * They work by keeping the batchbuffer around and checking if that has
- * been idled. If bo is NULL fence has expired.
- */
-struct intel_drm_fence
-{
-   struct pipe_reference reference;
-   drm_intel_bo *bo;
-};
-
-
-struct pipe_fence_handle *
-intel_drm_fence_create(drm_intel_bo *bo)
-{
-   struct intel_drm_fence *fence = CALLOC_STRUCT(intel_drm_fence);
-
-   pipe_reference_init(&fence->reference, 1);
-   /* bo is null if fence already expired */
-   if (bo) {
-      drm_intel_bo_reference(bo);
-      fence->bo = bo;
-   }
-
-   return (struct pipe_fence_handle *)fence;
-}
-
-static void
-intel_drm_fence_reference(struct intel_winsys *iws,
-                          struct pipe_fence_handle **ptr,
-                          struct pipe_fence_handle *fence)
-{
-   struct intel_drm_fence *old = (struct intel_drm_fence *)*ptr;
-   struct intel_drm_fence *f = (struct intel_drm_fence *)fence;
-
-   if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) {
-      if (old->bo)
-         drm_intel_bo_unreference(old->bo);
-      FREE(old);
-   }
-}
-
-static int
-intel_drm_fence_signalled(struct intel_winsys *iws,
-                          struct pipe_fence_handle *fence)
-{
-   assert(0);
-
-   return 0;
-}
-
-static int
-intel_drm_fence_finish(struct intel_winsys *iws,
-                       struct pipe_fence_handle *fence)
-{
-   struct intel_drm_fence *f = (struct intel_drm_fence *)fence;
-
-   /* fence already expired */
-   if (!f->bo)
-      return 0;
-
-   drm_intel_bo_wait_rendering(f->bo);
-   drm_intel_bo_unreference(f->bo);
-   f->bo = NULL;
-
-   return 0;
-}
-
-void
-intel_drm_winsys_init_fence_functions(struct intel_drm_winsys *idws)
-{
-   idws->base.fence_reference = intel_drm_fence_reference;
-   idws->base.fence_signalled = intel_drm_fence_signalled;
-   idws->base.fence_finish = intel_drm_fence_finish;
-}
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
index 9854756880..bfcd512cef 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
@@ -2,56 +2,45 @@
 #ifndef INTEL_DRM_WINSYS_H
 #define INTEL_DRM_WINSYS_H
 
-#include "i965/intel_batchbuffer.h"
+#include "i965/brw_winsys.h"
 
 #include "drm.h"
 #include "intel_bufmgr.h"
 
 
+
 /*
  * Winsys
  */
 
 
-struct intel_drm_winsys
+struct i965_libdrm_winsys
 {
-   struct intel_winsys base;
+   struct brw_winsys_screen base;
+   drm_intel_bufmgr *gem;
 
-   boolean softpipe;
    boolean dump_cmd;
 
    int fd; /**< Drm file discriptor */
 
    unsigned id;
-
-   size_t max_batch_size;
-
-   struct {
-      drm_intel_bufmgr *gem;
-   } pools;
 };
 
-static INLINE struct intel_drm_winsys *
-intel_drm_winsys(struct intel_winsys *iws)
+static INLINE struct i965_libdrm_winsys *
+i965_libdrm_winsys(struct brw_winsys_screen *iws)
 {
-   return (struct intel_drm_winsys *)iws;
+   return (struct i965_libdrm_winsys *)iws;
 }
 
-struct intel_drm_winsys * intel_drm_winsys_create(int fd, unsigned pci_id);
-struct pipe_fence_handle * intel_drm_fence_create(drm_intel_bo *bo);
+struct i965_libdrm_winsys *i965_libdrm_winsys_create(int fd, unsigned pci_id);
 
-void intel_drm_winsys_init_batchbuffer_functions(struct intel_drm_winsys *idws);
-void intel_drm_winsys_init_buffer_functions(struct intel_drm_winsys *idws);
-void intel_drm_winsys_init_fence_functions(struct intel_drm_winsys *idws);
+void i965_libdrm_winsys_init_buffer_functions(struct i965_libdrm_winsys *idws);
 
 
-/*
- * Buffer
+/* Buffer.  
  */
-
-
-struct intel_drm_buffer {
-   unsigned magic;
+struct i965_libdrm_buffer {
+   struct brw_winsys_buffer base;
 
    drm_intel_bo *bo;
 
@@ -61,18 +50,15 @@ struct intel_drm_buffer {
 
    boolean flinked;
    unsigned flink;
+
+   unsigned cheesy_refcount;
 };
 
-static INLINE struct intel_drm_buffer *
-intel_drm_buffer(struct intel_buffer *buffer)
+static INLINE struct i965_libdrm_buffer *
+i965_libdrm_buffer(struct brw_winsys_buffer *buffer)
 {
-   return (struct intel_drm_buffer *)buffer;
+   return (struct i965_libdrm_buffer *)buffer;
 }
 
-static INLINE drm_intel_bo *
-intel_bo(struct intel_buffer *buffer)
-{
-   return intel_drm_buffer(buffer)->bo;
-}
 
 #endif
-- 
cgit v1.2.3


From 1f630fa0167ed799556a764178772c096a3ddeba Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 1 Nov 2009 11:54:52 -0800
Subject: r300g: Miscellanea. Avoid draw segfaults, s/true/TRUE/, etc.

Cleared out my git stash.
---
 src/gallium/drivers/r300/r300_context.h |  2 +-
 src/gallium/drivers/r300/r300_debug.c   |  6 +++---
 src/gallium/drivers/r300/r300_emit.c    |  2 +-
 src/gallium/drivers/r300/r300_state.c   | 28 +++++++++++++++++++++-------
 src/gallium/drivers/r300/r300_vs.c      |  4 ++--
 5 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index b1738452de..ae7015634c 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -339,7 +339,7 @@ void r300_init_surface_functions(struct r300_context* r300);
 
 static INLINE boolean DBG_ON(struct r300_context * ctx, unsigned flags)
 {
-    return (ctx->debug & flags) ? true : false;
+    return (ctx->debug & flags) ? TRUE : FALSE;
 }
 
 static INLINE void DBG(struct r300_context * ctx, unsigned flags, const char * fmt, ...)
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 421253ca72..2a6ed54ac9 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -49,7 +49,7 @@ static struct debug_option debug_options[] = {
 void r300_init_debug(struct r300_context * ctx)
 {
     const char * options = debug_get_option("RADEON_DEBUG", 0);
-    boolean printhint = false;
+    boolean printhint = FALSE;
     size_t length;
     struct debug_option * opt;
 
@@ -71,14 +71,14 @@ void r300_init_debug(struct r300_context * ctx)
 
             if (!opt->name) {
                 debug_printf("Unknown debug option: %s\n", options);
-                printhint = true;
+                printhint = TRUE;
             }
 
             options += length;
         }
 
         if (!ctx->debug)
-            printhint = true;
+            printhint = TRUE;
     }
 
     if (printhint || ctx->debug & DBG_HELP) {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 5b03c1aa6c..79972dbb49 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -800,7 +800,7 @@ validate:
     for (i = 0; i < r300->texture_count; i++) {
         tex = r300->textures[i];
         if (!tex)
-	    continue;
+            continue;
         if (!r300->winsys->add_buffer(r300->winsys, tex->buffer,
                     RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0)) {
             r300->context.flush(&r300->context, 0, NULL);
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 4cf01389d2..af063d4b20 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -281,7 +281,9 @@ static void
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    draw_flush(r300->draw);
+    if (r300->draw) {
+        draw_flush(r300->draw);
+    }
 
     r300->framebuffer_state = *state;
 
@@ -444,10 +446,13 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
     struct r300_context* r300 = r300_context(pipe);
     struct r300_rs_state* rs = (struct r300_rs_state*)state;
 
-    draw_flush(r300->draw);
-    draw_set_rasterizer_state(r300->draw, &rs->rs);
+    if (r300->draw) {
+        draw_flush(r300->draw);
+        draw_set_rasterizer_state(r300->draw, &rs->rs);
+    }
 
     r300->rs_state = rs;
+    /* XXX Clean these up when we move to atom emits */
     r300->dirty_state |= R300_NEW_RASTERIZER;
     r300->dirty_state |= R300_NEW_RS_BLOCK;
     r300->dirty_state |= R300_NEW_SCISSOR;
@@ -623,8 +628,10 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
 
     r300->vertex_buffer_count = count;
 
-    draw_flush(r300->draw);
-    draw_set_vertex_buffers(r300->draw, count, buffers);
+    if (r300->draw) {
+        draw_flush(r300->draw);
+        draw_set_vertex_buffers(r300->draw, count, buffers);
+    }
 }
 
 static void r300_set_vertex_elements(struct pipe_context* pipe,
@@ -633,8 +640,15 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    draw_flush(r300->draw);
-    draw_set_vertex_elements(r300->draw, count, elements);
+    memcpy(r300->vertex_elements, elements,
+        sizeof(struct pipe_vertex_element) * count);
+
+    r300->vertex_element_count = count;
+
+    if (r300->draw) {
+        draw_flush(r300->draw);
+        draw_set_vertex_elements(r300->draw, count, elements);
+    }
 }
 
 static void* r300_create_vs_state(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index eca85879a7..74ef416dc1 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -37,7 +37,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
     struct tgsi_shader_info* info = &vs->info;
     struct tgsi_parse_context parser;
     struct tgsi_full_declaration * decl;
-    boolean pointsize = false;
+    boolean pointsize = FALSE;
     int out_colors = 0;
     int colors = 0;
     int out_generic = 0;
@@ -52,7 +52,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
     for (i = 0; i < info->num_outputs; i++) {
         switch (info->output_semantic_name[i]) {
             case TGSI_SEMANTIC_PSIZE:
-                pointsize = true;
+                pointsize = TRUE;
                 break;
             case TGSI_SEMANTIC_COLOR:
                 out_colors++;
-- 
cgit v1.2.3


From 87d7c1aa15a944d64e43b217e18553256f9fb681 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 1 Nov 2009 18:25:59 -0500
Subject: nouveau: Assume all texture blankets are linear for now.

---
 src/gallium/drivers/nv30/nv30_miptree.c              | 3 +++
 src/gallium/drivers/nv40/nv40_miptree.c              | 3 +++
 src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c | 3 +--
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 17acca61ab..280696d450 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -147,6 +147,9 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->level[0].pitch = stride[0];
 	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
 
+	/* Assume whoever created this buffer expects it to be linear for now */
+	mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
 	pipe_buffer_reference(&mt->buffer, pb);
 	return &mt->base;
 }
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 5a201ccf45..465dd3b069 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -141,6 +141,9 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->level[0].pitch = stride[0];
 	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
 
+	/* Assume whoever created this buffer expects it to be linear for now */
+	mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
 	pipe_buffer_reference(&mt->buffer, pb);
 	return &mt->base;
 }
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index f512c0e5f3..317dc44d22 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -21,8 +21,7 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 	struct pipe_texture tmpl;
 
 	memset(&tmpl, 0, sizeof(tmpl));
-	tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY |
-			 NOUVEAU_TEXTURE_USAGE_LINEAR;
+	tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY;
 	tmpl.target = PIPE_TEXTURE_2D;
 	tmpl.last_level = 0;
 	tmpl.depth[0] = 1;
-- 
cgit v1.2.3


From 23c2ba828058255d8fdf3471bb924127e6c27f6c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 2 Nov 2009 08:48:26 +0000
Subject: i965g: rename brw_constant_buffer to brw_curbe_buffer

Now that there are real constant buffers, try to reduce naming confusion.
---
 src/gallium/drivers/i965/brw_curbe.c        | 10 +++++-----
 src/gallium/drivers/i965/brw_state.h        |  2 +-
 src/gallium/drivers/i965/brw_state_upload.c |  8 +-------
 3 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 3dd08f6eeb..ed5b250f82 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -160,7 +160,7 @@ static GLfloat fixed_plane[6][4] = {
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
-static int prepare_constant_buffer(struct brw_context *brw)
+static int prepare_curbe_buffer(struct brw_context *brw)
 {
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
@@ -312,7 +312,7 @@ static int prepare_constant_buffer(struct brw_context *brw)
    return 0;
 }
 
-static int emit_constant_buffer(struct brw_context *brw)
+static int emit_curbe_buffer(struct brw_context *brw)
 {
    GLuint sz = brw->curbe.total_size;
 
@@ -330,7 +330,7 @@ static int emit_constant_buffer(struct brw_context *brw)
    return 0;
 }
 
-const struct brw_tracked_state brw_constant_buffer = {
+const struct brw_tracked_state brw_curbe_buffer = {
    .dirty = {
       .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
 	       PIPE_NEW_VERTEX_CONSTANTS |
@@ -343,7 +343,7 @@ const struct brw_tracked_state brw_constant_buffer = {
 	       BRW_NEW_BATCH),
       .cache = (CACHE_NEW_WM_PROG) 
    },
-   .prepare = prepare_constant_buffer,
-   .emit = emit_constant_buffer,
+   .prepare = prepare_curbe_buffer,
+   .emit = emit_curbe_buffer,
 };
 
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index b47b04fd46..3b9151ab2f 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -55,7 +55,7 @@ const struct brw_tracked_state brw_cc_vp;
 const struct brw_tracked_state brw_check_fallback;
 const struct brw_tracked_state brw_clip_prog;
 const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_constant_buffer;
+const struct brw_tracked_state brw_curbe_buffer;
 const struct brw_tracked_state brw_curbe_offsets;
 const struct brw_tracked_state brw_invarient_state;
 const struct brw_tracked_state brw_gs_prog;
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index eff3a40a46..4132c6ac69 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -36,12 +36,6 @@
 #include "brw_batchbuffer.h"
 #include "brw_debug.h"
 
-/* This is used to initialize brw->state.atoms[].  We could use this
- * list directly except for a single atom, brw_constant_buffer, which
- * has a .dirty value which changes according to the parameters of the
- * current fragment and vertex programs, and so cannot be a static
- * value.
- */
 const struct brw_tracked_state *atoms[] =
 {
    &brw_check_fallback,
@@ -94,7 +88,7 @@ const struct brw_tracked_state *atoms[] =
    &brw_index_buffer,
    &brw_vertices,
 
-   &brw_constant_buffer
+   &brw_curbe_buffer
 };
 
 
-- 
cgit v1.2.3


From 4ea94c04c9ab7b11fa06c60f2487a911f1422844 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 2 Nov 2009 08:49:02 +0000
Subject: i965g: format RELOCs similarly

---
 src/gallium/drivers/i965/brw_cc.c         |  3 +--
 src/gallium/drivers/i965/brw_clip_state.c |  3 +--
 src/gallium/drivers/i965/brw_misc_state.c | 32 +++++++++++++++++++++++--------
 3 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index cf3791e11e..c6267e1c60 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -151,8 +151,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 
    /* Emit CC viewport relocation */
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION,
-			   0,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0,
 			   0,
 			   offsetof(struct brw_cc_unit_state, cc4),
 			   brw->cc.vp_bo);
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 31e2e0bc17..8be53e4bfb 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -150,8 +150,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION,
-			   0,
+			   I915_GEM_DOMAIN_INSTRUCTION, 0,
 			   clip.thread0.grf_reg_count << 1,
 			   offsetof(struct brw_clip_unit_state, thread0),
 			   brw->clip.prog_bo);
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index db8a2a5008..06b9a2d2df 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -110,13 +110,17 @@ static int upload_binding_table_pointers(struct brw_context *brw)
    BEGIN_BATCH(6, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
    if (brw->vs.bind_bo != NULL)
-      OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */
+      OUT_RELOC(brw->vs.bind_bo, 
+		I915_GEM_DOMAIN_SAMPLER, 0,
+		0); /* vs */
    else
       OUT_BATCH(0);
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
-   OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */
+   OUT_RELOC(brw->wm.bind_bo,
+	     I915_GEM_DOMAIN_SAMPLER, 0,
+	     0); /* wm/ps */
    ADVANCE_BATCH();
    return 0;
 }
@@ -142,15 +146,27 @@ static int upload_pipelined_state_pointers(struct brw_context *brw )
 {
    BEGIN_BATCH(7, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
-   OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->vs.state_bo, 
+	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     0);
    if (brw->gs.prog_active)
-      OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+      OUT_RELOC(brw->gs.state_bo, 
+		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		1);
    else
       OUT_BATCH(0);
-   OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
-   OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-   OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
-   OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+   OUT_RELOC(brw->clip.state_bo, 
+	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     1);
+   OUT_RELOC(brw->sf.state_bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     0);
+   OUT_RELOC(brw->wm.state_bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     0);
+   OUT_RELOC(brw->cc.state_bo,
+	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     0);
    ADVANCE_BATCH();
 
    brw->state.dirty.brw |= BRW_NEW_PSP;
-- 
cgit v1.2.3


From eb699d64ec7057032139baccedcb0694ca41d706 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Fri, 30 Oct 2009 08:27:17 +0000
Subject: softpipe: Sanitise shader semantic and interpolator handling.

Handle the remaining semantic names and indices.
Respect color interpolator when not flatshading.
---
 src/gallium/drivers/softpipe/sp_state_derived.c | 34 ++++++++-----------------
 1 file changed, 10 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 1faeca1c2a..3bc96b9538 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -66,8 +66,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
    if (vinfo->num_attribs == 0) {
       /* compute vertex layout now */
       const struct sp_fragment_shader *spfs = softpipe->fs;
-      const enum interp_mode colorInterp
-         = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
       struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
       const uint num = draw_num_vs_outputs(softpipe->draw);
       uint i;
@@ -108,33 +106,21 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
 
          switch (spfs->info.input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
-            src = draw_find_vs_output(softpipe->draw,
-                                      TGSI_SEMANTIC_POSITION, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+            interp = INTERP_POS;
             break;
 
          case TGSI_SEMANTIC_COLOR:
-            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, 
-                                 spfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+            if (softpipe->rasterizer->flatshade) {
+               interp = INTERP_CONSTANT;
+            }
             break;
-
-         case TGSI_SEMANTIC_FOG:
-            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
-            break;
-
-         case TGSI_SEMANTIC_GENERIC:
-         case TGSI_SEMANTIC_FACE:
-            /* this includes texcoords and varying vars */
-            src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC,
-                                      spfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
-            break;
-
-         default:
-            assert(0);
          }
+
+         /* this includes texcoords and varying vars */
+         src = draw_find_vs_output(softpipe->draw,
+                                   spfs->info.input_semantic_name[i],
+                                   spfs->info.input_semantic_index[i]);
+         draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
       softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
-- 
cgit v1.2.3


From 677a055fa0cf7b6476c716be187513c41060d417 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 3 Nov 2009 13:10:58 +0000
Subject: llvmpipe: Respect gl_rasterization_rules in primitive setup.

Based on Michal's identical commit for softpipe
(ca9c413647bf9efb5ed770e3a655bc758075aec7).
---
 src/gallium/drivers/llvmpipe/lp_setup.c | 48 ++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index c43b3da450..11ebfa0236 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -90,6 +90,8 @@ struct setup_context {
    float oneoverarea;
    int facing;
 
+   float pixel_offset;
+
    struct quad_header quad[MAX_QUADS];
    struct quad_header *quad_ptrs[MAX_QUADS];
    unsigned count;
@@ -483,6 +485,16 @@ static boolean setup_sort_vertices( struct setup_context *setup,
       ((det > 0.0) ^ 
        (setup->llvmpipe->rasterizer->front_winding == PIPE_WINDING_CW));
 
+   /* Prepare pixel offset for rasterisation:
+    *  - pixel center (0.5, 0.5) for GL, or
+    *  - assume (0.0, 0.0) for other APIs.
+    */
+   if (setup->llvmpipe->rasterizer->gl_rasterization_rules) {
+      setup->pixel_offset = 0.5f;
+   } else {
+      setup->pixel_offset = 0.0f;
+   }
+
    return TRUE;
 }
 
@@ -508,7 +520,7 @@ static void tri_pos_coeff( struct setup_context *setup,
 
    /* calculate a0 as the value which would be sampled for the
     * fragment at (0,0), taking into account that we want to sample at
-    * pixel centers, in other words (0.5, 0.5).
+    * pixel centers, in other words (pixel_offset, pixel_offset).
     *
     * this is neat but unfortunately not a good way to do things for
     * triangles with very large values of dadx or dady as it will
@@ -519,8 +531,8 @@ static void tri_pos_coeff( struct setup_context *setup,
     * instead - i'll switch to this later.
     */
    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
-                           (dadx * (setup->vmin[0][0] - 0.5f) +
-                            dady * (setup->vmin[0][1] - 0.5f)));
+                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
 
    /*
    debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -609,8 +621,8 @@ static void tri_linear_coeff( struct setup_context *setup,
        * instead - i'll switch to this later.
        */
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
 
       /*
       debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
@@ -661,8 +673,8 @@ static void tri_persp_coeff( struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (mina -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
@@ -746,12 +758,12 @@ static void setup_tri_coefficients( struct setup_context *setup )
 
 static void setup_tri_edges( struct setup_context *setup )
 {
-   float vmin_x = setup->vmin[0][0] + 0.5f;
-   float vmid_x = setup->vmid[0][0] + 0.5f;
+   float vmin_x = setup->vmin[0][0] + setup->pixel_offset;
+   float vmid_x = setup->vmid[0][0] + setup->pixel_offset;
 
-   float vmin_y = setup->vmin[0][1] - 0.5f;
-   float vmid_y = setup->vmid[0][1] - 0.5f;
-   float vmax_y = setup->vmax[0][1] - 0.5f;
+   float vmin_y = setup->vmin[0][1] - setup->pixel_offset;
+   float vmid_y = setup->vmid[0][1] - setup->pixel_offset;
+   float vmax_y = setup->vmax[0][1] - setup->pixel_offset;
 
    setup->emaj.sy = ceilf(vmin_y);
    setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy);
@@ -950,8 +962,8 @@ linear_pos_coeff(struct setup_context *setup,
    setup->coef.dadx[0][i] = dadx;
    setup->coef.dady[0][i] = dady;
    setup->coef.a0[0][i] = (setup->vmin[vertSlot][i] -
-                           (dadx * (setup->vmin[0][0] - 0.5f) +
-                            dady * (setup->vmin[0][1] - 0.5f)));
+                           (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                            dady * (setup->vmin[0][1] - setup->pixel_offset)));
 }
 
 
@@ -972,8 +984,8 @@ line_linear_coeff(struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
@@ -998,8 +1010,8 @@ line_persp_coeff(struct setup_context *setup,
       setup->coef.dadx[1 + attrib][i] = dadx;
       setup->coef.dady[1 + attrib][i] = dady;
       setup->coef.a0[1 + attrib][i] = (setup->vmin[vertSlot][i] -
-                     (dadx * (setup->vmin[0][0] - 0.5f) +
-                      dady * (setup->vmin[0][1] - 0.5f)));
+                     (dadx * (setup->vmin[0][0] - setup->pixel_offset) +
+                      dady * (setup->vmin[0][1] - setup->pixel_offset)));
    }
 }
 
-- 
cgit v1.2.3


From 026cf84bbbd939f0ae573a9841bb49aaa1d9ae75 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 23 Aug 2009 11:22:41 +0100
Subject: llvmpipe: remove old prim_setup draw stage

Everything now goes through the draw_vbuf handler, the same as
regular drivers.

Based on Keith's commit 4fe0fc3eba1f79beda890a5016359d549bab6ad4.
---
 src/gallium/drivers/llvmpipe/Makefile           |   1 -
 src/gallium/drivers/llvmpipe/SConscript         |   1 -
 src/gallium/drivers/llvmpipe/lp_context.c       |  26 ++--
 src/gallium/drivers/llvmpipe/lp_context.h       |   5 +-
 src/gallium/drivers/llvmpipe/lp_prim_setup.c    | 190 ------------------------
 src/gallium/drivers/llvmpipe/lp_prim_setup.h    |  85 -----------
 src/gallium/drivers/llvmpipe/lp_prim_vbuf.c     | 107 ++++---------
 src/gallium/drivers/llvmpipe/lp_prim_vbuf.h     |   4 +-
 src/gallium/drivers/llvmpipe/lp_setup.c         |   1 -
 src/gallium/drivers/llvmpipe/lp_state_derived.c |  25 ++--
 10 files changed, 59 insertions(+), 386 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.c
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_prim_setup.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index cdf318844c..e038a5229e 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -35,7 +35,6 @@ C_SOURCES = \
 	lp_draw_arrays.c \
 	lp_flush.c \
 	lp_jit.c \
-	lp_prim_setup.c \
 	lp_prim_vbuf.c \
 	lp_setup.c \
 	lp_query.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index f4410f8201..3bd2e70013 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -49,7 +49,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_draw_arrays.c',
 		'lp_flush.c',
 		'lp_jit.c',
-		'lp_prim_setup.c',
 		'lp_prim_vbuf.c',
 		'lp_setup.c',
 		'lp_query.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 202cb8ef43..57e71f3e98 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -31,13 +31,13 @@
  */
 
 #include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "lp_clear.h"
 #include "lp_context.h"
 #include "lp_flush.h"
-#include "lp_prim_setup.h"
 #include "lp_prim_vbuf.h"
 #include "lp_state.h"
 #include "lp_surface.h"
@@ -264,21 +264,21 @@ llvmpipe_create( struct pipe_screen *screen )
                          (struct tgsi_sampler **)
                             llvmpipe->tgsi.vert_samplers_list);
 
-   llvmpipe->setup = lp_draw_render_stage(llvmpipe);
-   if (!llvmpipe->setup)
-      goto fail;
-
    if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
       llvmpipe->no_rast = TRUE;
 
-   if (debug_get_bool_option( "LP_NO_VBUF", FALSE )) {
-      /* Deprecated path -- vbuf is the intended interface to the draw module:
-       */
-      draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->setup);
-   }
-   else {
-      lp_init_vbuf(llvmpipe);
-   }
+   llvmpipe->vbuf_backend = lp_create_vbuf_backend(llvmpipe);
+   if (!llvmpipe->vbuf_backend)
+      goto fail;
+
+   llvmpipe->vbuf = draw_vbuf_stage(llvmpipe->draw, llvmpipe->vbuf_backend);
+   if (!llvmpipe->vbuf)
+      goto fail;
+
+   draw_set_rasterize_stage(llvmpipe->draw, llvmpipe->vbuf);
+   draw_set_render(llvmpipe->draw, llvmpipe->vbuf_backend);
+
+
 
    /* plug in AA line/point stages */
    draw_install_aaline_stage(llvmpipe->draw, &llvmpipe->pipe);
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 7df340554e..3ad95d0bfc 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -121,9 +121,10 @@ struct llvmpipe_context {
 
    /** The primitive drawing context */
    struct draw_context *draw;
-   struct draw_stage *setup;
+
+   /** Draw module backend */
+   struct vbuf_render *vbuf_backend;
    struct draw_stage *vbuf;
-   struct llvmpipe_vbuf_render *vbuf_render;
 
    boolean dirty_render_cache;
    
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.c b/src/gallium/drivers/llvmpipe/lp_prim_setup.c
deleted file mode 100644
index b14f8fb99d..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \brief A draw stage that drives our triangle setup routines from
- * within the draw pipeline.  One of two ways to drive setup, the
- * other being in lp_prim_vbuf.c.
- *
- * \author  Keith Whitwell <keith@tungstengraphics.com>
- * \author  Brian Paul
- */
-
-
-#include "lp_context.h"
-#include "lp_setup.h"
-#include "lp_state.h"
-#include "lp_prim_setup.h"
-#include "draw/draw_pipe.h"
-#include "draw/draw_vertex.h"
-#include "util/u_memory.h"
-
-/**
- * Triangle setup info (derived from draw_stage).
- * Also used for line drawing (taking some liberties).
- */
-struct setup_stage {
-   struct draw_stage stage; /**< This must be first (base class) */
-
-   struct setup_context *setup;
-};
-
-
-
-/**
- * Basically a cast wrapper.
- */
-static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
-{
-   return (struct setup_stage *)stage;
-}
-
-
-typedef const float (*cptrf4)[4];
-
-static void
-do_tri(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-   
-   llvmpipe_setup_tri( setup->setup,
-              (cptrf4)prim->v[0]->data,
-              (cptrf4)prim->v[1]->data,
-              (cptrf4)prim->v[2]->data );
-}
-
-static void
-do_line(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-
-   llvmpipe_setup_line( setup->setup,
-               (cptrf4)prim->v[0]->data,
-               (cptrf4)prim->v[1]->data );
-}
-
-static void
-do_point(struct draw_stage *stage, struct prim_header *prim)
-{
-   struct setup_stage *setup = setup_stage( stage );
-
-   llvmpipe_setup_point( setup->setup,
-                (cptrf4)prim->v[0]->data );
-}
-
-
-
-
-static void setup_begin( struct draw_stage *stage )
-{
-   struct setup_stage *setup = setup_stage(stage);
-
-   llvmpipe_setup_prepare( setup->setup );
-
-   stage->point = do_point;
-   stage->line = do_line;
-   stage->tri = do_tri;
-}
-
-
-static void setup_first_point( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->point( stage, header );
-}
-
-static void setup_first_line( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->line( stage, header );
-}
-
-
-static void setup_first_tri( struct draw_stage *stage,
-			       struct prim_header *header )
-{
-   setup_begin(stage);
-   stage->tri( stage, header );
-}
-
-
-
-static void setup_flush( struct draw_stage *stage,
-			 unsigned flags )
-{
-   stage->point = setup_first_point;
-   stage->line = setup_first_line;
-   stage->tri = setup_first_tri;
-}
-
-
-static void reset_stipple_counter( struct draw_stage *stage )
-{
-}
-
-
-static void render_destroy( struct draw_stage *stage )
-{
-   struct setup_stage *ssetup = setup_stage(stage);
-   llvmpipe_setup_destroy_context(ssetup->setup);
-   FREE( stage );
-}
-
-
-/**
- * Create a new primitive setup/render stage.
- */
-struct draw_stage *lp_draw_render_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct setup_stage *sstage = CALLOC_STRUCT(setup_stage);
-
-   sstage->setup = llvmpipe_setup_create_context(llvmpipe);
-   sstage->stage.draw = llvmpipe->draw;
-   sstage->stage.point = setup_first_point;
-   sstage->stage.line = setup_first_line;
-   sstage->stage.tri = setup_first_tri;
-   sstage->stage.flush = setup_flush;
-   sstage->stage.reset_stipple_counter = reset_stipple_counter;
-   sstage->stage.destroy = render_destroy;
-
-   return (struct draw_stage *)sstage;
-}
-
-struct setup_context *
-lp_draw_setup_context( struct draw_stage *stage )
-{
-   struct setup_stage *ssetup = setup_stage(stage);
-   return ssetup->setup;
-}
-
-void
-lp_draw_flush( struct draw_stage *stage )
-{
-   stage->flush( stage, 0 );
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_setup.h b/src/gallium/drivers/llvmpipe/lp_prim_setup.h
deleted file mode 100644
index da6cae6375..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_prim_setup.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-
-#ifndef LP_PRIM_SETUP_H
-#define LP_PRIM_SETUP_H
-
-
-/**
- * vbuf is a special stage to gather the stream of triangles, lines, points
- * together and reconstruct vertex buffers for hardware upload.
- *
- * First attempt, work in progress.
- * 
- * TODO:
- *    - separate out vertex buffer building and primitive emit, ie >1 draw per vb.
- *    - tell vbuf stage how to build hw vertices directly
- *    - pass vbuf stage a buffer pointer for direct emit to agp/vram.
- *
- *
- *
- * Vertices are just an array of floats, with all the attributes
- * packed.  We currently assume a layout like:
- *
- * attr[0][0..3] - window position
- * attr[1..n][0..3] - remaining attributes.
- *
- * Attributes are assumed to be 4 floats wide but are packed so that
- * all the enabled attributes run contiguously.
- */
-
-
-struct draw_stage;
-struct llvmpipe_context;
-
-
-typedef void (*vbuf_draw_func)( struct pipe_context *pipe,
-                                unsigned prim,
-                                const ushort *elements,
-                                unsigned nr_elements,
-                                const void *vertex_buffer,
-                                unsigned nr_vertices );
-
-
-extern struct draw_stage *
-lp_draw_render_stage( struct llvmpipe_context *llvmpipe );
-
-extern struct setup_context *
-lp_draw_setup_context( struct draw_stage * );
-
-extern void
-lp_draw_flush( struct draw_stage * );
-
-
-extern struct draw_stage *
-lp_draw_vbuf_stage( struct draw_context *draw_context,
-                    struct pipe_context *pipe,
-                    vbuf_draw_func draw );
-
-
-#endif /* LP_PRIM_SETUP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index c394dcb61d..4abff4eccc 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -37,10 +37,9 @@
 
 
 #include "lp_context.h"
+#include "lp_setup.h"
 #include "lp_state.h"
 #include "lp_prim_vbuf.h"
-#include "lp_prim_setup.h"
-#include "lp_setup.h"
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 #include "util/u_memory.h"
@@ -59,6 +58,8 @@ struct llvmpipe_vbuf_render
 {
    struct vbuf_render base;
    struct llvmpipe_context *llvmpipe;
+   struct setup_context *setup;
+
    uint prim;
    uint vertex_size;
    uint nr_vertices;
@@ -75,6 +76,11 @@ llvmpipe_vbuf_render(struct vbuf_render *vbr)
 }
 
 
+
+
+
+
+
 static const struct vertex_info *
 lp_vbuf_get_vertex_info(struct vbuf_render *vbr)
 {
@@ -105,36 +111,6 @@ lp_vbuf_allocate_vertices(struct vbuf_render *vbr,
 static void
 lp_vbuf_release_vertices(struct vbuf_render *vbr)
 {
-#if 0
-   {
-      struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-      const struct vertex_info *info = 
-         llvmpipe_get_vbuf_vertex_info(cvbr->llvmpipe);
-      const float *vtx = (const float *) cvbr->vertex_buffer;
-      uint i, j;
-      debug_printf("%s (vtx_size = %u,  vtx_used = %u)\n",
-             __FUNCTION__, cvbr->vertex_size, cvbr->nr_vertices);
-      for (i = 0; i < cvbr->nr_vertices; i++) {
-         for (j = 0; j < info->num_attribs; j++) {
-            uint k;
-            switch (info->attrib[j].emit) {
-            case EMIT_4F:  k = 4;   break;
-            case EMIT_3F:  k = 3;   break;
-            case EMIT_2F:  k = 2;   break;
-            case EMIT_1F:  k = 1;   break;
-            default: assert(0);
-            }
-            debug_printf("Vert %u attr %u: ", i, j);
-            while (k-- > 0) {
-               debug_printf("%g ", vtx[0]);
-               vtx++;
-            }
-            debug_printf("\n");
-         }
-      }
-   }
-#endif
-
    /* keep the old allocation for next time */
 }
 
@@ -160,11 +136,7 @@ static boolean
 lp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct setup_context *setup_ctx = lp_draw_setup_context(cvbr->llvmpipe->setup);
+   struct setup_context *setup_ctx = cvbr->setup;
    
    llvmpipe_setup_prepare( setup_ctx );
 
@@ -193,14 +165,9 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
    struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
    const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
    const void *vertex_buffer = cvbr->vertex_buffer;
+   struct setup_context *setup_ctx = cvbr->setup;
    unsigned i;
 
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct draw_stage *setup = llvmpipe->setup;
-   struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
    switch (cvbr->prim) {
    case PIPE_PRIM_POINTS:
       for (i = 0; i < nr; i++) {
@@ -367,11 +334,6 @@ lp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr)
    default:
       assert(0);
    }
-
-   /* XXX: why are we calling this???  If we had to call something, it
-    * would be a function in lp_setup.c:
-    */
-   lp_draw_flush( setup );
 }
 
 
@@ -384,17 +346,12 @@ lp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
    struct llvmpipe_context *llvmpipe = cvbr->llvmpipe;
+   struct setup_context *setup_ctx = cvbr->setup;
    const unsigned stride = llvmpipe->vertex_info_vbuf.size * sizeof(float);
    const void *vertex_buffer =
       (void *) get_vert(cvbr->vertex_buffer, start, stride);
    unsigned i;
 
-   /* XXX: break this dependency - make setup_context live under
-    * llvmpipe, rename the old "setup" draw stage to something else.
-    */
-   struct draw_stage *setup = llvmpipe->setup;
-   struct setup_context *setup_ctx = lp_draw_setup_context(setup);
-
    switch (cvbr->prim) {
    case PIPE_PRIM_POINTS:
       for (i = 0; i < nr; i++) {
@@ -568,40 +525,38 @@ static void
 lp_vbuf_destroy(struct vbuf_render *vbr)
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
-   cvbr->llvmpipe->vbuf_render = NULL;
+   llvmpipe_setup_destroy_context(cvbr->setup);
    FREE(cvbr);
 }
 
 
 /**
- * Initialize the post-transform vertex buffer information for the given
- * context.
+ * Create the post-transform vertex handler for the given context.
  */
-void
-lp_init_vbuf(struct llvmpipe_context *lp)
+struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *lp)
 {
-   assert(lp->draw);
+   struct llvmpipe_vbuf_render *cvbr = CALLOC_STRUCT(llvmpipe_vbuf_render);
 
-   lp->vbuf_render = CALLOC_STRUCT(llvmpipe_vbuf_render);
+   assert(lp->draw);
 
-   lp->vbuf_render->base.max_indices = LP_MAX_VBUF_INDEXES;
-   lp->vbuf_render->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
 
-   lp->vbuf_render->base.get_vertex_info = lp_vbuf_get_vertex_info;
-   lp->vbuf_render->base.allocate_vertices = lp_vbuf_allocate_vertices;
-   lp->vbuf_render->base.map_vertices = lp_vbuf_map_vertices;
-   lp->vbuf_render->base.unmap_vertices = lp_vbuf_unmap_vertices;
-   lp->vbuf_render->base.set_primitive = lp_vbuf_set_primitive;
-   lp->vbuf_render->base.draw = lp_vbuf_draw;
-   lp->vbuf_render->base.draw_arrays = lp_vbuf_draw_arrays;
-   lp->vbuf_render->base.release_vertices = lp_vbuf_release_vertices;
-   lp->vbuf_render->base.destroy = lp_vbuf_destroy;
+   cvbr->base.max_indices = LP_MAX_VBUF_INDEXES;
+   cvbr->base.max_vertex_buffer_bytes = LP_MAX_VBUF_SIZE;
 
-   lp->vbuf_render->llvmpipe = lp;
+   cvbr->base.get_vertex_info = lp_vbuf_get_vertex_info;
+   cvbr->base.allocate_vertices = lp_vbuf_allocate_vertices;
+   cvbr->base.map_vertices = lp_vbuf_map_vertices;
+   cvbr->base.unmap_vertices = lp_vbuf_unmap_vertices;
+   cvbr->base.set_primitive = lp_vbuf_set_primitive;
+   cvbr->base.draw = lp_vbuf_draw;
+   cvbr->base.draw_arrays = lp_vbuf_draw_arrays;
+   cvbr->base.release_vertices = lp_vbuf_release_vertices;
+   cvbr->base.destroy = lp_vbuf_destroy;
 
-   lp->vbuf = draw_vbuf_stage(lp->draw, &lp->vbuf_render->base);
+   cvbr->llvmpipe = lp;
 
-   draw_set_rasterize_stage(lp->draw, lp->vbuf);
+   cvbr->setup = llvmpipe_setup_create_context(cvbr->llvmpipe);
 
-   draw_set_render(lp->draw, &lp->vbuf_render->base);
+   return &cvbr->base;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
index 6c4e6063e6..0676e2f42a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.h
@@ -31,8 +31,8 @@
 
 struct llvmpipe_context;
 
-extern void
-lp_init_vbuf(struct llvmpipe_context *llvmpipe);
+extern struct vbuf_render *
+lp_create_vbuf_backend(struct llvmpipe_context *llvmpipe);
 
 
 #endif /* LP_VBUF_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 11ebfa0236..ffcbc9a379 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -33,7 +33,6 @@
  */
 
 #include "lp_context.h"
-#include "lp_prim_setup.h"
 #include "lp_quad.h"
 #include "lp_setup.h"
 #include "lp_state.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 30fb41ea65..31eaadda21 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -67,24 +67,19 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
       const struct lp_fragment_shader *lpfs = llvmpipe->fs;
       const enum interp_mode colorInterp
          = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+      struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
+      const uint num = draw_num_vs_outputs(llvmpipe->draw);
       uint i;
 
-      if (llvmpipe->vbuf) {
-         /* if using the post-transform vertex buffer, tell draw_vbuf to
-          * simply emit the whole post-xform vertex as-is:
-          */
-         struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
-         const uint num = draw_num_vs_outputs(llvmpipe->draw);
-         uint i;
-
-         /* No longer any need to try and emit draw vertex_header info.
-          */
-         vinfo_vbuf->num_attribs = 0;
-         for (i = 0; i < num; i++) {
-            draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
-         }
-         draw_compute_vertex_size(vinfo_vbuf);
+      /* Tell draw_vbuf to simply emit the whole post-xform vertex
+       * as-is.  No longer any need to try and emit draw vertex_header
+       * info.
+       */
+      vinfo_vbuf->num_attribs = 0;
+      for (i = 0; i < num; i++) {
+	 draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i);
       }
+      draw_compute_vertex_size(vinfo_vbuf);
 
       /*
        * Loop over fragment shader inputs, searching for the matching output
-- 
cgit v1.2.3


From ceb6728725a1eefe35a4d8371b2ff0abe212b5ad Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Fri, 30 Oct 2009 08:27:17 +0000
Subject: llvmpipe: Sanitise shader semantic and interpolator handling.

Handle the remaining semantic names and indices.
Respect color interpolator when not flatshading.

Based on Michal's softpipe commit
eb699d64ec7057032139baccedcb0694ca41d706.
---
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 34 ++++++++-----------------
 1 file changed, 10 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index 31eaadda21..c753b183c0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -65,8 +65,6 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
    if (vinfo->num_attribs == 0) {
       /* compute vertex layout now */
       const struct lp_fragment_shader *lpfs = llvmpipe->fs;
-      const enum interp_mode colorInterp
-         = llvmpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
       struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
       const uint num = draw_num_vs_outputs(llvmpipe->draw);
       uint i;
@@ -107,33 +105,21 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
 
          switch (lpfs->info.input_semantic_name[i]) {
          case TGSI_SEMANTIC_POSITION:
-            src = draw_find_vs_output(llvmpipe->draw,
-                                      TGSI_SEMANTIC_POSITION, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
+            interp = INTERP_POS;
             break;
 
          case TGSI_SEMANTIC_COLOR:
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_COLOR, 
-                                 lpfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+            if (llvmpipe->rasterizer->flatshade) {
+               interp = INTERP_CONSTANT;
+            }
             break;
-
-         case TGSI_SEMANTIC_FOG:
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_FOG, 0);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
-            break;
-
-         case TGSI_SEMANTIC_GENERIC:
-         case TGSI_SEMANTIC_FACE:
-            /* this includes texcoords and varying vars */
-            src = draw_find_vs_output(llvmpipe->draw, TGSI_SEMANTIC_GENERIC,
-                                      lpfs->info.input_semantic_index[i]);
-            draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
-            break;
-
-         default:
-            assert(0);
          }
+
+         /* this includes texcoords and varying vars */
+         src = draw_find_vs_output(llvmpipe->draw,
+                                   lpfs->info.input_semantic_name[i],
+                                   lpfs->info.input_semantic_index[i]);
+         draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
       llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
-- 
cgit v1.2.3


From 040e1d008f8f8258f1b0ee0fcdf4906e0979fb66 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 3 Nov 2009 23:19:56 +0100
Subject: nv50: add 3d texture tiling and mip-mapping

Mip-mapped 3D textures are not arrays of 2D layers
with a mip-map layout like 2D textures, therefore we
cannot use image_nr == depth for them.

Making use of "volume tiling" modes now, the allowed
modes are 0xZY where Z <= 5 and Y <= 5.
---
 src/gallium/drivers/nv50/nv50_context.h  | 12 ++++++
 src/gallium/drivers/nv50/nv50_miptree.c  | 63 +++++++++++++++++++++-----------
 src/gallium/drivers/nv50/nv50_tex.c      | 37 ++++++++++++++++---
 src/gallium/drivers/nv50/nv50_transfer.c | 39 +++++++++++++++-----
 4 files changed, 114 insertions(+), 37 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 890defb90c..4b0f062295 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -69,6 +69,18 @@ struct nv50_sampler_stateobj {
 	unsigned tsc[8];
 };
 
+static INLINE unsigned
+get_tile_height(uint32_t tile_mode)
+{
+        return 1 << ((tile_mode & 0xf) + 2);
+}
+
+static INLINE unsigned
+get_tile_depth(uint32_t tile_mode)
+{
+        return 1 << (tile_mode >> 4);
+}
+
 struct nv50_miptree_level {
 	int *image_offset;
 	unsigned pitch;
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 229a59cb74..9c20c5cc28 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -26,14 +26,33 @@
 
 #include "nv50_context.h"
 
+/* The restrictions in tile mode selection probably aren't necessary. */
 static INLINE uint32_t
-get_tile_mode(unsigned ny)
+get_tile_mode(unsigned ny, unsigned d)
 {
-	if (ny > 32) return 4;
-	if (ny > 16) return 3;
-	if (ny >  8) return 2;
-	if (ny >  4) return 1;
-	return 0;
+	uint32_t tile_mode = 0x00;
+
+	if (ny > 32) tile_mode = 0x04; /* height 64 tiles */
+	else
+	if (ny > 16) tile_mode = 0x03; /* height 32 tiles */
+	else
+	if (ny >  8) tile_mode = 0x02; /* height 16 tiles */
+	else
+	if (ny >  4) tile_mode = 0x01; /* height 8 tiles */
+
+	if (d == 1)
+		return tile_mode;
+	else
+	if (tile_mode > 0x02)
+		tile_mode = 0x02;
+
+	if (d > 16 && tile_mode < 0x02)
+		return tile_mode | 0x50; /* depth 32 tiles */
+	if (d >  8) return tile_mode | 0x40; /* depth 16 tiles */
+	if (d >  4) return tile_mode | 0x30; /* depth 8 tiles */
+	if (d >  2) return tile_mode | 0x20; /* depth 4 tiles */
+
+	return tile_mode | 0x10;
 }
 
 static struct pipe_texture *
@@ -43,7 +62,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
 	struct pipe_texture *pt = &mt->base.base;
 	unsigned width = tmp->width[0], height = tmp->height[0];
-	unsigned depth = tmp->depth[0];
+	unsigned depth = tmp->depth[0], image_alignment;
 	uint32_t tile_flags;
 	int ret, i, l;
 
@@ -67,17 +86,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 		break;
 	}
 
-	switch (pt->target) {
-	case PIPE_TEXTURE_3D:
-		mt->image_nr = pt->depth[0];
-		break;
-	case PIPE_TEXTURE_CUBE:
-		mt->image_nr = 6;
-		break;
-	default:
-		mt->image_nr = 1;
-		break;
-	}
+	/* XXX: texture arrays */
+	mt->image_nr = (pt->target == PIPE_TEXTURE_CUBE) ? 6 : 1;
 
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
@@ -90,26 +100,35 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
 		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
-		lvl->tile_mode = get_tile_mode(pt->nblocksy[l]);
+		lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
 
 		width = MAX2(1, width >> 1);
 		height = MAX2(1, height >> 1);
 		depth = MAX2(1, depth >> 1);
 	}
 
+	image_alignment  = get_tile_height(mt->level[0].tile_mode) * 64;
+	image_alignment *= get_tile_depth(mt->level[0].tile_mode);
+
+	/* NOTE the distinction between arrays of mip-mapped 2D textures and
+	 * mip-mapped 3D textures. We can't use image_nr == depth for 3D mip.
+	 */
 	for (i = 0; i < mt->image_nr; i++) {
 		for (l = 0; l <= pt->last_level; l++) {
 			struct nv50_miptree_level *lvl = &mt->level[l];
 			int size;
-			unsigned tile_ny = 1 << (lvl->tile_mode + 2);
+			unsigned tile_h = get_tile_height(lvl->tile_mode);
+			unsigned tile_d = get_tile_depth(lvl->tile_mode);
 
-			size  = align(pt->nblocksx[l] * pt->block.size, 64);
-			size *= align(pt->nblocksy[l], tile_ny);
+			size  = lvl->pitch;
+			size *= align(pt->nblocksy[l], tile_h);
+			size *= align(pt->depth[l], tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
 
 			mt->total_size += size;
 		}
+		mt->total_size = align(mt->total_size, image_alignment);
 	}
 
 	ret = nouveau_bo_new_tile(dev, NOUVEAU_BO_VRAM, 256, mt->total_size,
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 52ccdaa407..2813f54477 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -96,19 +96,44 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	if (i == NV50_TEX_FORMAT_LIST_SIZE)
                 return 1;
 
-	mode = (nv50->sampler[unit]->normalized ? 0xd0005000 : 0x5001d000) |
-	       (mt->base.bo->tile_mode << 22);
+	if (nv50->sampler[unit]->normalized)
+		mode = 0x50001000 | (1 << 31);
+	else {
+		mode = 0x50001000 | (7 << 14);
+		assert(mt->base.base.target == PIPE_TEXTURE_2D);
+	}
+
+	mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
+		((mt->base.bo->tile_mode & 0xf0) << 21);
+
 	if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB)
 		mode |= 0x0400;
 
+	switch (mt->base.base.target) {
+	case PIPE_TEXTURE_1D:
+		break;
+	case PIPE_TEXTURE_2D:
+		mode |= (1 << 14);
+		break;
+	case PIPE_TEXTURE_3D:
+		mode |= (2 << 14);
+		break;
+	case PIPE_TEXTURE_CUBE:
+		mode |= (3 << 14);
+		break;
+	default:
+		assert(!"unsupported texture target");
+		break;
+	}
+
 	so_data (so, nv50_tex_format_list[i].hw);
 	so_reloc(so, mt->base.bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW |
-		     NOUVEAU_BO_RD, 0, 0);
+		 NOUVEAU_BO_RD, 0, 0);
 	so_data (so, mode);
 	so_data (so, 0x00300000);
-	so_data (so, mt->base.base.width[0]);
+	so_data (so, mt->base.base.width[0] | (1 << 31));
 	so_data (so, (mt->base.base.last_level << 28) |
-		     (mt->base.base.depth[0] << 16) | mt->base.base.height[0]);
+		 (mt->base.base.depth[0] << 16) | mt->base.base.height[0]);
 	so_data (so, 0x03000000);
 	so_data (so, mt->base.base.last_level << 4);
 
@@ -124,7 +149,7 @@ nv50_tex_validate(struct nv50_context *nv50)
 	unsigned i, unit, push;
 
 	push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
-	so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr + 2);
+	so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
 
 	nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
 			  nv50->miptree_nr * 8 * 4);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 9c008090b8..ea61357aaa 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -12,6 +12,7 @@ struct nv50_transfer {
 	int level_pitch;
 	int level_width;
 	int level_height;
+	int level_depth;
 	int level_x;
 	int level_y;
 };
@@ -20,10 +21,10 @@ static void
 nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			struct nouveau_bo *src_bo, unsigned src_offset,
 			int src_pitch, unsigned src_tile_mode,
-			int sx, int sy, int sw, int sh,
+			int sx, int sy, int sw, int sh, int sd,
 			struct nouveau_bo *dst_bo, unsigned dst_offset,
 			int dst_pitch, unsigned dst_tile_mode,
-			int dx, int dy, int dw, int dh,
+			int dx, int dy, int dw, int dh, int dd,
 			int cpp, int width, int height,
 			unsigned src_reloc, unsigned dst_reloc)
 {
@@ -51,7 +52,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, src_tile_mode << 4);
 		OUT_RING  (chan, sw * cpp);
 		OUT_RING  (chan, sh);
-		OUT_RING  (chan, 1);
+		OUT_RING  (chan, sd);
 		OUT_RING  (chan, 0);
 	}
 
@@ -70,7 +71,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, dst_tile_mode << 4);
 		OUT_RING  (chan, dw * cpp);
 		OUT_RING  (chan, dh);
-		OUT_RING  (chan, 1);
+		OUT_RING  (chan, dd);
 		OUT_RING  (chan, 0);
 	}
 
@@ -114,6 +115,20 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 	}
 }
 
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
+{
+	unsigned tile_h = get_tile_height(tile_mode);
+	unsigned tile_d = get_tile_depth(tile_mode);
+
+	/* pitch_2d == to next slice within this volume-tile */
+	/* pitch_3d == to next slice in next 2D array of blocks */
+	unsigned pitch_2d = tile_h * 64;
+	unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
+
+	return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
 static struct pipe_transfer *
 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		  unsigned face, unsigned level, unsigned zslice,
@@ -129,9 +144,6 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	if (pt->target == PIPE_TEXTURE_CUBE)
 		image = face;
-	else
-	if (pt->target == PIPE_TEXTURE_3D)
-		image = zslice;
 
 	tx = CALLOC_STRUCT(nv50_transfer);
 	if (!tx)
@@ -157,6 +169,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_pitch = lvl->pitch;
 	tx->level_width = mt->base.base.width[level];
 	tx->level_height = mt->base.base.height[level];
+	tx->level_depth = mt->base.base.depth[level];
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
 	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
@@ -168,6 +181,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	if (pt->target == PIPE_TEXTURE_3D)
+		tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
+						      lvl->pitch,
+						      tx->base.nblocksy);
+
 	if (usage & PIPE_TRANSFER_READ) {
 		nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
 		ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
@@ -176,10 +194,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 					tx->level_pitch, tx->level_tiling,
 					x, y,
 					tx->base.nblocksx, tx->base.nblocksy,
+					tx->level_depth,
 					tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->base.nblocksx, tx->base.nblocksy, 1,
 					tx->base.block.size, nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
 					NOUVEAU_BO_GART);
@@ -199,14 +218,16 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 
 	if (ptx->usage & PIPE_TRANSFER_WRITE) {
 		struct pipe_screen *pscreen = ptx->texture->screen;
+
 		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->base.nblocksx, tx->base.nblocksy, 1,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					tx->level_x, tx->level_y,
 					tx->base.nblocksx, tx->base.nblocksy,
+					tx->level_depth,
 					tx->base.block.size, nx, ny,
 					NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
 					NOUVEAU_BO_GART);
-- 
cgit v1.2.3


From 317ccfe0dfbfda13f58a26f661324d883b25a316 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 3 Nov 2009 22:09:32 +0100
Subject: nv50: add abs-modifier for emit_minmax

---
 src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 27827c7ecf..64a0b571a5 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -88,12 +88,16 @@ struct nv50_reg {
 	int index;
 
 	int hw;
-	int neg;
+	int mod;
 
 	int rhw; /* result hw for FP outputs, or interpolant index */
 	int acc; /* instruction where this reg is last read (first insn == 1) */
 };
 
+#define NV50_MOD_NEG 1
+#define NV50_MOD_ABS 2
+#define NV50_MOD_SAT 4
+
 /* arbitrary limits */
 #define MAX_IF_DEPTH 4
 #define MAX_LOOP_DEPTH 4
@@ -152,7 +156,7 @@ ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
 	reg->type = type;
 	reg->index = index;
 	reg->hw = hw;
-	reg->neg = 0;
+	reg->mod = 0;
 	reg->rhw = -1;
 	reg->acc = 0;
 }
@@ -460,8 +464,12 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
+	unsigned val;
 	float f = pc->immd_buf[imm->hw];
-	unsigned val = fui(imm->neg ? -f : f);
+
+	if (imm->mod & NV50_MOD_ABS)
+		f = fabsf(f);
+	val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
 
 	set_long(pc, e);
 	/*XXX: can't be predicated - bits overlap.. catch cases where both
@@ -801,12 +809,12 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	set_dst(pc, dst, e);
 	set_src_0(pc, src0, e);
 	if (src1->type == P_IMMD && !is_long(e)) {
-		if (src0->neg)
+		if (src0->mod & NV50_MOD_NEG)
 			e->inst[0] |= 0x00008000;
 		set_immd(pc, src1, e);
 	} else {
 		set_src_1(pc, src1, e);
-		if (src0->neg ^ src1->neg) {
+		if ((src0->mod ^ src1->mod) & NV50_MOD_NEG) {
 			if (is_long(e))
 				e->inst[1] |= 0x08000000;
 			else
@@ -828,9 +836,10 @@ emit_add(struct nv50_pc *pc, struct nv50_reg *dst,
 	alloc_reg(pc, src1);
 	check_swap_src_0_1(pc, &src0, &src1);
 
-	if (!pc->allow32 || (src0->neg | src1->neg) || src1->hw > 63) {
+	if (!pc->allow32 || (src0->mod | src1->mod) || src1->hw > 63) {
 		set_long(pc, e);
-		e->inst[1] |= (src0->neg << 26) | (src1->neg << 27);
+		e->inst[1] |= ((src0->mod & NV50_MOD_NEG) << 26) |
+			      ((src1->mod & NV50_MOD_NEG) << 27);
 	}
 
 	set_dst(pc, dst, e);
@@ -877,6 +886,11 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 	set_src_0(pc, src0, e);
 	set_src_1(pc, src1, e);
 
+	if (src0->mod & NV50_MOD_ABS)
+		e->inst[1] |= 0x00100000;
+	if (src1->mod & NV50_MOD_ABS)
+		e->inst[1] |= 0x00080000;
+
 	emit(pc, e);
 }
 
@@ -885,9 +899,9 @@ emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
 {
 	assert(src0 != src1);
-	src1->neg ^= 1;
+	src1->mod ^= NV50_MOD_NEG;
 	emit_add(pc, dst, src0, src1);
-	src1->neg ^= 1;
+	src1->mod ^= NV50_MOD_NEG;
 }
 
 static void
@@ -941,9 +955,9 @@ emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	set_src_1(pc, src1, e);
 	set_src_2(pc, src2, e);
 
-	if (src0->neg ^ src1->neg)
+	if ((src0->mod ^ src1->mod) & NV50_MOD_NEG)
 		e->inst[1] |= 0x04000000;
-	if (src2->neg)
+	if (src2->mod & NV50_MOD_NEG)
 		e->inst[1] |= 0x08000000;
 
 	emit(pc, e);
@@ -954,9 +968,9 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
 {
 	assert(src2 != src0 && src2 != src1);
-	src2->neg ^= 1;
+	src2->mod ^= NV50_MOD_NEG;
 	emit_mad(pc, dst, src0, src1, src2);
-	src2->neg ^= 1;
+	src2->mod ^= NV50_MOD_NEG;
 }
 
 static void
@@ -1230,7 +1244,7 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 	const int r_pred = 1;
 	unsigned cvn = CVT_F32_F32;
 
-	if (src->neg)
+	if (src->mod & NV50_MOD_NEG)
 		cvn |= CVT_NEG;
 	/* write predicate reg */
 	emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
@@ -1408,7 +1422,7 @@ emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 
 	assert(src->type == P_TEMP);
 
-	if (!src->neg) /* ! double negation */
+	if (!(src->mod & NV50_MOD_NEG)) /* ! double negation */
 		emit_neg(pc, src, src);
 
 	e->inst[0] = 0xc0150000;
@@ -1671,7 +1685,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		break;
 	case TGSI_UTIL_SIGN_TOGGLE:
 		if (neg)
-			r->neg = 1;
+			r->mod = NV50_MOD_NEG;
 		else {
 			temp = temp_temp(pc);
 			emit_neg(pc, temp, r);
@@ -2207,7 +2221,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!src[i][c])
 				continue;
-			src[i][c]->neg = 0;
+			src[i][c]->mod = 0;
 			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
 				FREE(src[i][c]);
 			else
-- 
cgit v1.2.3


From 618e3b89f6ecdf422132ecea19315b326dd348ec Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 3 Nov 2009 23:30:18 +0100
Subject: nv50: fix shader emit_tex for cube textures

---
 src/gallium/drivers/nv50/nv50_program.c | 50 ++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 64a0b571a5..bf50982dd1 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1257,11 +1257,37 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 	emit(pc, e);
 }
 
+static void
+load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+		     struct nv50_reg **src, boolean proj)
+{
+	int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
+
+	src[0]->mod |= NV50_MOD_ABS;
+	src[1]->mod |= NV50_MOD_ABS;
+	src[2]->mod |= NV50_MOD_ABS;
+
+	emit_minmax(pc, 4, t[2], src[0], src[1]);
+	emit_minmax(pc, 4, t[2], src[2], t[2]);
+
+	src[0]->mod = mod[0];
+	src[1]->mod = mod[1];
+	src[2]->mod = mod[2];
+
+	if (proj && 0 /* looks more correct without this */)
+		emit_mul(pc, t[2], t[2], src[3]);
+	emit_flop(pc, 0, t[2], t[2]);
+
+	emit_mul(pc, t[0], src[0], t[2]);
+	emit_mul(pc, t[1], src[1], t[2]);
+	emit_mul(pc, t[2], src[2], t[2]);
+}
+
 static void
 emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	 struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
 {
-	struct nv50_reg *temp, *t[4];
+	struct nv50_reg *t[4];
 	struct nv50_program_exec *e;
 
 	unsigned c, mode, dim;
@@ -1290,6 +1316,9 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	/* some cards need t[0]'s hw index to be a multiple of 4 */
 	alloc_temp4(pc, t, 0);
 
+	if (type == TGSI_TEXTURE_CUBE) {
+		load_cube_tex_coords(pc, t, src, proj);
+	} else
 	if (proj) {
 		if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
 			mode = pc->interp_mode[src[0]->index];
@@ -1314,17 +1343,8 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 			 */
 		}
 	} else {
-		if (type == TGSI_TEXTURE_CUBE) {
-			temp = temp_temp(pc);
-			emit_minmax(pc, 4, temp, src[0], src[1]);
-			emit_minmax(pc, 4, temp, temp, src[2]);
-			emit_flop(pc, 0, temp, temp);
-			for (c = 0; c < 3; c++)
-				emit_mul(pc, t[c], src[c], temp);
-		} else {
-			for (c = 0; c < dim; c++)
-				emit_mov(pc, t[c], src[c]);
-		}
+		for (c = 0; c < dim; c++)
+			emit_mov(pc, t[c], src[c]);
 	}
 
 	e = exec(pc);
@@ -1337,14 +1357,16 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	if (dim == 2)
 		e->inst[0] |= 0x00400000;
 	else
-	if (dim == 3)
+	if (dim == 3) {
 		e->inst[0] |= 0x00800000;
+		if (type == TGSI_TEXTURE_CUBE)
+			e->inst[0] |= 0x08000000;
+	}
 
 	e->inst[0] |= (mask & 0x3) << 25;
 	e->inst[1] |= (mask & 0xc) << 12;
 
 	emit(pc, e);
-
 #if 1
 	c = 0;
 	if (mask & 1) emit_mov(pc, dst[0], t[c++]);
-- 
cgit v1.2.3


From a277bb20debc413f6ccf46f529497bf8bafa64dd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 3 Nov 2009 23:16:02 +0000
Subject: i965g: convert read/write domain pairs into single usage value

Single usage values make it easier to understand what's going on in the
driver sources; the winsys converts these stereotyped usage values back
to GEM read/write domain flags.
---
 src/gallium/drivers/i965/brw_batchbuffer.c        |  9 +++--
 src/gallium/drivers/i965/brw_batchbuffer.h        |  7 ++--
 src/gallium/drivers/i965/brw_cc.c                 |  2 +-
 src/gallium/drivers/i965/brw_clip_state.c         |  2 +-
 src/gallium/drivers/i965/brw_curbe.c              |  2 +-
 src/gallium/drivers/i965/brw_draw_upload.c        |  8 ++---
 src/gallium/drivers/i965/brw_gs_state.c           |  2 +-
 src/gallium/drivers/i965/brw_misc_state.c         | 18 +++++-----
 src/gallium/drivers/i965/brw_pipe_query.c         |  4 +--
 src/gallium/drivers/i965/brw_sf_state.c           |  4 +--
 src/gallium/drivers/i965/brw_vs_state.c           |  2 +-
 src/gallium/drivers/i965/brw_vs_surface_state.c   |  2 +-
 src/gallium/drivers/i965/brw_winsys.h             | 40 ++++++++++-----------
 src/gallium/drivers/i965/brw_wm_constant_buffer.c |  2 +-
 src/gallium/drivers/i965/brw_wm_sampler_state.c   |  2 +-
 src/gallium/drivers/i965/brw_wm_state.c           | 26 +++++++-------
 src/gallium/drivers/i965/brw_wm_surface_state.c   |  6 ++--
 src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c | 42 ++++++++++++++++++++---
 18 files changed, 104 insertions(+), 76 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 72650cdb5d..fd6b34cb8a 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -168,9 +168,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
  */
 enum pipe_error
 brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
-                             struct brw_winsys_buffer *buffer,
-                             uint32_t read_domains, uint32_t write_domain,
-			     uint32_t delta)
+			   struct brw_winsys_buffer *buffer,
+			   uint32_t usage,
+			   uint32_t delta)
 {
    int ret;
 
@@ -182,8 +182,7 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
    }
 
    ret = batch->sws->bo_emit_reloc(batch->buf,
-				   read_domains,
-				   write_domain,
+				   usage,
 				   delta, 
 				   batch->ptr - batch->map,
 				   buffer);
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index d687b79f93..b7186b3757 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -77,8 +77,7 @@ int brw_batchbuffer_data(struct brw_batchbuffer *batch,
 
 int brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
 			       struct brw_winsys_buffer *buffer,
-			       uint32_t read_domains,
-			       uint32_t write_domain,
+			       enum brw_buffer_usage usage,
 			       uint32_t offset);
 
 /* Inline functions - might actually be better off with these
@@ -125,10 +124,10 @@ brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
 
 #define OUT_BATCH(d) brw_batchbuffer_emit_dword(brw->batch, d)
 
-#define OUT_RELOC(buf, read_domains, write_domain, delta) do {		\
+#define OUT_RELOC(buf, usage, delta) do {				\
       assert((unsigned) (delta) < buf->size);				\
       brw_batchbuffer_emit_reloc(brw->batch, buf,			\
-				 read_domains, write_domain, delta);	\
+				 usage, delta);				\
    } while (0)
 
 #ifdef DEBUG
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index c6267e1c60..20967f0191 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -151,7 +151,7 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
 
    /* Emit CC viewport relocation */
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   BRW_USAGE_STATE,
 			   0,
 			   offsetof(struct brw_cc_unit_state, cc4),
 			   brw->cc.vp_bo);
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 8be53e4bfb..6f8309fea9 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -150,7 +150,7 @@ clip_unit_create_from_key(struct brw_context *brw,
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   BRW_USAGE_STATE,
 			   clip.thread0.grf_reg_count << 1,
 			   offsetof(struct brw_clip_unit_state, thread0),
 			   brw->clip.prog_bo);
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index ed5b250f82..3910174bda 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -323,7 +323,7 @@ static int emit_curbe_buffer(struct brw_context *brw)
    } else {
       OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
       OUT_RELOC(brw->curbe.curbe_bo,
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		BRW_USAGE_STATE,
 		(sz - 1) + brw->curbe.curbe_offset);
    }
    ADVANCE_BATCH();
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 040d8ca93a..f0b7c741c0 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -300,11 +300,11 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
 		BRW_VB0_ACCESS_VERTEXDATA |
 		(brw->vb.vb[i].stride << BRW_VB0_PITCH_SHIFT));
       OUT_RELOC(brw->vb.vb[i].bo,
-		I915_GEM_DOMAIN_VERTEX, 0,
+		BRW_USAGE_VERTEX,
 		brw->vb.vb[i].offset);
       if (BRW_IS_IGDNG(brw)) {
 	 OUT_RELOC(brw->vb.vb[i].bo,
-		   I915_GEM_DOMAIN_VERTEX, 0,
+		   BRW_USAGE_VERTEX,
 		   brw->vb.vb[i].bo->size - 1);
       } else
 	 OUT_BATCH(brw->vb.vb[i].stride ? brw->vb.vb[i].vertex_count : 0);
@@ -527,10 +527,10 @@ static int brw_emit_index_buffer(struct brw_context *brw)
       BEGIN_BATCH(4, IGNORE_CLIPRECTS);
       OUT_BATCH( ib.header.dword );
       OUT_RELOC(brw->ib.bo,
-		I915_GEM_DOMAIN_VERTEX, 0,
+		BRW_USAGE_VERTEX,
 		brw->ib.offset);
       OUT_RELOC(brw->ib.bo,
-		I915_GEM_DOMAIN_VERTEX, 0,
+		BRW_USAGE_VERTEX,
 		brw->ib.offset + brw->ib.size - 1);
       OUT_BATCH( 0 );
       ADVANCE_BATCH();
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 9046969394..f27f886a65 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -113,7 +113,7 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    if (key->prog_active) {
       /* Emit GS program relocation */
       brw->sws->bo_emit_reloc(bo,
-			      I915_GEM_DOMAIN_INSTRUCTION, 0,
+			      BRW_USAGE_STATE,
 			      gs.thread0.grf_reg_count << 1,
 			      offsetof(struct brw_gs_unit_state, thread0),
 			      brw->gs.prog_bo);
diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 06b9a2d2df..e786ea1100 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -111,7 +111,7 @@ static int upload_binding_table_pointers(struct brw_context *brw)
    OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2));
    if (brw->vs.bind_bo != NULL)
       OUT_RELOC(brw->vs.bind_bo, 
-		I915_GEM_DOMAIN_SAMPLER, 0,
+		BRW_USAGE_SAMPLER,
 		0); /* vs */
    else
       OUT_BATCH(0);
@@ -119,7 +119,7 @@ static int upload_binding_table_pointers(struct brw_context *brw)
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
    OUT_RELOC(brw->wm.bind_bo,
-	     I915_GEM_DOMAIN_SAMPLER, 0,
+	     BRW_USAGE_SAMPLER,
 	     0); /* wm/ps */
    ADVANCE_BATCH();
    return 0;
@@ -147,25 +147,25 @@ static int upload_pipelined_state_pointers(struct brw_context *brw )
    BEGIN_BATCH(7, IGNORE_CLIPRECTS);
    OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2));
    OUT_RELOC(brw->vs.state_bo, 
-	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     BRW_USAGE_STATE,
 	     0);
    if (brw->gs.prog_active)
       OUT_RELOC(brw->gs.state_bo, 
-		I915_GEM_DOMAIN_INSTRUCTION, 0,
+		BRW_USAGE_STATE,
 		1);
    else
       OUT_BATCH(0);
    OUT_RELOC(brw->clip.state_bo, 
-	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     BRW_USAGE_STATE,
 	     1);
    OUT_RELOC(brw->sf.state_bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     BRW_USAGE_STATE,
 	     0);
    OUT_RELOC(brw->wm.state_bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     BRW_USAGE_STATE,
 	     0);
    OUT_RELOC(brw->cc.state_bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, 0,
+	     BRW_USAGE_STATE,
 	     0);
    ADVANCE_BATCH();
 
@@ -288,7 +288,7 @@ static int emit_depthbuffer(struct brw_context *brw)
 		((surface->layout != PIPE_SURFACE_LAYOUT_LINEAR) << 27) |
 		(BRW_SURFACE_2D << 29));
       OUT_RELOC(bo,
-		I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		BRW_USAGE_DEPTH_BUFFER,
 		surface->offset);
       OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
 		((pitch - 1) << 6) |
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 18a9b71af0..1fe2f4da4f 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -193,7 +193,7 @@ brw_emit_query_begin(struct brw_context *brw)
     * to pick up the results.
     */
    OUT_RELOC(brw->query.bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     BRW_USAGE_QUERY_RESULT,
 	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
 	     ((brw->query.index * 2) * sizeof(uint64_t)));
    OUT_BATCH(0);
@@ -234,7 +234,7 @@ brw_emit_query_end(struct brw_context *brw)
 	     PIPE_CONTROL_DEPTH_STALL |
 	     PIPE_CONTROL_WRITE_DEPTH_COUNT);
    OUT_RELOC(brw->query.bo,
-	     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+	     BRW_USAGE_QUERY_RESULT,
 	     PIPE_CONTROL_GLOBAL_GTT_WRITE |
 	     ((brw->query.index * 2 + 1) * sizeof(uint64_t)));
    OUT_BATCH(0);
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 4ab5709d53..31343ff245 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -284,14 +284,14 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
     */
    /* Emit SF program relocation */
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   BRW_USAGE_STATE,
 			   sf.thread0.grf_reg_count << 1,
 			   offsetof(struct brw_sf_unit_state, thread0),
 			   brw->sf.prog_bo);
 
    /* Emit SF viewport relocation */
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   BRW_USAGE_STATE,
 			   sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
 			   offsetof(struct brw_sf_unit_state, sf5),
 			   brw->sf.vp_bo);
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 6a2395dd96..26d5d005fa 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -149,7 +149,7 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 
    /* Emit VS program relocation */
    brw->sws->bo_emit_reloc(bo,
-			   I915_GEM_DOMAIN_INSTRUCTION, 0,
+			   BRW_USAGE_STATE,
 			   vs.thread0.grf_reg_count << 1,
 			   offsetof(struct brw_vs_unit_state, thread0),
 			   brw->vs.prog_bo);
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index 9a9d47a8a3..32fb9b2a8b 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -170,7 +170,7 @@ brw_vs_get_binding_table(struct brw_context *brw)
 	     */
 	    drm_intel_bo_emit_reloc(bind_bo, i * 4,
 				    brw->vs.surf_bo[i], 0,
-				    I915_GEM_DOMAIN_INSTRUCTION, 0);
+				    BRW_USAGE_STATE);
 	 }
       }
 
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index d19cd5d248..d0bd97d994 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -43,25 +43,22 @@ struct brw_winsys_buffer {
    unsigned size;
 };
 
+/* Describe the usage of a particular buffer in a relocation.  The DRM
+ * winsys will translate these back to GEM read/write domain flags.
+ */
 enum brw_buffer_usage {
-   I915_GEM_DOMAIN_RENDER,
-   I915_GEM_DOMAIN_SAMPLER,
-   I915_GEM_DOMAIN_VERTEX,
-   I915_GEM_DOMAIN_INSTRUCTION,
-
-
-   /* XXX: migrate from domains to explicit usage cases, eg below:
-    */
-
-   /* use on textures */
-   BRW_USAGE_RENDER    = 0x01,
-   BRW_USAGE_SAMPLER   = 0x02,
-   BRW_USAGE_2D_TARGET = 0x04,
-   BRW_USAGE_2D_SOURCE = 0x08,
-   /* use on vertex */
-   BRW_USAGE_VERTEX    = 0x10,
+   BRW_USAGE_STATE,		/* INSTRUCTION, 0 */
+   BRW_USAGE_QUERY_RESULT,	/* INSTRUCTION, INSTRUCTION */
+   BRW_USAGE_RENDER_TARGET,	/* RENDER,      0 */
+   BRW_USAGE_DEPTH_BUFFER,	/* RENDER,      RENDER */
+   BRW_USAGE_SAMPLER,		/* SAMPLER,     0 */
+   BRW_USAGE_VERTEX,		/* VERTEX,      0 */
+   BRW_USAGE_SCRATCH,		/* 0,           0 */
 };
 
+/* Should be possible to validate usages above against buffer creation
+ * types, below:
+ */
 enum brw_buffer_type
 {
    BRW_BUFFER_TYPE_TEXTURE,
@@ -70,10 +67,9 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_CURBE,
    BRW_BUFFER_TYPE_QUERY,
    BRW_BUFFER_TYPE_SHADER_CONSTANTS,
-   BRW_BUFFER_TYPE_WM_SCRATCH,
+   BRW_BUFFER_TYPE_SHADER_SCRATCH,
    BRW_BUFFER_TYPE_BATCH,
    BRW_BUFFER_TYPE_STATE_CACHE,
-   
    BRW_BUFFER_TYPE_MAX		/* Count of possible values */
 };
 
@@ -98,12 +94,12 @@ struct brw_winsys_screen {
    void (*bo_reference)( struct brw_winsys_buffer *buffer );
    void (*bo_unreference)( struct brw_winsys_buffer *buffer );
 
-   /* XXX: parameter names!!
+   /* delta -- added to b2->offset, and written into buffer
+    * offset -- location above value is written to within buffer
     */
    int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
-			 unsigned domain,
-			 unsigned a,
-			 unsigned b,
+			 enum brw_buffer_usage usage,
+			 unsigned delta,
 			 unsigned offset,
 			 struct brw_winsys_buffer *b2);
 
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
index 7d2533b104..50ecef29a4 100644
--- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -37,7 +37,7 @@ brw_create_constant_surface( struct brw_context *brw,
    if (key->bo) {
       /* Emit relocation to surface contents */
       brw->sws->bo_emit_reloc(bo,
-			      I915_GEM_DOMAIN_SAMPLER, 0,
+			      BRW_USAGE_SAMPLER,
 			      0,
 			      offsetof(struct brw_surface_state, ss1),
 			      key->bo);
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index d43968c85a..2909dd3876 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -182,7 +182,7 @@ static int upload_wm_samplers( struct brw_context *brw )
       /* Emit SDC relocations */
       for (i = 0; i < key.sampler_count; i++) {
 	 brw->sws->bo_emit_reloc(brw->wm.sampler_bo,
-				 I915_GEM_DOMAIN_SAMPLER, 0,
+				 BRW_USAGE_SAMPLER,
 				 0,
 				 i * sizeof(struct brw_sampler_state) +
 				 offsetof(struct brw_sampler_state, ss2),
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 5cfa8fe2d1..ccbb647bcd 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -230,27 +230,27 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    /* Emit WM program relocation */
    brw->sws->bo_emit_reloc(bo,
-		     I915_GEM_DOMAIN_INSTRUCTION, 0,
-		     wm.thread0.grf_reg_count << 1,
-		     offsetof(struct brw_wm_unit_state, thread0),
-		     brw->wm.prog_bo);
+			   BRW_USAGE_STATE,
+			   wm.thread0.grf_reg_count << 1,
+			   offsetof(struct brw_wm_unit_state, thread0),
+			   brw->wm.prog_bo);
 
    /* Emit scratch space relocation */
    if (key->total_scratch != 0) {
       brw->sws->bo_emit_reloc(bo,
-			0, 0,
-			wm.thread2.per_thread_scratch_space,
-			offsetof(struct brw_wm_unit_state, thread2),
-			brw->wm.scratch_bo);
+			      BRW_USAGE_SCRATCH,
+			      wm.thread2.per_thread_scratch_space,
+			      offsetof(struct brw_wm_unit_state, thread2),
+			      brw->wm.scratch_bo);
    }
 
    /* Emit sampler state relocation */
    if (key->sampler_count != 0) {
       brw->sws->bo_emit_reloc(bo,
-			I915_GEM_DOMAIN_INSTRUCTION, 0,
-			wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-			offsetof(struct brw_wm_unit_state, wm4),
-			brw->wm.sampler_bo);
+			      BRW_USAGE_STATE,
+			      wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+			      offsetof(struct brw_wm_unit_state, wm4),
+			      brw->wm.sampler_bo);
    }
 
    return bo;
@@ -277,7 +277,7 @@ static int upload_wm_unit( struct brw_context *brw )
       }
       if (brw->wm.scratch_bo == NULL) {
 	 brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws,
-						 BRW_BUFFER_TYPE_WM_SCRATCH,
+						 BRW_BUFFER_TYPE_SHADER_SCRATCH,
 						 total,
 						 4096);
       }
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index f55a6c4af2..e5a0ed7d61 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -60,7 +60,7 @@ brw_update_texture_surface( struct brw_context *brw,
       
       /* Emit relocation to surface contents */
       brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf],
-			      I915_GEM_DOMAIN_SAMPLER, 0,
+			      BRW_USAGE_SAMPLER,
 			      0,
 			      offsetof(struct brw_surface_state, ss1),
 			      tex->bo);
@@ -117,7 +117,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
       /* XXX: we will only be rendering to this surface:
        */
       brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit],
-			      I915_GEM_DOMAIN_RENDER, 0, 
+			      BRW_USAGE_RENDER_TARGET,
 			      ss.ss1.base_addr - surface->bo->offset[0], /* XXX */
 			      offsetof(struct brw_surface_state, ss1),
 			      surface->bo);
@@ -161,7 +161,7 @@ brw_wm_get_binding_table(struct brw_context *brw)
       /* Emit binding table relocations to surface state */
       for (i = 0; i < brw->wm.nr_surfaces; i++) {
 	 brw->sws->bo_emit_reloc(bind_bo,
-				 I915_GEM_DOMAIN_INSTRUCTION, 0,
+				 BRW_USAGE_STATE,
 				 0,
 				 i * sizeof(GLuint),
 				 brw->wm.surf_bo[i]);
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
index 5dbfd2e6b0..61717d2942 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
@@ -93,17 +93,51 @@ i965_libdrm_bo_unreference( struct brw_winsys_buffer *buffer )
     */
 static int 
 i965_libdrm_bo_emit_reloc( struct brw_winsys_buffer *buffer,
-			   unsigned domain,
-			   unsigned a,
-			   unsigned b,
+			   enum brw_buffer_usage usage,
+			   unsigned delta,
 			   unsigned offset,
 			   struct brw_winsys_buffer *buffer2)
 {
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
    struct i965_libdrm_buffer *buf2 = i965_libdrm_buffer(buffer2);
+   int read, write;
    int ret;
 
-   ret = dri_bo_emit_reloc( buf->bo, domain, a, b, offset, buf2->bo );
+   switch (usage) {
+   case BRW_USAGE_STATE:
+      read = I915_GEM_DOMAIN_INSTRUCTION;
+      write = 0;
+      break;
+   case BRW_USAGE_QUERY_RESULT:
+      read = I915_GEM_DOMAIN_INSTRUCTION;
+      write = I915_GEM_DOMAIN_INSTRUCTION;
+      break;
+   case BRW_USAGE_RENDER_TARGET:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = 0;
+      break;
+   case BRW_USAGE_DEPTH_BUFFER:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = I915_GEM_DOMAIN_RENDER;
+      break;
+   case BRW_USAGE_SAMPLER:
+      read = I915_GEM_DOMAIN_SAMPLER;
+      write = 0;
+      break;
+   case BRW_USAGE_VERTEX:
+      read = I915_GEM_DOMAIN_VERTEX;
+      write = 0;
+      break;
+   case BRW_USAGE_SCRATCH:
+      read = 0;
+      write = 0;
+      break;
+   default:
+      assert(0);
+      return -1;
+   }
+
+   ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo );
    if (ret)
       return -1;
 
-- 
cgit v1.2.3


From 211d7ab22b13430aaae00a0dfe95492450bcca20 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 13:03:35 +0000
Subject: i965g: add standalone xlib debug winsys

Create a dummy winsys that just debug-prints on calls into the winsys
functions.  Will use this to get to the point where we are generating
sane-looking debug dumps and disassembly.

Also fix various warnings generated with the new compiler flags set in
this config.
---
 Makefile                                     |   1 +
 configs/linux-i965                           |   8 +
 src/gallium/drivers/i965/brw_context.h       |   5 +-
 src/gallium/drivers/i965/brw_curbe.c         |   2 +-
 src/gallium/drivers/i965/brw_disasm.c        |   1 +
 src/gallium/drivers/i965/brw_state.h         |   2 +-
 src/gallium/drivers/i965/brw_state_cache.c   |   6 +-
 src/gallium/drivers/i965/brw_winsys.h        |   1 +
 src/gallium/winsys/drm/i965/xlib/Makefile    |  97 +++++++
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 362 +++++++++++++++++++++++++++
 src/gallium/winsys/xlib/Makefile             |   1 +
 11 files changed, 479 insertions(+), 7 deletions(-)
 create mode 100644 configs/linux-i965
 create mode 100644 src/gallium/winsys/drm/i965/xlib/Makefile
 create mode 100644 src/gallium/winsys/drm/i965/xlib/xlib_i965.c

(limited to 'src/gallium/drivers')

diff --git a/Makefile b/Makefile
index 7f073fd516..e437bd27d4 100644
--- a/Makefile
+++ b/Makefile
@@ -105,6 +105,7 @@ irix6-n32-static \
 irix6-o32 \
 irix6-o32-static \
 linux \
+linux-i965 \
 linux-alpha \
 linux-alpha-static \
 linux-cell \
diff --git a/configs/linux-i965 b/configs/linux-i965
new file mode 100644
index 0000000000..e66abc347b
--- /dev/null
+++ b/configs/linux-i965
@@ -0,0 +1,8 @@
+# Configuration for standalone mode i965 debug
+
+include $(TOP)/configs/linux-debug
+
+CONFIG_NAME = linux-i965
+
+GALLIUM_DRIVER_DIRS = i965
+GALLIUM_WINSYS_DIRS = drm/i965/xlib
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index b94c511499..97b2a8e27d 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -167,8 +167,8 @@ struct brw_fragment_shader {
    unsigned iz_lookup;
    //unsigned wm_lookup;
    
-   boolean  uses_depth:1;
-   boolean  has_flow_control:1;
+   unsigned  uses_depth:1;
+   unsigned  has_flow_control:1;
 
    unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
@@ -573,6 +573,7 @@ struct brw_context
       } vb[PIPE_MAX_ATTRIBS];
 
       struct {
+         int dummy;
       } ve[PIPE_MAX_ATTRIBS];
 
       unsigned nr_vb;		/* currently the same as curr.num_vertex_buffers */
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 3910174bda..5763173bca 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -243,7 +243,7 @@ static int prepare_curbe_buffer(struct brw_context *brw)
 		      buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
 
       debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
-		   brw->curbe.last_buf, buf,
+		   (void *)brw->curbe.last_buf, (void *)buf,
 		   bufsz, brw->curbe.last_bufsz,
 		   brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
    }
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index a84c581c03..29fe848005 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -143,6 +143,7 @@ char *chan_sel[4] = {
 };
 
 char *dest_condmod[16] = {
+   [0] = NULL
 };
 
 char *debug_ctrl[2] = {
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 3b9151ab2f..94d2cb6f82 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -47,7 +47,7 @@ brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
       brw->sws->bo_reference(bo);
       brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
    }
-};
+}
 
 const struct brw_tracked_state brw_blend_constant_color;
 const struct brw_tracked_state brw_cc_unit;
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 9cf44f7a5c..1cb1b5e721 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -236,8 +236,8 @@ brw_upload_cache( struct brw_cache *cache,
    tmp = MALLOC(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
-   memcpy(tmp + key_size, aux, cache->aux_size[cache_id]);
-   memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size);
+   memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]);
+   memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size);
    for (i = 0; i < nr_reloc_bufs; i++) {
       if (reloc_bufs[i] != NULL)
 	 cache->sws->bo_reference(reloc_bufs[i]);
@@ -247,7 +247,7 @@ brw_upload_cache( struct brw_cache *cache,
    item->key = tmp;
    item->hash = hash;
    item->key_size = key_size;
-   item->reloc_bufs = tmp + key_size + aux_size;
+   item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size);
    item->nr_reloc_bufs = nr_reloc_bufs;
 
    item->bo = bo;
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index d0bd97d994..9338923da3 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -54,6 +54,7 @@ enum brw_buffer_usage {
    BRW_USAGE_SAMPLER,		/* SAMPLER,     0 */
    BRW_USAGE_VERTEX,		/* VERTEX,      0 */
    BRW_USAGE_SCRATCH,		/* 0,           0 */
+   BRW_USAGE_MAX
 };
 
 /* Should be possible to validate usages above against buffer creation
diff --git a/src/gallium/winsys/drm/i965/xlib/Makefile b/src/gallium/winsys/drm/i965/xlib/Makefile
new file mode 100644
index 0000000000..0efa0ca6f9
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xlib/Makefile
@@ -0,0 +1,97 @@
+# src/gallium/winsys/drm/i965/xlib/Makefile
+
+# This makefile produces a "stand-alone" libGL.so which is based on
+# Xlib (no DRI HW acceleration)
+
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+
+GL_MAJOR = 1
+GL_MINOR = 5
+GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY)
+
+
+INCLUDE_DIRS = \
+	-I$(TOP)/include \
+	-I$(TOP)/src/mesa \
+	-I$(TOP)/src/mesa/main \
+	-I$(TOP)/src/gallium/include \
+	-I$(TOP)/src/gallium/drivers \
+	-I$(TOP)/src/gallium/drivers/i965 \
+	-I$(TOP)/src/gallium/drivers/i965/include \
+	-I$(TOP)/src/gallium/state_trackers/glx/xlib \
+	-I$(TOP)/src/gallium/auxiliary \
+	-I/usr/include/drm
+
+XLIB_WINSYS_SOURCES = \
+	xlib_i965.c \
+
+
+
+XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
+
+
+
+LIBS = \
+	$(TOP)/src/gallium/drivers/i965/libi965.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \
+	$(TOP)/src/mesa/libglapi.a \
+	$(TOP)/src/mesa/libmesagallium.a \
+	$(GALLIUM_AUXILIARIES) 
+
+#	$(TOP)/src/gallium/drivers/i965/lib/libi9xx.a \
+
+.SUFFIXES : .cpp
+
+.c.o:
+	$(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@
+
+.cpp.o:
+	$(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@
+
+
+
+default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME)
+
+$(TOP)/$(LIB_DIR)/gallium:
+	@ mkdir -p $(TOP)/$(LIB_DIR)/gallium
+
+# Make the libGL.so library
+$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile
+	$(TOP)/bin/mklib -o $(GL_LIB) \
+		-linker "$(CC)" \
+		-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
+		-install $(TOP)/$(LIB_DIR)/gallium \
+		$(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \
+		-Wl,--start-group $(LIBS) -Wl,--end-group $(GL_LIB_DEPS)
+
+
+depend: $(XLIB_WINSYS_SOURCES)
+	@ echo "running $(MKDEP)"
+	@ rm -f depend  # workaround oops on gutsy?!?
+	@ touch depend
+	$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \
+		> /dev/null 2>/dev/null
+
+
+install: default
+	$(INSTALL) -d $(INSTALL_DIR)/include/GL
+	$(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR)
+	$(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL
+	@if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \
+		$(MINSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \
+	fi
+
+
+# Emacs tags
+tags:
+	etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h
+
+clean:
+	-rm -f *.o
+
+
+include depend
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
new file mode 100644
index 0000000000..60ab8e1993
--- /dev/null
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -0,0 +1,362 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * 
+ **************************************************************************/
+
+/*
+ * Authors:
+ *   Keith Whitwell
+ *   Brian Paul
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_context.h"
+
+#include "xm_winsys.h"
+
+#include "i965/brw_winsys.h"
+#include "i965/brw_screen.h"
+#include "i965/brw_reg.h"
+
+#define MAX_VRAM (128*1024*1024)
+
+struct xlib_brw_buffer
+{
+   struct brw_winsys_buffer base;
+   unsigned offset;
+   unsigned type;
+   char *virtual;
+   unsigned cheesy_refcount;
+   int map_count;
+};
+
+
+/**
+ * Subclass of brw_winsys_screen for Xlib winsys
+ */
+struct xlib_brw_winsys
+{
+   struct brw_winsys_screen base;
+   unsigned offset;
+};
+
+static struct xlib_brw_winsys *
+xlib_brw_winsys( struct brw_winsys_screen *screen )
+{
+   return (struct xlib_brw_winsys *)screen;
+}
+
+
+static struct xlib_brw_buffer *
+xlib_brw_buffer( struct brw_winsys_buffer *buffer )
+{
+   return (struct xlib_brw_buffer *)buffer;
+}
+
+
+
+const char *names[BRW_BUFFER_TYPE_MAX] = {
+   "texture",
+   "scanout",
+   "vertex",
+   "curbe",
+   "query",
+   "shader_constants",
+   "wm_scratch",
+   "batch",
+   "state_cache",
+};
+
+const char *usages[BRW_USAGE_MAX] = {
+   "state",
+   "query_result",
+   "render_target",
+   "depth_buffer",
+   "sampler",
+   "vertex",
+   "scratch"
+};
+
+static struct brw_winsys_buffer *
+xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
+		      enum brw_buffer_type type,
+		      unsigned size,
+		      unsigned alignment )
+{
+   struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
+   struct xlib_brw_buffer *buf;
+
+   debug_printf("%s type %d sz %d align %d\n",
+                __FUNCTION__, type, size, alignment );
+
+   buf = CALLOC_STRUCT(xlib_brw_buffer);
+   if (!buf)
+      return NULL;
+
+   buf->offset = align(xbw->offset, alignment);
+   buf->type = type;
+   buf->virtual = MALLOC(size);
+   buf->base.offset = &buf->offset; /* hmm, cheesy */
+   buf->base.size = size;
+
+   xbw->offset = align(xbw->offset, alignment) + size;
+   if (xbw->offset > MAX_VRAM)
+      goto err;
+
+   return &buf->base;
+
+err:
+   assert(0);
+   FREE(buf);
+   return NULL;
+}
+
+static void 
+xlib_brw_bo_reference( struct brw_winsys_buffer *buffer )
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   buf->cheesy_refcount++;
+}
+
+static void 
+xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer )
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   if (--buf->cheesy_refcount == 0) {
+      FREE(buffer);
+   }
+}
+
+static int 
+xlib_brw_bo_emit_reloc( struct brw_winsys_buffer *buffer,
+			   enum brw_buffer_usage usage,
+			   unsigned delta,
+			   unsigned offset,
+			   struct brw_winsys_buffer *buffer2)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+   struct xlib_brw_buffer *buf2 = xlib_brw_buffer(buffer2);
+
+   debug_printf("%s buf %p offset %x val %x + %x buf2 %p/%s/%s\n",
+                __FUNCTION__, (void *)buffer, offset,
+                buf2->offset, delta,
+                (void *)buffer2, names[buf2->type], usages[usage]);
+
+   *(uint32_t *)(buf->virtual + offset) = buf2->offset + delta;
+
+   return 0;
+}
+
+static int 
+xlib_brw_bo_exec( struct brw_winsys_buffer *buffer,
+		     unsigned bytes_used )
+{
+   debug_printf("execute buffer %p, bytes %d\n", (void *)buffer, bytes_used);
+
+   return 0;
+}
+
+static int
+xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
+		       size_t offset,
+		       size_t size,
+		       const void *data)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   debug_printf("%s buf %p off %d sz %d data %p\n", 
+                __FUNCTION__, 
+                (void *)buffer, offset, size, data);
+
+   memcpy(buf->virtual + offset, data, size);
+   return 0;
+}
+
+
+static boolean 
+xlib_brw_bo_is_busy(struct brw_winsys_buffer *buffer)
+{
+   debug_printf("%s %p\n", __FUNCTION__, (void *)buffer);
+   return TRUE;
+}
+
+static boolean 
+xlib_brw_bo_references(struct brw_winsys_buffer *a,
+			  struct brw_winsys_buffer *b)
+{
+   debug_printf("%s %p %p\n", __FUNCTION__, (void *)a, (void *)b);
+   return TRUE;
+}
+
+static boolean 
+xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
+                                struct brw_winsys_buffer **buffers,
+                                unsigned count )
+{
+   unsigned tot_size = 0;
+   unsigned i;
+
+   for (i = 0; i < count; i++)
+      tot_size += buffers[i]->size;
+
+   debug_printf("%s %d bufs, tot_size: %d kb\n", 
+                __FUNCTION__, count, 
+                (tot_size + 1023) / 1024);
+
+   return TRUE;
+}
+
+static void *
+xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
+		   boolean write)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   debug_printf("%s %p %s\n", __FUNCTION__, (void *)buffer, 
+                write ? "read/write" : "read");
+
+   buf->map_count++;
+   return buf->virtual;
+}
+
+static void 
+xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+
+   debug_printf("%s %p\n", __FUNCTION__, (void *)buffer);
+
+   --buf->map_count;
+   assert(buf->map_count >= 0);
+}
+
+
+static void
+xlib_brw_winsys_destroy( struct brw_winsys_screen *screen )
+{
+   /* XXX: free all buffers */
+   FREE(screen);
+}
+
+static struct brw_winsys_screen *
+xlib_create_brw_winsys_screen( void )
+{
+   struct xlib_brw_winsys *ws;
+
+   ws = CALLOC_STRUCT(xlib_brw_winsys);
+   if (!ws)
+      return NULL;
+
+   ws->base.destroy              = xlib_brw_winsys_destroy;
+   ws->base.bo_alloc             = xlib_brw_bo_alloc;
+   ws->base.bo_reference         = xlib_brw_bo_reference;
+   ws->base.bo_unreference       = xlib_brw_bo_unreference;
+   ws->base.bo_emit_reloc        = xlib_brw_bo_emit_reloc;
+   ws->base.bo_exec              = xlib_brw_bo_exec;
+   ws->base.bo_subdata           = xlib_brw_bo_subdata;
+   ws->base.bo_is_busy           = xlib_brw_bo_is_busy;
+   ws->base.bo_references        = xlib_brw_bo_references;
+   ws->base.check_aperture_space = xlib_brw_check_aperture_space;
+   ws->base.bo_map               = xlib_brw_bo_map;
+   ws->base.bo_unmap             = xlib_brw_bo_unmap;
+
+   return &ws->base;
+}
+
+
+/***********************************************************************
+ * Implementation of Xlib co-state-tracker's winsys interface
+ */
+
+static struct pipe_screen *
+xlib_create_i965_screen( void )
+{
+   struct brw_winsys_screen *winsys;
+   struct pipe_screen *screen;
+
+   winsys = xlib_create_brw_winsys_screen();
+   if (winsys == NULL)
+      return NULL;
+
+   screen = brw_create_screen(winsys, 
+                              PCI_CHIP_GM45_GM);
+   if (screen == NULL)
+      goto fail;
+
+   return screen;
+
+fail:
+   if (winsys)
+      winsys->destroy( winsys );
+
+   return NULL;
+}
+
+
+static struct pipe_context *
+xlib_create_i965_context( struct pipe_screen *screen,
+                          void *context_private )
+{
+   struct pipe_context *pipe;
+   
+   pipe = brw_create_context(screen);
+   if (pipe == NULL)
+      goto fail;
+
+   pipe->priv = context_private;
+   return pipe;
+
+fail:
+   /* Free stuff here */
+   return NULL;
+}
+
+
+static void
+xlib_i965_display_surface(struct xmesa_buffer *xm_buffer,
+                              struct pipe_surface *surf)
+{
+   /* struct brw_texture *texture = brw_texture(surf->texture); */
+
+   debug_printf("%s tex %p, sz %dx%d\n", __FUNCTION__, 
+                (void *)surf->texture,
+                surf->texture->width[0],
+                surf->texture->height[0]);
+}
+
+
+struct xm_driver xlib_i965_driver = 
+{
+   .create_pipe_screen = xlib_create_i965_screen,
+   .create_pipe_context = xlib_create_i965_context,
+   .display_surface = xlib_i965_display_surface
+};
+
+
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
index 3dc38a78e4..a3c87ea272 100644
--- a/src/gallium/winsys/xlib/Makefile
+++ b/src/gallium/winsys/xlib/Makefile
@@ -30,6 +30,7 @@ DEFINES += \
 
 XLIB_WINSYS_SOURCES = \
 	xlib.c \
+	xlib_i965.c \
 	xlib_cell.c \
 	xlib_llvmpipe.c \
 	xlib_softpipe.c \
-- 
cgit v1.2.3


From b1d293321458ab00cc809aea4a19f46a256a7f98 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 13:54:44 +0000
Subject: i965g: hook up brw_screen.c

---
 src/gallium/drivers/i965/Makefile     |  1 +
 src/gallium/drivers/i965/brw_reg.h    | 10 ++---
 src/gallium/drivers/i965/brw_screen.c | 76 +++++++++++++++--------------------
 3 files changed, 38 insertions(+), 49 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 48950544c9..94b52bf0ec 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -57,6 +57,7 @@ C_SOURCES = \
 	brw_wm_sampler_state.c \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
+	brw_screen.c \
 	brw_screen_tex_layout.c \
 	brw_screen_texture.c \
 	brw_screen_surface.c \
diff --git a/src/gallium/drivers/i965/brw_reg.h b/src/gallium/drivers/i965/brw_reg.h
index f428ec9269..a63403b6af 100644
--- a/src/gallium/drivers/i965/brw_reg.h
+++ b/src/gallium/drivers/i965/brw_reg.h
@@ -98,11 +98,11 @@
 #define PCI_CHIP_ILM_G                  0x0046
 
 struct brw_chipset {
-   int pci_id:16;
-   int is_965:1;
-   int is_igdng:1;
-   int is_g4x:1;
-   int pad:13;
+   unsigned pci_id:16;
+   unsigned is_965:1;
+   unsigned is_igdng:1;
+   unsigned is_g4x:1;
+   unsigned pad:13;
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 671467989d..a02e6acc39 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -33,9 +33,8 @@
 #include "brw_reg.h"
 #include "brw_context.h"
 #include "brw_screen.h"
-#include "brw_buffer.h"
-#include "brw_texture.h"
 #include "brw_winsys.h"
+#include "brw_debug.h"
 
 #ifdef DEBUG
 static const struct debug_named_value debug_names[] = {
@@ -49,18 +48,13 @@ static const struct debug_named_value debug_names[] = {
    { "bat",   DEBUG_BATCH},
    { "pix",   DEBUG_PIXEL},
    { "buf",   DEBUG_BUFMGR},
-   { "reg",   DEBUG_REGION},
-   { "fbo",   DEBUG_FBO},
-   { "lock",  DEBUG_LOCK},
    { "sync",  DEBUG_SYNC},
    { "prim",  DEBUG_PRIMS },
    { "vert",  DEBUG_VERTS },
-   { "dri",   DEBUG_DRI },
    { "dma",   DEBUG_DMA },
    { "san",   DEBUG_SANITY },
    { "sleep", DEBUG_SLEEP },
    { "stats", DEBUG_STATS },
-   { "tile",  DEBUG_TILE },
    { "sing",  DEBUG_SINGLE_THREAD },
    { "thre",  DEBUG_SINGLE_THREAD },
    { "wm",    DEBUG_WM },
@@ -90,7 +84,7 @@ brw_get_name(struct pipe_screen *screen)
    static char buffer[128];
    const char *chipset;
 
-   switch (brw_screen(screen)->pci_id) {
+   switch (brw_screen(screen)->chipset.pci_id) {
    case PCI_CHIP_I965_G:
       chipset = "I965_G";
       break;
@@ -250,9 +244,6 @@ brw_fence_reference(struct pipe_screen *screen,
                      struct pipe_fence_handle **ptr,
                      struct pipe_fence_handle *fence)
 {
-   struct brw_screen *is = brw_screen(screen);
-
-   is->iws->fence_reference(is->iws, ptr, fence);
 }
 
 static int
@@ -260,19 +251,15 @@ brw_fence_signalled(struct pipe_screen *screen,
                      struct pipe_fence_handle *fence,
                      unsigned flags)
 {
-   struct brw_screen *is = brw_screen(screen);
-
-   return is->iws->fence_signalled(is->iws, fence);
+   return 0;                    /* XXX shouldn't this be a boolean? */
 }
 
 static int
 brw_fence_finish(struct pipe_screen *screen,
-                  struct pipe_fence_handle *fence,
-                  unsigned flags)
+                 struct pipe_fence_handle *fence,
+                 unsigned flags)
 {
-   struct brw_screen *is = brw_screen(screen);
-
-   return is->iws->fence_finish(is->iws, fence);
+   return 0;
 }
 
 
@@ -284,21 +271,21 @@ brw_fence_finish(struct pipe_screen *screen,
 static void
 brw_destroy_screen(struct pipe_screen *screen)
 {
-   struct brw_screen *is = brw_screen(screen);
+   struct brw_screen *bscreen = brw_screen(screen);
 
-   if (is->iws)
-      is->iws->destroy(is->iws);
+   if (bscreen->sws)
+      bscreen->sws->destroy(bscreen->sws);
 
-   FREE(is);
+   FREE(bscreen);
 }
 
 /**
  * Create a new brw_screen object
  */
 struct pipe_screen *
-brw_create_screen(struct intel_winsys *iws, uint pci_id)
+brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
 {
-   struct brw_screen *is;
+   struct brw_screen *bscreen;
    struct brw_chipset chipset;
 
 #ifdef DEBUG
@@ -341,25 +328,26 @@ brw_create_screen(struct intel_winsys *iws, uint pci_id)
    }
 
 
-   is = CALLOC_STRUCT(brw_screen);
-   if (!is)
+   bscreen = CALLOC_STRUCT(brw_screen);
+   if (!bscreen)
       return NULL;
 
-   is->chipset = chipset;
-   is->iws = iws;
-   is->base.winsys = NULL;
-   is->base.destroy = brw_destroy_screen;
-   is->base.get_name = brw_get_name;
-   is->base.get_vendor = brw_get_vendor;
-   is->base.get_param = brw_get_param;
-   is->base.get_paramf = brw_get_paramf;
-   is->base.is_format_supported = brw_is_format_supported;
-   is->base.fence_reference = brw_fence_reference;
-   is->base.fence_signalled = brw_fence_signalled;
-   is->base.fence_finish = brw_fence_finish;
-
-   brw_screen_init_texture_functions(is);
-   brw_screen_init_buffer_functions(is);
-
-   return &is->base;
+   bscreen->chipset = chipset;
+   bscreen->sws = sws;
+   bscreen->base.winsys = NULL;
+   bscreen->base.destroy = brw_destroy_screen;
+   bscreen->base.get_name = brw_get_name;
+   bscreen->base.get_vendor = brw_get_vendor;
+   bscreen->base.get_param = brw_get_param;
+   bscreen->base.get_paramf = brw_get_paramf;
+   bscreen->base.is_format_supported = brw_is_format_supported;
+   bscreen->base.fence_reference = brw_fence_reference;
+   bscreen->base.fence_signalled = brw_fence_signalled;
+   bscreen->base.fence_finish = brw_fence_finish;
+
+   brw_screen_tex_init(bscreen);
+   brw_screen_tex_surface_init(bscreen);
+   brw_screen_init_buffer_functions(bscreen);
+
+   return &bscreen->base;
 }
-- 
cgit v1.2.3


From a09b3d50975e68c13c0421d770f3865ad2a1257c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 15:10:34 +0000
Subject: i965g: add missing buffer functions

---
 src/gallium/drivers/i965/Makefile             |   1 +
 src/gallium/drivers/i965/brw_screen.c         |   2 +-
 src/gallium/drivers/i965/brw_screen.h         |  12 ++-
 src/gallium/drivers/i965/brw_screen_buffers.c | 142 ++++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_winsys.h         |   4 +
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c  |   4 +
 6 files changed, 162 insertions(+), 3 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_screen_buffers.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 94b52bf0ec..38b7a30944 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -58,6 +58,7 @@ C_SOURCES = \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
 	brw_screen.c \
+	brw_screen_buffers.c \
 	brw_screen_tex_layout.c \
 	brw_screen_texture.c \
 	brw_screen_surface.c \
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index a02e6acc39..7991f4ae52 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -347,7 +347,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
 
    brw_screen_tex_init(bscreen);
    brw_screen_tex_surface_init(bscreen);
-   brw_screen_init_buffer_functions(bscreen);
+   brw_screen_buffer_init(bscreen);
 
    return &bscreen->base;
 }
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 11b480b1ac..dda516ee68 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -60,9 +60,16 @@ struct brw_transfer
 struct brw_buffer
 {
    struct pipe_buffer base;
+
+   /* One of either bo or user_buffer will be non-null, depending on
+    * whether this is a hardware or user buffer.
+    */
    struct brw_winsys_buffer *bo;
+   void *user_buffer;
+
+   /* Mapped pointer??
+    */
    void *ptr;
-   boolean is_user_buffer;
 };
 
 #define BRW_TILING_NONE  0
@@ -151,7 +158,7 @@ brw_texture(struct pipe_texture *texture)
 static INLINE boolean
 brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
 {
-   return ((const struct brw_buffer *)buf)->is_user_buffer;
+   return ((const struct brw_buffer *)buf)->user_buffer != NULL;
 }
 
 struct brw_winsys_buffer *
@@ -173,6 +180,7 @@ void brw_update_texture( struct brw_screen *brw_screen,
 void brw_screen_tex_init( struct brw_screen *brw_screen );
 void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
 
+void brw_screen_buffer_init(struct brw_screen *brw_screen);
 
 
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
new file mode 100644
index 0000000000..0bf885ce8c
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -0,0 +1,142 @@
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "brw_screen.h"
+#include "brw_winsys.h"
+
+
+
+/* Map a buffer for CPU access.  User buffers return their stored pointer;
+ * everything else is mapped through the winsys bo. */
+static void *
+brw_buffer_map( struct pipe_screen *screen,
+                struct pipe_buffer *buffer,
+                unsigned usage )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+   const unsigned write_bit = usage & PIPE_BUFFER_USAGE_CPU_WRITE;
+
+   if (!bbuf->user_buffer)
+      return winsys->bo_map( bbuf->bo, write_bit ? TRUE : FALSE );
+   return bbuf->user_buffer;
+}
+
+/* Release a mapping; only bo-backed buffers have a winsys mapping. */
+static void
+brw_buffer_unmap( struct pipe_screen *screen,
+                   struct pipe_buffer *buffer )
+{
+   struct brw_winsys_screen *winsys = brw_screen(screen)->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+
+   if (bbuf->bo)
+      winsys->bo_unmap(bbuf->bo);
+}
+
+/* Destroy a buffer: drop the winsys bo, if any, and free the wrapper. */
+static void
+brw_buffer_destroy( struct pipe_buffer *buffer )
+{
+   struct brw_winsys_screen *winsys = brw_screen( buffer->screen )->sws;
+   struct brw_buffer *bbuf = brw_buffer( buffer );
+
+   /* The reference count is expected to have reached zero by now. */
+   assert(!p_atomic_read(&buffer->reference.count));
+
+   if (bbuf->bo)
+      winsys->bo_unreference(bbuf->bo);
+   FREE(bbuf);
+}
+
+
+/* Create a hardware buffer: a brw_buffer wrapper plus a winsys bo.
+ * Returns NULL if either allocation fails.
+ */
+static struct pipe_buffer *
+brw_buffer_create(struct pipe_screen *screen,
+                   unsigned alignment,
+                   unsigned usage,
+                   unsigned size)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_winsys_screen *sws = bscreen->sws;
+   struct brw_buffer *buf;
+   unsigned usage_type;
+
+   buf = CALLOC_STRUCT(brw_buffer);
+   if (!buf)
+      return NULL;
+
+   pipe_reference_init(&buf->base.reference, 1);
+   buf->base.screen = screen;
+   buf->base.alignment = alignment;
+   buf->base.usage = usage;
+   buf->base.size = size;
+
+   /* Map the usage bits to a winsys buffer type (default: generic). */
+   switch (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX |
+                    PIPE_BUFFER_USAGE_PIXEL | PIPE_BUFFER_USAGE_CONSTANT)) {
+   case PIPE_BUFFER_USAGE_VERTEX:
+   case PIPE_BUFFER_USAGE_INDEX:
+   case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX):
+      usage_type = BRW_BUFFER_TYPE_VERTEX;
+      break;
+   case PIPE_BUFFER_USAGE_PIXEL:
+      usage_type = BRW_BUFFER_TYPE_PIXEL;
+      break;
+   case PIPE_BUFFER_USAGE_CONSTANT:
+      usage_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
+      break;
+   default:
+      usage_type = BRW_BUFFER_TYPE_GENERIC;
+      break;
+   }
+
+   /* No bo (assumed NULL on failure) means map has nothing to map: fail. */
+   buf->bo = sws->bo_alloc( sws, usage_type, size, alignment );
+   if (!buf->bo) {
+      FREE(buf);
+      return NULL;
+   }
+
+   return &buf->base;
+}
+
+
+/* Wrap an existing memory pointer in a pipe_buffer.  The pointer is
+ * stored as-is (no copy, no bo); returns NULL if the wrapper allocation fails.
+ */
+static struct pipe_buffer *
+brw_user_buffer_create(struct pipe_screen *screen,
+                       void *ptr,
+                       unsigned bytes)
+{
+   struct brw_buffer *ubuf = CALLOC_STRUCT(brw_buffer);
+
+   if (ubuf == NULL)
+      return NULL;
+
+   pipe_reference_init(&ubuf->base.reference, 1);
+   ubuf->base.screen = screen;
+   ubuf->base.alignment = 1;
+   ubuf->base.usage = 0;
+   ubuf->base.size = bytes;
+   ubuf->user_buffer = ptr;
+   return &ubuf->base;
+}
+
+/* Install this file's buffer functions as the screen's pipe_screen hooks. */
+void brw_screen_buffer_init(struct brw_screen *brw_screen)
+{
+   brw_screen->base.buffer_create = brw_buffer_create;
+   brw_screen->base.user_buffer_create = brw_user_buffer_create;
+   brw_screen->base.buffer_map = brw_buffer_map;
+   brw_screen->base.buffer_unmap = brw_buffer_unmap;
+   brw_screen->base.buffer_destroy = brw_buffer_destroy;
+}
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 9338923da3..b2ba3e86f9 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -51,6 +51,8 @@ enum brw_buffer_usage {
    BRW_USAGE_QUERY_RESULT,	/* INSTRUCTION, INSTRUCTION */
    BRW_USAGE_RENDER_TARGET,	/* RENDER,      0 */
    BRW_USAGE_DEPTH_BUFFER,	/* RENDER,      RENDER */
+   BRW_USAGE_BLIT_SOURCE,	/* RENDER,      0 */
+   BRW_USAGE_BLIT_DEST,         /* RENDER,      RENDER */
    BRW_USAGE_SAMPLER,		/* SAMPLER,     0 */
    BRW_USAGE_VERTEX,		/* VERTEX,      0 */
    BRW_USAGE_SCRATCH,		/* 0,           0 */
@@ -71,6 +73,8 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_SHADER_SCRATCH,
    BRW_BUFFER_TYPE_BATCH,
    BRW_BUFFER_TYPE_STATE_CACHE,
+   BRW_BUFFER_TYPE_PIXEL,       /* image uploads, pbo's, etc */
+   BRW_BUFFER_TYPE_GENERIC,     /* unknown */
    BRW_BUFFER_TYPE_MAX		/* Count of possible values */
 };
 
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 4d4bc0cb30..d5c65fa214 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -90,6 +90,8 @@ const char *names[BRW_BUFFER_TYPE_MAX] = {
    "wm_scratch",
    "batch",
    "state_cache",
+   "pixel",
+   "generic",
 };
 
 const char *usages[BRW_USAGE_MAX] = {
@@ -97,6 +99,8 @@ const char *usages[BRW_USAGE_MAX] = {
    "query_result",
    "render_target",
    "depth_buffer",
+   "blit_source",
+   "blit_dest",
    "sampler",
    "vertex",
    "scratch"
-- 
cgit v1.2.3


From 9706a83bc959ba8445d0258e47639b44da2238fc Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 15:25:42 +0000
Subject: i965g: hook up more pipe_context functions

---
 src/gallium/drivers/i965/Makefile           |  1 +
 src/gallium/drivers/i965/brw_batchbuffer.h  |  2 --
 src/gallium/drivers/i965/brw_context.c      | 23 ++++++++++++-
 src/gallium/drivers/i965/brw_context.h      |  3 ++
 src/gallium/drivers/i965/brw_draw.c         |  4 +--
 src/gallium/drivers/i965/brw_pipe_flush.c   | 51 ++++++++++++++++-------------
 src/gallium/drivers/i965/brw_pipe_misc.c    | 21 ++++++++++--
 src/gallium/drivers/i965/brw_pipe_query.c   |  2 +-
 src/gallium/drivers/i965/brw_pipe_sampler.c |  6 +++-
 9 files changed, 81 insertions(+), 32 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 38b7a30944..b42d9a92c4 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -31,6 +31,7 @@ C_SOURCES = \
 	brw_pipe_query.c \
 	brw_pipe_shader.c \
 	brw_pipe_flush.c \
+	brw_pipe_misc.c \
 	brw_pipe_rast.c \
 	brw_sf.c \
 	brw_sf_emit.c \
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index b7186b3757..04ca6265ed 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -60,8 +60,6 @@ void brw_batchbuffer_free(struct brw_batchbuffer *batch);
 void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
 			      const char *file, int line);
 
-#define brw_batchbuffer_flush(batch) \
-	_brw_batchbuffer_flush(batch, __FILE__, __LINE__)
 
 void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
 
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index e10b7d8bf5..30cc243255 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -50,6 +50,17 @@ static void brw_destroy_context( struct pipe_context *pipe )
 
    brw_draw_cleanup( brw );
 
+   brw_pipe_blend_cleanup( brw );
+   brw_pipe_depth_stencil_cleanup( brw );
+   brw_pipe_framebuffer_cleanup( brw );
+   brw_pipe_flush_cleanup( brw );
+   brw_pipe_misc_cleanup( brw );
+   brw_pipe_query_cleanup( brw );
+   brw_pipe_rast_cleanup( brw );
+   brw_pipe_sampler_cleanup( brw );
+   brw_pipe_shader_cleanup( brw );
+   brw_pipe_vertex_cleanup( brw );
+
    FREE(brw->wm.compile_data);
 
    for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
@@ -98,7 +109,17 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
 
    brw->base.destroy = brw_destroy_context;
 
-   brw_init_query( brw );
+   brw_pipe_blend_init( brw );
+   brw_pipe_depth_stencil_init( brw );
+   brw_pipe_framebuffer_init( brw );
+   brw_pipe_flush_init( brw );
+   brw_pipe_misc_init( brw );
+   brw_pipe_query_init( brw );
+   brw_pipe_rast_init( brw );
+   brw_pipe_sampler_init( brw );
+   brw_pipe_shader_init( brw );
+   brw_pipe_vertex_init( brw );
+
    brw_init_state( brw );
    brw_draw_init( brw );
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 97b2a8e27d..a4c48e6fd2 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -777,6 +777,9 @@ void brw_pipe_shader_cleanup( struct brw_context *brw );
 void brw_pipe_vertex_cleanup( struct brw_context *brw );
 
 
+void brw_context_flush( struct brw_context *brw );
+
+
 /* brw_urb.c
  */
 int brw_upload_urb_fence(struct brw_context *brw);
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index b5fe7c9601..a2bed6256b 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -166,7 +166,7 @@ try_draw_range_elements(struct brw_context *brw,
       return ret;
 
    if (brw->flags.always_flush_batch)
-      brw_batchbuffer_flush(brw->batch);
+      brw_context_flush( brw );
 
    return 0;
 }
@@ -217,7 +217,7 @@ brw_draw_range_elements(struct pipe_context *pipe,
    /* Otherwise, flush and retry:
     */
    if (ret != 0) {
-      brw_batchbuffer_flush(brw->batch);
+      brw_context_flush( brw );
       ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
       assert(ret == 0);
    }
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 1b43428760..9b52b56eae 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -2,50 +2,55 @@
 #include "util/u_upload_mgr.h"
 
 #include "brw_context.h"
+#include "brw_batchbuffer.h"
 
 
-/**
- * called from brw_batchbuffer_flush and children before sending a
- * batchbuffer off.
+
+/* All batchbuffer flushes must go through this function.
  */
-static void brw_finish_batch(struct brw_context *brw)
+void brw_context_flush( struct brw_context *brw )
 {
+   /*
+    * 
+    */
    brw_emit_query_end(brw);
-}
 
+   /* Move to the end of the current upload buffer so that we'll force choosing
+    * a new buffer next time.
+    */
+   u_upload_flush( brw->vb.upload_vertex );
+   u_upload_flush( brw->vb.upload_index );
 
-/**
- * called from intelFlushBatchLocked
- */
-static void brw_new_batch( struct brw_context *brw )
-{
-   brw->curbe.need_new_bo = GL_TRUE;
+   _brw_batchbuffer_flush( brw->batch, __FILE__, __LINE__ );
 
    /* Mark all context state as needing to be re-emitted.
     * This is probably not as severe as on 915, since almost all of our state
     * is just in referenced buffers.
     */
    brw->state.dirty.brw |= BRW_NEW_CONTEXT;
-
    brw->state.dirty.mesa |= ~0;
    brw->state.dirty.brw |= ~0;
    brw->state.dirty.cache |= ~0;
 
-   /* Move to the end of the current upload buffer so that we'll force choosing
-    * a new buffer next time.
-    */
-   u_upload_flush( brw->vb.upload_vertex );
-   u_upload_flush( brw->vb.upload_index );
+   brw->curbe.need_new_bo = GL_TRUE;
+}
 
+/* pipe_context flush hook: flush the context; no fence is produced. */
+static void brw_flush( struct pipe_context *pipe, unsigned flags,
+                       struct pipe_fence_handle **fence )
+{
+   brw_context_flush( brw_context( pipe ) );
+   if (fence)                   /* tolerate callers passing no fence ptr */
+      *fence = NULL;
 }
 
-/* called from intelWaitForIdle() and intelFlush()
- *
- * For now, just flush everything.  Could be smarter later.
- */
-static GLuint brw_flush_cmd( void )
+
+void brw_pipe_flush_init( struct brw_context *brw )
 {
-   return ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+   brw->base.flush = brw_flush;
 }
 
 
+void brw_pipe_flush_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
index fb8d7ecc59..a7ccde5917 100644
--- a/src/gallium/drivers/i965/brw_pipe_misc.c
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -1,7 +1,12 @@
 
+#include "brw_context.h"
+#include "brw_structs.h"
+#include "brw_defines.h"
+
 static void brw_set_polygon_stipple( struct pipe_context *pipe,
-				     const unsigned *stipple )
+				     const struct pipe_poly_stipple *stip )
 {
+   struct brw_context *brw = brw_context(pipe);
    struct brw_polygon_stipple *bps = &brw->curr.bps;
    GLuint i;
 
@@ -10,5 +15,17 @@ static void brw_set_polygon_stipple( struct pipe_context *pipe,
    bps->header.length = sizeof *bps/4-2;
 
    for (i = 0; i < 32; i++)
-      bps->stipple[i] = brw->curr.poly_stipple[i]; /* don't invert */
+      bps->stipple[i] = stip->stipple[i]; /* don't invert */
+}
+
+
+/* Install this file's pipe_context callback (polygon stipple). */
+void brw_pipe_misc_init( struct brw_context *brw )
+{
+   brw->base.set_polygon_stipple = brw_set_polygon_stipple;
+}
+
+
+void brw_pipe_misc_cleanup( struct brw_context *brw )
+{
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 1fe2f4da4f..d3e173f5ec 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -137,7 +137,7 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
     */
    if (query->bo) {
       brw_emit_query_end(brw);
-      brw_batchbuffer_flush(brw->batch);
+      brw_context_flush( brw );
 
       brw->sws->bo_unreference(brw->query.bo);
       brw->query.bo = NULL;
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index 08a5d22009..56cf95c4cd 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -156,10 +156,14 @@ static void brw_set_sampler_textures(struct pipe_context *pipe,
 }
 
 
-void brw_sampler_init( struct brw_context *brw )
+void brw_pipe_sampler_init( struct brw_context *brw )
 {
    brw->base.set_sampler_textures = brw_set_sampler_textures;
    brw->base.create_sampler_state = brw_create_sampler_state;
    brw->base.bind_sampler_state = brw_bind_sampler_state;
    brw->base.destroy_sampler_state = brw_destroy_sampler_state;
 }
+
+void brw_pipe_sampler_cleanup( struct brw_context *brw )
+{
+}
-- 
cgit v1.2.3


From 7373bc0e0294d68bc3e64f4a6de1bb4ec3132f02 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 15:59:56 +0000
Subject: i965g: hook up pipe sampler callbacks

---
 src/gallium/drivers/i965/Makefile               |   1 +
 src/gallium/drivers/i965/brw_context.h          |   2 +-
 src/gallium/drivers/i965/brw_pipe_sampler.c     | 177 +++++++++++++++---------
 src/gallium/drivers/i965/brw_wm.c               |   2 +-
 src/gallium/drivers/i965/brw_wm_sampler_state.c |   4 +-
 src/gallium/drivers/i965/brw_wm_surface_state.c |   2 +-
 6 files changed, 120 insertions(+), 68 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index b42d9a92c4..8603907dc2 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -32,6 +32,7 @@ C_SOURCES = \
 	brw_pipe_shader.c \
 	brw_pipe_flush.c \
 	brw_pipe_misc.c \
+	brw_pipe_sampler.c \
 	brw_pipe_rast.c \
 	brw_sf.c \
 	brw_sf_emit.c \
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index a4c48e6fd2..b6f77d1253 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -505,7 +505,7 @@ struct brw_context
       unsigned num_vertex_elements;
       unsigned num_samplers;
 
-      struct brw_texture *texture[PIPE_MAX_SAMPLERS];
+      struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
       unsigned num_textures;
       unsigned num_vertex_buffers;
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index 56cf95c4cd..f0a765ecf5 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -1,5 +1,7 @@
 
 #include "util/u_memory.h"
+#include "util/u_math.h"
+
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 
@@ -39,119 +41,166 @@ static GLuint translate_wrap_mode( unsigned wrap )
    }
 }
 
+/* Translate a PIPE_TEX_FILTER_x image filter to its BRW_MAPFILTER_x code;
+ * unrecognised values assert and fall back to nearest.
+ */
+static GLuint translate_img_filter( unsigned filter )
+{
+   if (filter == PIPE_TEX_FILTER_NEAREST)
+      return BRW_MAPFILTER_NEAREST;
+   if (filter == PIPE_TEX_FILTER_LINEAR)
+      return BRW_MAPFILTER_LINEAR;
+   if (filter == PIPE_TEX_FILTER_ANISO)
+      return BRW_MAPFILTER_ANISOTROPIC;
+   assert(0);
+   return BRW_MAPFILTER_NEAREST;
+}
 
-
-static void *brw_create_sampler_state( struct pipe_context *pipe,
-				     const struct pipe_sampler_state *templ )
+static GLuint translate_mip_filter( unsigned filter )
 {
-   struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state);
+   switch (filter) {
+   case PIPE_TEX_MIPFILTER_NONE: 
+      return BRW_MIPFILTER_NONE;
+   case PIPE_TEX_MIPFILTER_NEAREST:
+      return BRW_MIPFILTER_NEAREST;
+   case PIPE_TEX_MIPFILTER_LINEAR:
+      return BRW_MIPFILTER_LINEAR;
+   default:
+      assert(0);
+      return BRW_MIPFILTER_NONE;
+   }
+}
 
-   switch (key->minfilter) {
-   case GL_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
-      break;
-   case GL_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
-      break;
-   case GL_NEAREST_MIPMAP_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
-      break;
-   case GL_LINEAR_MIPMAP_NEAREST:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST;
-      break;
-   case GL_NEAREST_MIPMAP_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
-      break;
-   case GL_LINEAR_MIPMAP_LINEAR:
-      sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
-      sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR;
-      break;
+/* XXX: not sure why there are special translations for the shadow tex
+ * compare functions.  In particular ALWAYS is translated to NEVER.
+ * Is this a hardware issue?  Does i965 really suffer from this?
+ */
+static GLuint translate_shadow_compare_func( unsigned func )
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER: 
+       return BRW_COMPAREFUNCTION_ALWAYS;
+   case PIPE_FUNC_LESS: 
+       return BRW_COMPAREFUNCTION_LEQUAL;
+   case PIPE_FUNC_LEQUAL: 
+       return BRW_COMPAREFUNCTION_LESS;
+   case PIPE_FUNC_GREATER: 
+       return BRW_COMPAREFUNCTION_GEQUAL;
+   case PIPE_FUNC_GEQUAL: 
+      return BRW_COMPAREFUNCTION_GREATER;
+   case PIPE_FUNC_NOTEQUAL: 
+      return BRW_COMPAREFUNCTION_EQUAL;
+   case PIPE_FUNC_EQUAL: 
+      return BRW_COMPAREFUNCTION_NOTEQUAL;
+   case PIPE_FUNC_ALWAYS: 
+       return BRW_COMPAREFUNCTION_NEVER;
    default:
-      break;
+      assert(0);
+      return BRW_COMPAREFUNCTION_NEVER;
    }
+}
+
+
+
+
+static void *
+brw_create_sampler_state( struct pipe_context *pipe,
+                          const struct pipe_sampler_state *template )
+{
+   struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state);
+
+   sampler->ss0.min_filter = translate_img_filter( template->min_img_filter );
+   sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter );
+   sampler->ss0.mip_filter = translate_mip_filter( template->min_mip_filter );
 
-   /* Set Anisotropy: 
+
+   /* XXX: anisotropy logic slightly changed: 
     */
-   if (key->max_aniso > 1.0) {
+   if (template->max_anisotropy > 1.0) {
       sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; 
       sampler->ss0.mag_filter = BRW_MAPFILTER_ANISOTROPIC;
 
-      if (key->max_aniso > 2.0) {
-	 sampler->ss3.max_aniso = MIN2((key->max_aniso - 2) / 2,
+      if (template->max_anisotropy > 2.0) {
+	 sampler->ss3.max_aniso = MIN2((template->max_anisotropy - 2) / 2,
 				       BRW_ANISORATIO_16);
       }
    }
-   else {
-      switch (key->magfilter) {
-      case GL_NEAREST:
-	 sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST;
-	 break;
-      case GL_LINEAR:
-	 sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
-	 break;
-      default:
-	 break;
-      }
-   }
 
-   sampler->ss1.r_wrap_mode = translate_wrap_mode(key->wrap_r);
-   sampler->ss1.s_wrap_mode = translate_wrap_mode(key->wrap_s);
-   sampler->ss1.t_wrap_mode = translate_wrap_mode(key->wrap_t);
+   sampler->ss1.r_wrap_mode = translate_wrap_mode(template->wrap_r);
+   sampler->ss1.s_wrap_mode = translate_wrap_mode(template->wrap_s);
+   sampler->ss1.t_wrap_mode = translate_wrap_mode(template->wrap_t);
 
    /* Set LOD bias: 
     */
-   sampler->ss0.lod_bias = S_FIXED(CLAMP(key->lod_bias, -16, 15), 6);
+   sampler->ss0.lod_bias = 
+      util_signed_fixed(CLAMP(template->lod_bias, -16, 15), 6);
+
 
    sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
    sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
 
    /* Set shadow function: 
     */
-   if (key->comparemode == GL_COMPARE_R_TO_TEXTURE_ARB) {
+   if (template->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+
       /* Shadowing is "enabled" by emitting a particular sampler
        * message (sample_c).  So need to recompile WM program when
        * shadow comparison is enabled on each/any texture unit.
        */
       sampler->ss0.shadow_function =
-	 intel_translate_shadow_compare_func(key->comparefunc);
+	 translate_shadow_compare_func(template->compare_func);
    }
 
    /* Set BaseMipLevel, MaxLOD, MinLOD: 
     */
-   sampler->ss0.base_level = U_FIXED(0, 1);
+   sampler->ss0.base_level = 
+      util_unsigned_fixed(0, 1);
+
+   sampler->ss1.max_lod = 
+      util_unsigned_fixed(CLAMP(template->max_lod, 0, 13), 6);
 
-   sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(key->maxlod, 0), 13), 6);
-   sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(key->minlod, 0), 13), 6);
+   sampler->ss1.min_lod = 
+      util_unsigned_fixed(CLAMP(template->min_lod, 0, 13), 6);
 
    return (void *)sampler;
 }
 
 static void brw_bind_sampler_state(struct pipe_context *pipe,
-				 void *cso)
+                                   unsigned num, void **sampler)
 {
    struct brw_context *brw = brw_context(pipe);
-   brw->curr.sampler = (const struct brw_sampler_state *)cso;
-   brw->state.dirty.mesa |= PIPE_NEW_SAMPLER;
+   int i;
+
+   for (i = 0; i < num; i++)
+      brw->curr.sampler[i] = sampler[i];
+
+   for (i = num; i < brw->curr.num_samplers; i++)
+      brw->curr.sampler[i] = NULL;
+
+   brw->curr.num_samplers = num;
+   brw->state.dirty.mesa |= PIPE_NEW_SAMPLERS;
 }
 
 static void brw_delete_sampler_state(struct pipe_context *pipe,
 				  void *cso)
 {
-   struct brw_context *brw = brw_context(pipe);
    FREE(cso);
 }
 
 static void brw_set_sampler_textures(struct pipe_context *pipe,
-				     unsigned num_textures,
-				     struct pipe_texture **tex)
+				     unsigned num,
+				     struct pipe_texture **texture)
 {
    struct brw_context *brw = brw_context(pipe);
+   int i;
 
+   for (i = 0; i < num; i++)
+      pipe_texture_reference(&brw->curr.texture[i], texture[i]);
+
+   for (i = num; i < brw->curr.num_textures; i++)
+      pipe_texture_reference(&brw->curr.texture[i], NULL);
+
+   brw->curr.num_textures = num;
    brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
 }
 
@@ -160,8 +209,10 @@ void brw_pipe_sampler_init( struct brw_context *brw )
 {
    brw->base.set_sampler_textures = brw_set_sampler_textures;
    brw->base.create_sampler_state = brw_create_sampler_state;
-   brw->base.bind_sampler_state = brw_bind_sampler_state;
-   brw->base.destroy_sampler_state = brw_destroy_sampler_state;
+   brw->base.bind_sampler_states = brw_bind_sampler_state;
+   brw->base.delete_sampler_state = brw_delete_sampler_state;
+
+   brw->base.set_sampler_textures = brw_set_sampler_textures;
 }
 
 void brw_pipe_sampler_cleanup( struct brw_context *brw )
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 4fbf9de9bb..90780272da 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -248,7 +248,7 @@ static void brw_wm_populate_key( struct brw_context *brw,
 
    /* PIPE_NEW_BOUND_TEXTURES */
    for (i = 0; i < brw->curr.num_textures; i++) {
-      const struct brw_texture *tex = brw->curr.texture[i];
+      const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
 	 
       if (tex->base.format == PIPE_FORMAT_YCBCR)
 	 key->yuvtex_mask |= 1 << i;
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 2909dd3876..2fddb4ad89 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -75,7 +75,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 			    brw->curr.num_samplers);
 
    for (i = 0; i < key->sampler_count; i++) {
-      const struct brw_texture *tex = brw->curr.texture[i];
+      const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
       const struct brw_sampler *sampler = brw->curr.sampler[i];
       struct brw_sampler_state *entry = &key->sampler[i];
 
@@ -119,7 +119,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
    int i;
 
    for (i = 0; i < nr; i++) {
-      const struct brw_texture *tex = brw->curr.texture[i];
+      const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
       const struct brw_sampler *sampler = brw->curr.sampler[i];
 
       brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index e5a0ed7d61..6c29db045f 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -201,7 +201,7 @@ static int prepare_wm_surfaces(struct brw_context *brw )
     */
    for (i = 0; i < brw->curr.num_textures; i++) {
       brw_update_texture_surface(brw, 
-				 brw->curr.texture[i],
+				 brw_texture(brw->curr.texture[i]),
 				 nr_surfaces++);
    }
 
-- 
cgit v1.2.3


From 5f8dde99ed62beaf1c2590515c33ed8b5076ed8d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 16:03:52 +0000
Subject: i965g: stubs for brw_pipe_vertex.c

---
 src/gallium/drivers/i965/Makefile          |  1 +
 src/gallium/drivers/i965/brw_pipe_vertex.c | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 8603907dc2..d7262cf07c 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -33,6 +33,7 @@ C_SOURCES = \
 	brw_pipe_flush.c \
 	brw_pipe_misc.c \
 	brw_pipe_sampler.c \
+	brw_pipe_vertex.c \
 	brw_pipe_rast.c \
 	brw_sf.c \
 	brw_sf_emit.c \
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index d1d0d7cd43..0b69718fd8 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,11 +1,25 @@
+#include "brw_context.h"
 
 
+void 
+brw_pipe_vertex_init( struct brw_context *brw )
+{
+}
+
 
 void 
 brw_pipe_vertex_cleanup( struct brw_context *brw )
 {
-   for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+
+   /* Release bound pipe vertex_buffers
+    */
+
+   /* Release some other stuff
+    */
+#if 0
+   for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
       brw->sws->bo_unreference(brw->vb.inputs[i].bo);
       brw->vb.inputs[i].bo = NULL;
    }
+#endif
 }
-- 
cgit v1.2.3


From 99394a737a46999a2fc08915e9f1408246109c4a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 16:42:44 +0000
Subject: i965g: add some missing texture creation code

---
 src/gallium/drivers/i965/brw_context.c        |  1 +
 src/gallium/drivers/i965/brw_screen_texture.c | 32 ++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 30cc243255..0692412b32 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -107,6 +107,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    //ctx->Shader.EmitCondCodes = GL_TRUE;
    //ctx->Shader.EmitNVTempInitialization = GL_TRUE;
 
+   brw->base.screen = screen;
    brw->base.destroy = brw_destroy_context;
 
    brw_pipe_blend_init( brw );
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 48b3451bfc..fe3e57da90 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -30,6 +30,7 @@
   */
 
 #include "util/u_memory.h"
+#include "util/u_simple_list.h"
 
 #include "brw_screen.h"
 #include "brw_defines.h"
@@ -190,8 +191,18 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    if (tex == NULL)
       return NULL;
 
+   memcpy(&tex->base, templ, sizeof *templ);
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   /* XXX: compressed textures need special treatment here
+    */
+   tex->cpp = pf_get_size(tex->base.format);
    tex->compressed = pf_is_compressed(tex->base.format);
 
+   make_empty_list(&tex->views[0]);
+   make_empty_list(&tex->views[1]);
+
    /* XXX: No tiling with compressed textures??
     */
    if (tex->compressed == 0 
@@ -209,11 +220,30 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    }
 
 
-   memcpy(&tex->base, templ, sizeof *templ);
+
 
    if (!brw_texture_layout( bscreen, tex ))
       goto fail;
 
+   
+   if (templ->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+   } 
+   else if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                            PIPE_TEXTURE_USAGE_PRIMARY)) {
+   }
+   else if (templ->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+   }
+   else if (templ->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+   }
+   
+   if (templ->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) {
+   }
+
+   tex->bo = bscreen->sws->bo_alloc( bscreen->sws,
+                                     BRW_USAGE_SAMPLER,
+                                     tex->pitch * tex->total_height * tex->cpp,
+                                     64 );
+
    tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
    tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
-- 
cgit v1.2.3


From 58e3360c11d6041de2927b604416146acb0c3817 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 18:26:34 +0000
Subject: i965g: add more missing pipe callbacks

---
 src/gallium/drivers/i965/brw_pipe_fb.c   |  9 +++++++++
 src/gallium/drivers/i965/brw_pipe_misc.c | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index c65f9bc374..d9b70f4eef 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -41,6 +41,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe,
    brw->curr.fb.nr_cbufs = fb->nr_cbufs;
 }
 
+
 static void brw_set_viewport_state( struct pipe_context *pipe,
 				    const struct pipe_viewport_state *viewport )
 {
@@ -58,4 +59,12 @@ void brw_pipe_framebuffer_init( struct brw_context *brw )
 
 void brw_pipe_framebuffer_cleanup( struct brw_context *brw )
 {
+   struct pipe_framebuffer_state *fb = &brw->curr.fb;
+   int i;
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      pipe_surface_reference(&fb->cbufs[i], NULL);
+   }
+
+   pipe_surface_reference(&fb->zsbuf, NULL);
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
index a7ccde5917..0d0d92df82 100644
--- a/src/gallium/drivers/i965/brw_pipe_misc.c
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -16,13 +16,45 @@ static void brw_set_polygon_stipple( struct pipe_context *pipe,
 
    for (i = 0; i < 32; i++)
       bps->stipple[i] = stip->stipple[i]; /* don't invert */
+
+   brw->state.dirty.mesa |= PIPE_NEW_POLYGON_STIPPLE;
+}
+
+
+static void brw_set_scissor_state( struct pipe_context *pipe,
+                                   const struct pipe_scissor_state *scissor )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.scissor =  *scissor;
+   brw->state.dirty.mesa |= PIPE_NEW_SCISSOR;
+}
+
+static void brw_set_viewport_state( struct pipe_context *pipe,
+                                    const struct pipe_viewport_state *viewport )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.viewport = *viewport;
+   brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
 }
 
+static void brw_set_clip_state( struct pipe_context *pipe,
+                                const struct pipe_clip_state *clip )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   brw->curr.ucp = *clip;
+   brw->state.dirty.mesa |= PIPE_NEW_CLIP;
+}
 
 
 void brw_pipe_misc_init( struct brw_context *brw )
 {
    brw->base.set_polygon_stipple = brw_set_polygon_stipple;
+   brw->base.set_scissor_state = brw_set_scissor_state;
+   brw->base.set_clip_state = brw_set_clip_state;
+   brw->base.set_viewport_state = brw_set_viewport_state;
 }
 
 
-- 
cgit v1.2.3


From 0cf432c7a180a6b847fa49c97ea1c48d90a7d5f8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 19:03:06 +0000
Subject: i965g: initialize surface refcount

---
 src/gallium/drivers/i965/brw_screen_surface.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index b4ad91278b..04a6fc7b66 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -130,6 +130,8 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
    if (surface == NULL)
       return NULL;
 
+   pipe_reference_init(&surface->base.reference, 1);
+
    /* XXX: ignoring render-to-slice-of-3d-texture
     */
    assert(id.bits.zslice == 0);
-- 
cgit v1.2.3


From 4e335a213acd535af81dd0c4b448003eb81db0cf Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 19:32:44 +0000
Subject: i965g: add missing is_*_referenced callbacks

---
 src/gallium/drivers/i965/brw_pipe_flush.c     | 24 +++++++++++++++++
 src/gallium/drivers/i965/brw_screen.h         | 12 +++++++++
 src/gallium/drivers/i965/brw_screen_buffers.c | 12 +++++++++
 src/gallium/drivers/i965/brw_screen_texture.c | 39 +++++++++++++++++++++++++++
 4 files changed, 87 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 9b52b56eae..6ae3c57765 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -2,6 +2,7 @@
 #include "util/u_upload_mgr.h"
 
 #include "brw_context.h"
+#include "brw_screen.h"
 #include "brw_batchbuffer.h"
 
 
@@ -44,10 +45,33 @@ brw_flush( struct pipe_context *pipe,
    *fence = NULL;
 }
 
+static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
+                                  struct pipe_buffer *buffer)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   return brw_is_buffer_referenced_by_bo( brw->brw_screen,
+                                          buffer,
+                                          brw->batch->buf );
+}
+
+static unsigned brw_is_texture_referenced(struct pipe_context *pipe,
+                                   struct pipe_texture *texture,
+                                   unsigned face,
+                                   unsigned level)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   return brw_is_texture_referenced_by_bo( brw->brw_screen,
+                                           texture, face, level,
+                                           brw->batch->buf );
+}
 
 void brw_pipe_flush_init( struct brw_context *brw )
 {
    brw->base.flush = brw_flush;
+   brw->base.is_buffer_referenced = brw_is_buffer_referenced;
+   brw->base.is_texture_referenced = brw_is_texture_referenced;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index dda516ee68..820c6a6679 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -183,4 +183,16 @@ void brw_screen_tex_surface_init( struct brw_screen *brw_screen );
 void brw_screen_buffer_init(struct brw_screen *brw_screen);
 
 
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+                                         struct pipe_texture *texture,
+                                         unsigned face, 
+                                         unsigned level,
+                                         struct brw_winsys_buffer *bo );
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+                                        struct pipe_buffer *buffer,
+                                        struct brw_winsys_buffer *bo );
+
+
+
 #endif /* BRW_SCREEN_H */
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
index 0bf885ce8c..c0f19d64aa 100644
--- a/src/gallium/drivers/i965/brw_screen_buffers.c
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -131,6 +131,18 @@ brw_user_buffer_create(struct pipe_screen *screen,
    return &buf->base; 
 }
 
+
+boolean brw_is_buffer_referenced_by_bo( struct brw_screen *brw_screen,
+                                     struct pipe_buffer *buffer,
+                                     struct brw_winsys_buffer *bo )
+{
+   struct brw_buffer *buf = brw_buffer(buffer);
+   if (buf->bo == NULL)
+      return FALSE;
+
+   return brw_screen->sws->bo_references( bo, buf->bo );
+}
+
    
 void brw_screen_buffer_init(struct brw_screen *brw_screen)
 {
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index fe3e57da90..c318b07f97 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -325,6 +325,45 @@ static boolean brw_is_format_supported( struct pipe_screen *screen,
 }
 
 
+boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
+                                      struct pipe_texture *texture,
+                                      unsigned face, 
+                                      unsigned level,
+                                      struct brw_winsys_buffer *bo )
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_surface *surf;
+   int i;
+
+   /* XXX: this is subject to false positives if the underlying
+    * texture BO is referenced, we can't tell whether the sub-region
+    * we care about participates in that.
+    */
+   if (brw_screen->sws->bo_references( bo, tex->bo ))
+      return TRUE;
+
+   /* Find any view on this texture for this face/level and see if it
+    * is referenced:
+    */
+   for (i = 0; i < 2; i++) {
+      foreach (surf, &tex->views[i]) {
+         if (surf->bo == tex->bo)
+            continue;
+
+         if (surf->id.bits.face != face ||
+             surf->id.bits.level != level)
+            continue;
+         
+         if (brw_screen->sws->bo_references( bo, surf->bo))
+            return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
+
+
 void brw_screen_tex_init( struct brw_screen *brw_screen )
 {
    brw_screen->base.is_format_supported = brw_is_format_supported;
-- 
cgit v1.2.3


From 19119517ce00f7710c6cd627c75e7eef765021c2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 19:41:02 +0000
Subject: i965g: add constant buffer setter

---
 src/gallium/drivers/i965/brw_pipe_shader.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 2422f77f34..8e10edb459 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -159,11 +159,33 @@ static void brw_delete_vs_state( struct pipe_context *pipe, void *prog )
 }
 
 
+static void brw_set_constant_buffer(struct pipe_context *pipe,
+                                     uint shader, uint index,
+                                     const struct pipe_constant_buffer *buf)
+{
+   struct brw_context *brw = brw_context(pipe);
 
+   assert(index == 0);
+
+   if (shader == PIPE_SHADER_FRAGMENT) {
+      pipe_buffer_reference( &brw->curr.fragment_constants,
+                             buf->buffer );
+
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_CONSTANTS;
+   }
+   else {
+      pipe_buffer_reference( &brw->curr.vertex_constants,
+                             buf->buffer );
+
+      brw->state.dirty.mesa |= PIPE_NEW_VERTEX_CONSTANTS;
+   }
+}
 
 
 void brw_pipe_shader_init( struct brw_context *brw )
 {
+   brw->base.set_constant_buffer = brw_set_constant_buffer;
+
    brw->base.create_vs_state = brw_create_vs_state;
    brw->base.bind_vs_state = brw_bind_vs_state;
    brw->base.delete_vs_state = brw_delete_vs_state;
@@ -175,4 +197,6 @@ void brw_pipe_shader_init( struct brw_context *brw )
 
 void brw_pipe_shader_cleanup( struct brw_context *brw )
 {
+   pipe_buffer_reference( &brw->curr.fragment_constants, NULL );
+   pipe_buffer_reference( &brw->curr.vertex_constants, NULL );
 }
-- 
cgit v1.2.3


From e18f223da710a6e1f6a08d346951ea66c6a1de99 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 20:26:41 +0000
Subject: i965g: hook up pipe_clear functions

---
 src/gallium/drivers/i965/Makefile          |   1 +
 src/gallium/drivers/i965/brw_batchbuffer.h |   5 +-
 src/gallium/drivers/i965/brw_context.c     |   2 +
 src/gallium/drivers/i965/brw_context.h     |   2 +
 src/gallium/drivers/i965/brw_pipe_clear.c  | 222 +++++++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_screen.h      |   6 +
 6 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/i965/brw_pipe_clear.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index d7262cf07c..870d67b13d 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -34,6 +34,7 @@ C_SOURCES = \
 	brw_pipe_misc.c \
 	brw_pipe_sampler.c \
 	brw_pipe_vertex.c \
+	brw_pipe_clear.c \
 	brw_pipe_rast.c \
 	brw_sf.c \
 	brw_sf_emit.c \
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 04ca6265ed..61374ffb00 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -1,10 +1,13 @@
 #ifndef BRW_BATCHBUFFER_H
 #define BRW_BATCHBUFFER_H
 
+#include "util/u_debug.h"
+
+#include "pipe/p_error.h"
+
 #include "brw_types.h"
 #include "brw_winsys.h"
 #include "brw_reg.h"
-#include "util/u_debug.h"
 
 #define BATCH_SZ 16384
 #define BATCH_RESERVED 16
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 0692412b32..5accc858a9 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -60,6 +60,7 @@ static void brw_destroy_context( struct pipe_context *pipe )
    brw_pipe_sampler_cleanup( brw );
    brw_pipe_shader_cleanup( brw );
    brw_pipe_vertex_cleanup( brw );
+   brw_pipe_clear_cleanup( brw );
 
    FREE(brw->wm.compile_data);
 
@@ -120,6 +121,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    brw_pipe_sampler_init( brw );
    brw_pipe_shader_init( brw );
    brw_pipe_vertex_init( brw );
+   brw_pipe_clear_init( brw );
 
    brw_init_state( brw );
    brw_draw_init( brw );
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index b6f77d1253..e32452f49a 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -764,6 +764,7 @@ void brw_pipe_rast_init( struct brw_context *brw );
 void brw_pipe_sampler_init( struct brw_context *brw );
 void brw_pipe_shader_init( struct brw_context *brw );
 void brw_pipe_vertex_init( struct brw_context *brw );
+void brw_pipe_clear_init( struct brw_context *brw );
 
 void brw_pipe_blend_cleanup( struct brw_context *brw );
 void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
@@ -775,6 +776,7 @@ void brw_pipe_rast_cleanup( struct brw_context *brw );
 void brw_pipe_sampler_cleanup( struct brw_context *brw );
 void brw_pipe_shader_cleanup( struct brw_context *brw );
 void brw_pipe_vertex_cleanup( struct brw_context *brw );
+void brw_pipe_clear_cleanup( struct brw_context *brw );
 
 
 void brw_context_flush( struct brw_context *brw );
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
new file mode 100644
index 0000000000..f48175c0f7
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -0,0 +1,222 @@
+/**************************************************************************
+ * 
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#include "util/u_pack_color.h"
+
+#include "pipe/p_error.h"
+#include "pipe/p_state.h"
+
+#include "brw_batchbuffer.h"
+#include "brw_screen.h"
+#include "brw_context.h"
+
+#define MASK16 0xffff
+#define MASK24 0xffffff
+
+
+/**
+ * Use blitting to clear the renderbuffers named by 'flags'.
+ * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
+ * since that might include software renderbuffers or renderbuffers
+ * which we're clearing with triangles.
+ * \param mask  bitmask of BUFFER_BIT_* values indicating buffers to clear
+ */
+static enum pipe_error
+try_clear( struct brw_context *brw,
+           struct brw_surface *surface,
+           unsigned value )
+{
+   uint32_t BR13, CMD;
+   int x1 = 0;
+   int y1 = 0;
+   int x2 = surface->base.width;
+   int y2 = surface->base.height;
+   int pitch = surface->pitch;
+   int cpp = surface->cpp;
+
+   if (x2 == 0 || y2 == 0)
+      return 0;
+
+   debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
+                __FUNCTION__,
+                (void *)surface->bo, pitch * cpp,
+                surface->draw_offset,
+                x1, y1, x2 - x1, y2 - y1);
+
+   BR13 = 0xf0 << 16;
+   CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_RGB | XY_BLT_WRITE_ALPHA;
+
+   /* Setup the blit command */
+   if (cpp == 4) {
+      BR13 |= BR13_8888;
+      CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
+   }
+   else {
+      assert(cpp == 2);
+      BR13 |= BR13_565;
+   }
+
+   assert(surface->tiling != BRW_TILING_Y);
+
+   if (surface->tiling == BRW_TILING_X) {
+      CMD |= XY_DST_TILED;
+      pitch /= 4;
+   }
+
+   BR13 |= (pitch * cpp);
+
+   BEGIN_BATCH(6, 0);
+   OUT_BATCH(CMD);
+   OUT_BATCH(BR13);
+   OUT_BATCH((y1 << 16) | x1);
+   OUT_BATCH((y2 << 16) | x2);
+   OUT_RELOC(surface->bo,
+             BRW_USAGE_BLIT_DEST,
+             surface->draw_offset);
+   OUT_BATCH(value);
+   ADVANCE_BATCH();
+
+   return 0;
+}
+
+
+
+
+static void color_clear(struct brw_context *brw, 
+                        struct brw_surface *bsurface,
+                        const float *rgba )
+{
+   enum pipe_error ret;
+   unsigned value;
+
+   util_pack_color( rgba, bsurface->base.format, &value );
+
+   if (bsurface->cpp == 2)
+      value |= value << 16;
+
+   ret = try_clear( brw, bsurface, value );
+
+   if (ret != 0) {
+      brw_context_flush( brw );
+      ret = try_clear( brw, bsurface, value );
+      assert( ret == 0 );
+   }
+}
+
+static void zstencil_clear(struct brw_context *brw, 
+                           struct brw_surface *bsurface,
+                           double depth,
+                           unsigned stencil )
+{
+   enum pipe_error ret;
+   unsigned value;
+
+   switch (bsurface->base.format) {
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      value = ((unsigned)(depth * MASK24) & MASK24);
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      value = ((unsigned)(depth * MASK16) & MASK16);
+      break;
+   default:
+      assert(0);
+      return;
+   }
+
+   switch (bsurface->base.format) {
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      value = (value << 8) | stencil;
+      break;
+
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8Z24_UNORM:
+      value = value | (stencil << 24);
+      break;
+
+   case PIPE_FORMAT_Z16_UNORM:
+      value = value | (value << 16);
+      break;
+
+   default:
+      break;
+   }
+
+   ret = try_clear( brw, bsurface, value );
+
+   if (ret != 0) {
+      brw_context_flush( brw );
+      ret = try_clear( brw, bsurface, value );
+      assert( ret == 0 );
+   }
+}
+
+
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ */
+static void brw_clear(struct pipe_context *pipe, 
+                      unsigned buffers,
+                      const float *rgba,
+                      double depth,
+                      unsigned stencil)
+{
+   struct brw_context *brw = brw_context( pipe );
+   int i;
+
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
+         color_clear( brw, 
+                      brw_surface(brw->curr.fb.cbufs[i]),
+                      rgba );
+      }
+   }
+
+   if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
+      if (brw->curr.fb.zsbuf) {
+         zstencil_clear( brw,
+                         brw_surface(brw->curr.fb.zsbuf),
+                         depth, stencil );
+      }
+   }
+}
+
+
+void brw_pipe_clear_init( struct brw_context *brw )
+{
+   brw->base.clear = brw_clear;
+}
+
+
+void brw_pipe_clear_cleanup( struct brw_context *brw )
+{
+}
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 820c6a6679..f7267cc78a 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -89,7 +89,13 @@ union brw_surface_id {
 struct brw_surface
 {
    struct pipe_surface base;
+   
    union brw_surface_id id;
+   unsigned cpp;
+   unsigned pitch;
+   unsigned draw_offset;
+   unsigned tiling;
+
    struct brw_surface_state ss;
    struct brw_winsys_buffer *bo;
    struct brw_surface *next, *prev;
-- 
cgit v1.2.3


From c5ed7b6e76a71d34e4a42ebfca092bd99cb39438 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 20:36:48 +0000
Subject: i965g: plumb in some surface state

---
 src/gallium/drivers/i965/brw_context.c        | 18 +++++++++++++-----
 src/gallium/drivers/i965/brw_pipe_clear.c     |  4 ++--
 src/gallium/drivers/i965/brw_screen_surface.c | 15 ++++++++++++++-
 3 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 5accc858a9..cd8963bebc 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -39,6 +39,7 @@
 #include "brw_state.h"
 #include "brw_batchbuffer.h"
 #include "brw_winsys.h"
+#include "brw_screen.h"
 
 
 static void brw_destroy_context( struct pipe_context *pipe )
@@ -46,6 +47,8 @@ static void brw_destroy_context( struct pipe_context *pipe )
    struct brw_context *brw = brw_context(pipe);
    int i;
 
+   brw_context_flush( brw );
+   brw_batchbuffer_free( brw->batch );
    brw_destroy_state(brw);
 
    brw_draw_cleanup( brw );
@@ -101,15 +104,12 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
 
    if (!brw) {
       debug_printf("%s: failed to alloc context\n", __FUNCTION__);
-      return GL_FALSE;
+      return NULL;
    }
 
-   /* We want the GLSL compiler to emit code that uses condition codes */
-   //ctx->Shader.EmitCondCodes = GL_TRUE;
-   //ctx->Shader.EmitNVTempInitialization = GL_TRUE;
-
    brw->base.screen = screen;
    brw->base.destroy = brw_destroy_context;
+   brw->sws = brw_screen(screen)->sws;
 
    brw_pipe_blend_init( brw );
    brw_pipe_depth_stencil_init( brw );
@@ -133,7 +133,15 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
 
    make_empty_list(&brw->query.active_head);
 
+   brw->batch = brw_batchbuffer_alloc( brw->sws );
+   if (brw->batch == NULL)
+      goto fail;
 
    return &brw->base;
+
+fail:
+   if (brw->batch)
+      brw_batchbuffer_free( brw->batch );
+   return NULL;
 }
 
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index f48175c0f7..69bc95e51a 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -64,7 +64,7 @@ try_clear( struct brw_context *brw,
    debug_printf("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
                 __FUNCTION__,
                 (void *)surface->bo, pitch * cpp,
-                surface->draw_offset,
+                surface->base.offset,
                 x1, y1, x2 - x1, y2 - y1);
 
    BR13 = 0xf0 << 16;
@@ -96,7 +96,7 @@ try_clear( struct brw_context *brw,
    OUT_BATCH((y2 << 16) | x2);
    OUT_RELOC(surface->bo,
              BRW_USAGE_BLIT_DEST,
-             surface->draw_offset);
+             surface->base.offset);
    OUT_BATCH(value);
    ADVANCE_BATCH();
 
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 04a6fc7b66..1c408e9f2e 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -35,6 +35,7 @@
 #include "pipe/p_screen.h"
 #include "brw_screen.h"
 #include "brw_defines.h"
+#include "brw_winsys.h"
 
 enum {
    BRW_VIEW_LINEAR,
@@ -145,6 +146,12 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
    surface->base.face = id.bits.face;
    surface->base.level = id.bits.level;
    surface->id = id;
+   surface->cpp = tex->cpp;
+   surface->pitch = tex->pitch;
+   surface->tiling = tex->tiling;
+
+   surface->bo = tex->bo;
+   brw_screen->sws->bo_reference(surface->bo);
 
    pipe_texture_reference( &surface->base.texture, &tex->base );
 
@@ -234,10 +241,16 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
 }
 
 
-static void brw_tex_surface_destroy( struct pipe_surface *surface )
+static void brw_tex_surface_destroy( struct pipe_surface *surf )
 {
+   struct brw_surface *surface = brw_surface(surf);
+   struct brw_screen *screen = brw_screen(surf->texture->screen);
+
    /* Unreference texture, shared buffer:
     */
+   screen->sws->bo_unreference(surface->bo);
+   pipe_texture_reference( &surface->base.texture, NULL );
+
 
    FREE(surface);
 }
-- 
cgit v1.2.3


From b8bb48f4528227e36400cd1599a82bb73415ef60 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 21:05:34 +0000
Subject: i965g: fix up batchbuffer confusion

---
 src/gallium/drivers/i965/brw_batchbuffer.c | 35 +++++++++++++++++++-----------
 src/gallium/drivers/i965/brw_batchbuffer.h |  3 ++-
 src/gallium/drivers/i965/brw_pipe_flush.c  |  3 ++-
 3 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index fd6b34cb8a..bfb7175f75 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -36,25 +36,26 @@
 #include "brw_debug.h"
 #include "brw_structs.h"
 
-#define USE_LOCAL_BUFFER 1
+#define USE_MALLOC_BUFFER 1
 #define ALWAYS_EMIT_MI_FLUSH 1
 
 void
 brw_batchbuffer_reset(struct brw_batchbuffer *batch)
 {
-   if (batch->buf != NULL) {
+   if (batch->buf) {
       batch->sws->bo_unreference(batch->buf);
       batch->buf = NULL;
    }
 
-   if (USE_LOCAL_BUFFER && !batch->buffer)
-      batch->buffer = MALLOC(BRW_BATCH_SIZE);
+   if (batch->use_malloc_buffer && !batch->malloc_buffer)
+      batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE);
 
    batch->buf = batch->sws->bo_alloc(batch->sws,
 				     BRW_BUFFER_TYPE_BATCH,
 				     BRW_BATCH_SIZE, 4096);
-   if (batch->buffer)
-      batch->map = batch->buffer;
+
+   if (batch->malloc_buffer)
+      batch->map = batch->malloc_buffer;
    else 
       batch->map = batch->sws->bo_map(batch->buf, GL_TRUE);
 
@@ -67,6 +68,7 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
 {
    struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
 
+   batch->use_malloc_buffer = USE_MALLOC_BUFFER;
    batch->sws = sws;
    brw_batchbuffer_reset(batch);
 
@@ -76,16 +78,16 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
 void
 brw_batchbuffer_free(struct brw_batchbuffer *batch)
 {
-   if (batch->map) {
+   if (batch->malloc_buffer) {
+      FREE(batch->malloc_buffer);
+      batch->map = NULL;
+   }
+   else if (batch->map) {
       batch->sws->bo_unmap(batch->buf);
       batch->map = NULL;
    }
 
-
    batch->sws->bo_unreference(batch->buf);
-   batch->buf = NULL;
-
-   FREE(batch->buffer);
    FREE(batch);
 }
 
@@ -127,8 +129,15 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
    batch->ptr += 4;
    used = batch->ptr - batch->map;
 
-   batch->sws->bo_unmap(batch->buf);
-   batch->map = NULL;
+   if (batch->use_malloc_buffer) {
+      batch->sws->bo_subdata(batch->buf, 0, used, batch->map );
+      batch->map = NULL;
+   }
+   else {
+      batch->sws->bo_unmap(batch->buf);
+      batch->map = NULL;
+   }
+
    batch->ptr = NULL;
       
    batch->sws->bo_exec(batch->buf, used );
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 61374ffb00..1828324cc0 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -35,7 +35,8 @@ struct brw_batchbuffer {
     * XXX: is this still necessary?
     * XXX: if so, can this be hidden inside the GEM-specific winsys code?
     */
-   uint8_t *buffer;
+   boolean use_malloc_buffer;
+   uint8_t *malloc_buffer;
 
    /**
     * Values exported to speed up the writing the batchbuffer,
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 6ae3c57765..9dff2beeb1 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -42,7 +42,8 @@ brw_flush( struct pipe_context *pipe,
            struct pipe_fence_handle **fence )
 {
    brw_context_flush( brw_context( pipe ) );
-   *fence = NULL;
+   if (fence)
+      *fence = NULL;
 }
 
 static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
-- 
cgit v1.2.3


From 951fdac566c3f2124f82aa94da08f55a10608f25 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 21:35:29 +0000
Subject: i965g: pull in a copy of intel_decode.c for now

With the stubbed out, non-hardware xlib winsys, trivial/clear runs and
prints a plausible command stream
---
 src/gallium/drivers/i965/Makefile          |    3 +-
 src/gallium/drivers/i965/brw_batchbuffer.c |   18 +-
 src/gallium/drivers/i965/brw_batchbuffer.h |    4 +-
 src/gallium/drivers/i965/brw_context.c     |    2 +-
 src/gallium/drivers/i965/intel_decode.c    | 1790 ++++++++++++++++++++++++++++
 src/gallium/drivers/i965/intel_decode.h    |   29 +
 6 files changed, 1835 insertions(+), 11 deletions(-)
 create mode 100644 src/gallium/drivers/i965/intel_decode.c
 create mode 100644 src/gallium/drivers/i965/intel_decode.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 870d67b13d..2188a1d4bc 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -66,6 +66,7 @@ C_SOURCES = \
 	brw_screen_tex_layout.c \
 	brw_screen_texture.c \
 	brw_screen_surface.c \
-	brw_batchbuffer.c 
+	brw_batchbuffer.c \
+	intel_decode.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index bfb7175f75..64d6754df5 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -35,6 +35,7 @@
 #include "brw_winsys.h"
 #include "brw_debug.h"
 #include "brw_structs.h"
+#include "intel_decode.h"
 
 #define USE_MALLOC_BUFFER 1
 #define ALWAYS_EMIT_MI_FLUSH 1
@@ -47,9 +48,6 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
       batch->buf = NULL;
    }
 
-   if (batch->use_malloc_buffer && !batch->malloc_buffer)
-      batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE);
-
    batch->buf = batch->sws->bo_alloc(batch->sws,
 				     BRW_BUFFER_TYPE_BATCH,
 				     BRW_BATCH_SIZE, 4096);
@@ -64,12 +62,18 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
 }
 
 struct brw_batchbuffer *
-brw_batchbuffer_alloc(struct brw_winsys_screen *sws)
+brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
+                      struct brw_chipset chipset)
 {
    struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
 
    batch->use_malloc_buffer = USE_MALLOC_BUFFER;
+   if (batch->use_malloc_buffer) {
+      batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE);
+   }
+
    batch->sws = sws;
+   batch->chipset = chipset;
    brw_batchbuffer_reset(batch);
 
    return batch;
@@ -142,18 +146,16 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
       
    batch->sws->bo_exec(batch->buf, used );
 
-#if 0      
-   if (BRW_DEBUG & DEBUG_BATCH) {
+   if (1 /*BRW_DEBUG & DEBUG_BATCH*/) {
       void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE);
 
       intel_decode(ptr,
 		   used / 4, 
-		   batch->buf->offset,
+		   batch->buf->offset[0],
 		   batch->chipset.pci_id);
 
       batch->sws->bo_unmap(batch->buf);
    }
-#endif
 
    if (BRW_DEBUG & DEBUG_SYNC) {
       /* Abuse map/unmap to achieve wait-for-fence.
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 1828324cc0..b051638296 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -28,6 +28,7 @@ struct brw_batchbuffer {
 
    struct brw_winsys_screen *sws;
    struct brw_winsys_buffer *buf;
+   struct brw_chipset chipset;
 
    /* Main-memory copy of the batch-buffer, built up incrementally &
     * then copied as one to the true buffer.
@@ -57,7 +58,8 @@ struct brw_batchbuffer {
    /*@}*/
 };
 
-struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws );
+struct brw_batchbuffer *brw_batchbuffer_alloc( struct brw_winsys_screen *sws,
+                                               struct brw_chipset chipset );
 
 void brw_batchbuffer_free(struct brw_batchbuffer *batch);
 
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index cd8963bebc..aaf7d1834e 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -133,7 +133,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
 
    make_empty_list(&brw->query.active_head);
 
-   brw->batch = brw_batchbuffer_alloc( brw->sws );
+   brw->batch = brw_batchbuffer_alloc( brw->sws, brw->chipset );
    if (brw->batch == NULL)
       goto fail;
 
diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
new file mode 100644
index 0000000000..1fb1b66cc8
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -0,0 +1,1790 @@
+/* -*- c-basic-offset: 4 -*- */
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+/** @file intel_decode.c
+ * This file contains code to print out batchbuffer contents in a
+ * human-readable format.
+ *
+ * The current version only supports i915 packets, and only pretty-prints a
+ * subset of them.  The intention is for it to make just a best attempt to
+ * decode, but never crash in the process.
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include "intel_decode.h"
+
+/*#include "intel_chipset.h"*/
+#define IS_965(x) 1             /* XXX */
+#define IS_9XX(x) 1             /* XXX */
+
+#define BUFFER_FAIL(_count, _len, _name) do {			\
+    fprintf(out, "Buffer size too small in %s (%d < %d)\n",	\
+	    (_name), (_count), (_len));				\
+    (*failures)++;						\
+    return count;						\
+} while (0)
+
+static FILE *out;
+static uint32_t saved_s2 = 0, saved_s4 = 0;
+static char saved_s2_set = 0, saved_s4_set = 0;
+
+static float
+int_as_float(uint32_t intval)
+{
+    union intfloat {
+	uint32_t i;
+	float f;
+    } uval;
+
+    uval.i = intval;
+    return uval.f;
+}
+
+static void
+instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+	  char *fmt, ...)
+{
+    va_list va;
+
+    fprintf(out, "0x%08x: 0x%08x:%s ", hw_offset + index * 4, data[index],
+	    index == 0 ? "" : "  ");
+    va_start(va, fmt);
+    vfprintf(out, fmt, va);
+    va_end(va);
+}
+
+
+static int
+decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode;
+
+    struct {
+	uint32_t opcode;
+	int len_mask;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_mi[] = {
+	{ 0x08, 0, 1, 1, "MI_ARB_ON_OFF" },
+	{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+	{ 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" },
+	{ 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" },
+	{ 0x04, 0, 1, 1, "MI_FLUSH" },
+	{ 0x22, 0, 3, 3, "MI_LOAD_REGISTER_IMM" },
+	{ 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" },
+	{ 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" },
+	{ 0x00, 0, 1, 1, "MI_NOOP" },
+	{ 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" },
+	{ 0x07, 0, 1, 1, "MI_REPORT_HEAD" },
+	{ 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" },
+	{ 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" },
+	{ 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" },
+	{ 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" },
+	{ 0x02, 0, 1, 1, "MI_USER_INTERRUPT" },
+	{ 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" },
+    };
+
+
+    for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]);
+	 opcode++) {
+	if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) {
+	    unsigned int len = 1, i;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_mi[opcode].name);
+	    if (opcodes_mi[opcode].max_len > 1) {
+		len = (data[0] & opcodes_mi[opcode].len_mask) + 2;
+		if (len < opcodes_mi[opcode].min_len ||
+		    len > opcodes_mi[opcode].max_len)
+		{
+		    fprintf(out, "Bad length (%d) in %s, [%d, %d]\n",
+			    len, opcodes_mi[opcode].name,
+			    opcodes_mi[opcode].min_len,
+			    opcodes_mi[opcode].max_len);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_mi[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "MI UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+static int
+decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    char *format = NULL;
+
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_2d[] = {
+	{ 0x40, 5, 5, "COLOR_BLT" },
+	{ 0x43, 6, 6, "SRC_COPY_BLT" },
+	{ 0x01, 8, 8, "XY_SETUP_BLT" },
+	{ 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" },
+	{ 0x03, 3, 3, "XY_SETUP_CLIP_BLT" },
+	{ 0x24, 2, 2, "XY_PIXEL_BLT" },
+	{ 0x25, 3, 3, "XY_SCANLINES_BLT" },
+	{ 0x26, 4, 4, "Y_TEXT_BLT" },
+	{ 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" },
+	{ 0x50, 6, 6, "XY_COLOR_BLT" },
+	{ 0x51, 6, 6, "XY_PAT_BLT" },
+	{ 0x76, 8, 8, "XY_PAT_CHROMA_BLT" },
+	{ 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" },
+	{ 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" },
+	{ 0x52, 9, 9, "XY_MONO_PAT_BLT" },
+	{ 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" },
+	{ 0x53, 8, 8, "XY_SRC_COPY_BLT" },
+	{ 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" },
+	{ 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" },
+	{ 0x55, 9, 9, "XY_FULL_BLT" },
+	{ 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" },
+	{ 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" },
+	{ 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" },
+	{ 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" },
+	{ 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" },
+    };
+
+    switch ((data[0] & 0x1fc00000) >> 22) {
+    case 0x50:
+	instr_out(data, hw_offset, 0,
+		  "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 11) & 1);
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 6)
+	    fprintf(out, "Bad count in XY_COLOR_BLT\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "XY_COLOR_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, pitch %d, "
+		  "clipping %sabled\n", format,
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "(%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "(%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	instr_out(data, hw_offset, 4, "offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "color\n");
+	return len;
+    case 0x53:
+	instr_out(data, hw_offset, 0,
+		  "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, "
+		  "src tile %d, dst tile %d)\n",
+		  (data[0] & (1 << 20)) ? "en" : "dis",
+		  (data[0] & (1 << 21)) ? "en" : "dis",
+		  (data[0] >> 15) & 1,
+		  (data[0] >> 11) & 1);
+
+	len = (data[0] & 0x000000ff) + 2;
+	if (len != 8)
+	    fprintf(out, "Bad count in XY_SRC_COPY_BLT\n");
+	if (count < 8)
+	    BUFFER_FAIL(count, len, "XY_SRC_COPY_BLT");
+
+	switch ((data[1] >> 24) & 0x3) {
+	case 0:
+	    format="8";
+	    break;
+	case 1:
+	    format="565";
+	    break;
+	case 2:
+	    format="1555";
+	    break;
+	case 3:
+	    format="8888";
+	    break;
+	}
+
+	instr_out(data, hw_offset, 1, "format %s, dst pitch %d, "
+		  "clipping %sabled\n", format,
+		  (short)(data[1] & 0xffff),
+		  data[1] & (1 << 30) ? "en" : "dis");
+	instr_out(data, hw_offset, 2, "dst (%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	instr_out(data, hw_offset, 3, "dst (%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	instr_out(data, hw_offset, 4, "dst offset 0x%08x\n", data[4]);
+	instr_out(data, hw_offset, 5, "src (%d,%d)\n",
+		  data[5] & 0xffff, data[5] >> 16);
+	instr_out(data, hw_offset, 6, "src pitch %d\n",
+		  (short)(data[6] & 0xffff));
+	instr_out(data, hw_offset, 7, "src offset 0x%08x\n", data[7]);
+	return len;
+    }
+
+    for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]);
+	 opcode++) {
+	if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) {
+	    unsigned int i;
+
+	    len = 1;
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_2d[opcode].name);
+	    if (opcodes_2d[opcode].max_len > 1) {
+		len = (data[0] & 0x000000ff) + 2;
+		if (len < opcodes_2d[opcode].min_len ||
+		    len > opcodes_2d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_2d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_2d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "2D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/** Decodes one single-dword 3D packet (opcode in bits 19-23); returns 1. */
+static int
+decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    switch ((data[0] & 0x00f80000) >> 19) {
+    case 0x11:
+	instr_out(data, hw_offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+	return 1;
+    case 0x10:
+	instr_out(data, hw_offset, 0, "3DSTATE_SCISSOR_ENABLE\n");
+	return 1;
+    case 0x01:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+	return 1;
+    case 0x0a:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+	return 1;
+    case 0x05:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+	return 1;
+    }
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;	/* unrecognised opcode: counted, but still consumes 1 dword */
+    return 1;
+}
+
+/** Sets the string dstname to describe the destination of the PS instruction */
+static void
+i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+{
+    uint32_t a0 = data[i];
+    int dst_nr = (a0 >> 14) & 0xf;
+    char dstmask[8];
+    char *sat;
+
+    if (do_mask) {
+	if (((a0 >> 10) & 0xf) == 0xf) {
+	    dstmask[0] = 0;
+	} else {
+	    int dstmask_index = 0;
+
+	    dstmask[dstmask_index++] = '.';
+	    if (a0 & (1 << 10))
+		dstmask[dstmask_index++] = 'x';
+	    if (a0 & (1 << 11))
+		dstmask[dstmask_index++] = 'y';
+	    if (a0 & (1 << 12))
+		dstmask[dstmask_index++] = 'z';
+	    if (a0 & (1 << 13))
+		dstmask[dstmask_index++] = 'w';
+	    dstmask[dstmask_index++] = 0;
+	}
+
+	if (a0 & (1 << 22))
+	    sat = ".sat";
+	else
+	    sat = "";
+    } else {
+	dstmask[0] = 0;
+	sat = "";
+    }
+
+    switch ((a0 >> 19) & 0x7) {
+    case 0:
+	if (dst_nr > 15)
+	    fprintf(out, "bad destination reg R%d\n", dst_nr);
+	sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat);
+	break;
+    case 4:
+	if (dst_nr > 0)
+	    fprintf(out, "bad destination reg oC%d\n", dst_nr);
+	sprintf(dstname, "oC%s%s", dstmask, sat);
+	break;
+    case 5:
+	if (dst_nr > 0)
+	    fprintf(out, "bad destination reg oD%d\n", dst_nr);
+	sprintf(dstname, "oD%s%s",  dstmask, sat);
+	break;
+    case 6:
+	if (dst_nr > 2)
+	    fprintf(out, "bad destination reg U%d\n", dst_nr);
+	sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat);
+	break;
+    default:
+	sprintf(dstname, "RESERVED");
+	break;
+    }
+}
+
+static char *
+i915_get_channel_swizzle(uint32_t select)
+{
+    switch (select & 0x7) {
+    case 0:
+	return (select & 8) ? "-x" : "x";
+    case 1:
+	return (select & 8) ? "-y" : "y";
+    case 2:
+	return (select & 8) ? "-z" : "z";
+    case 3:
+	return (select & 8) ? "-w" : "w";
+    case 4:
+	return (select & 8) ? "-0" : "0";
+    case 5:
+	return (select & 8) ? "-1" : "1";
+    default:
+	return (select & 8) ? "-bad" : "bad";
+    }
+}
+
+static void
+i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
+{
+    switch (src_type) {
+    case 0:
+	sprintf(name, "R%d", src_nr);
+	if (src_nr > 15)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 1:
+	if (src_nr < 8)
+	    sprintf(name, "T%d", src_nr);
+	else if (src_nr == 8)
+	    sprintf(name, "DIFFUSE");
+	else if (src_nr == 9)
+	    sprintf(name, "SPECULAR");
+	else if (src_nr == 10)
+	    sprintf(name, "FOG");
+	else {
+	    fprintf(out, "bad src reg T%d\n", src_nr);
+	    sprintf(name, "RESERVED");
+	}
+	break;
+    case 2:
+	sprintf(name, "C%d", src_nr);
+	if (src_nr > 31)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 4:
+	sprintf(name, "oC");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oC%d\n", src_nr);
+	break;
+    case 5:
+	sprintf(name, "oD");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oD%d\n", src_nr);
+	break;
+    case 6:
+	sprintf(name, "U%d", src_nr);
+	if (src_nr > 2)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    default:
+	fprintf(out, "bad src reg type %d\n", src_type);
+	sprintf(name, "RESERVED");
+	break;
+    }
+}
+
+static void
+i915_get_instruction_src0(uint32_t *data, int i, char *srcname)
+{
+    uint32_t a0 = data[i];
+    uint32_t a1 = data[i + 1];
+    int src_nr = (a0 >> 2) & 0x1f;
+    char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf);
+    char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf);
+    char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf);
+    char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf);
+    char swizzle[100];
+
+    i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname);
+    sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+    if (strcmp(swizzle, ".xyzw") != 0)
+	strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src1(uint32_t *data, int i, char *srcname)
+{
+    uint32_t a1 = data[i + 1];
+    uint32_t a2 = data[i + 2];
+    int src_nr = (a1 >> 8) & 0x1f;
+    char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 0xf);
+    char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf);
+    char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf);
+    char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf);
+    char swizzle[100];
+
+    i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
+    sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+    if (strcmp(swizzle, ".xyzw") != 0)
+	strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_src2(uint32_t *data, int i, char *srcname)
+{
+    uint32_t a2 = data[i + 2];
+    int src_nr = (a2 >> 16) & 0x1f;
+    char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf);
+    char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf);
+    char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf);
+    char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf);
+    char swizzle[100];
+
+    i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
+    sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
+    if (strcmp(swizzle, ".xyzw") != 0)
+	strcat(srcname, swizzle);
+}
+
+static void
+i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
+{
+    switch (src_type) {
+    case 0:
+	sprintf(name, "R%d", src_nr);
+	if (src_nr > 15)
+	    fprintf(out, "bad src reg %s\n", name);
+	break;
+    case 1:
+	if (src_nr < 8)
+	    sprintf(name, "T%d", src_nr);
+	else if (src_nr == 8)
+	    sprintf(name, "DIFFUSE");
+	else if (src_nr == 9)
+	    sprintf(name, "SPECULAR");
+	else if (src_nr == 10)
+	    sprintf(name, "FOG");
+	else {
+	    fprintf(out, "bad src reg T%d\n", src_nr);
+	    sprintf(name, "RESERVED");
+	}
+	break;
+    case 4:
+	sprintf(name, "oC");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oC%d\n", src_nr);
+	break;
+    case 5:
+	sprintf(name, "oD");
+	if (src_nr > 0)
+	    fprintf(out, "bad src reg oD%d\n", src_nr);
+	break;
+    default:
+	fprintf(out, "bad src reg type %d\n", src_type);
+	sprintf(name, "RESERVED");
+	break;
+    }
+}
+
+static void
+i915_decode_alu1(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dst[100], src0[100];
+
+    i915_get_instruction_dst(data, i, dst, 1);
+    i915_get_instruction_src0(data, i, src0);
+
+    instr_out(data, hw_offset, i++, "%s: %s %s, %s\n", instr_prefix,
+	      op_name, dst, src0);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu2(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dst[100], src0[100], src1[100];
+
+    i915_get_instruction_dst(data, i, dst, 1);
+    i915_get_instruction_src0(data, i, src0);
+    i915_get_instruction_src1(data, i, src1);
+
+    instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
+	      op_name, dst, src0, src1);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_alu3(uint32_t *data, uint32_t hw_offset,
+		 int i, char *instr_prefix, char *op_name)
+{
+    char dst[100], src0[100], src1[100], src2[100];
+
+    i915_get_instruction_dst(data, i, dst, 1);
+    i915_get_instruction_src0(data, i, src0);
+    i915_get_instruction_src1(data, i, src1);
+    i915_get_instruction_src2(data, i, src2);
+
+    instr_out(data, hw_offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
+	      op_name, dst, src0, src1, src2);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+static void
+i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+		char *tex_name)
+{
+    uint32_t t0 = data[i];
+    uint32_t t1 = data[i + 1];
+    char dst_name[100];
+    char addr_name[100];
+    int sampler_nr;
+
+    i915_get_instruction_dst(data, i, dst_name, 0);
+    i915_get_instruction_addr((t1 >> 24) & 0x7,
+			      (t1 >> 17) & 0xf,
+			      addr_name);
+    sampler_nr = t0 & 0xf;
+
+    instr_out(data, hw_offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
+	      tex_name, dst_name, sampler_nr, addr_name);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+    instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+}
+
+/* Prints the three dwords of the DCL instruction at data[i].  Bits 19-20 of
+ * dword 0 select what is declared (1: T0-T7/DIFFUSE/SPECULAR/FOG inputs,
+ * 3: samplers), bits 14-17 the register number, bits 10-13 the xyzw mask.
+ * A line is always printed for dword 0 so the trailing two stay aligned. */
+static void
+i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+{
+    uint32_t d0 = data[i];
+    char *sampletype;
+    int dcl_nr = (d0 >> 14) & 0xf;
+    char *dcl_x = d0 & (1 << 10) ? "x" : "";
+    char *dcl_y = d0 & (1 << 11) ? "y" : "";
+    char *dcl_z = d0 & (1 << 12) ? "z" : "";
+    char *dcl_w = d0 & (1 << 13) ? "w" : "";
+    char dcl_mask[10];
+
+    switch ((d0 >> 19) & 0x3) {
+    case 1:
+	sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
+	if (strcmp(dcl_mask, ".") == 0)
+	    fprintf(out, "bad (empty) dcl mask\n");
+
+	if (dcl_nr > 10)
+	    fprintf(out, "bad T%d dcl register number\n", dcl_nr);
+	if (dcl_nr < 8) {
+	    if (strcmp(dcl_mask, ".x") != 0 &&
+		strcmp(dcl_mask, ".xy") != 0 &&
+		strcmp(dcl_mask, ".xz") != 0 &&
+		strcmp(dcl_mask, ".w") != 0 &&
+		strcmp(dcl_mask, ".xyzw") != 0) {
+		fprintf(out, "bad T%d%s dcl mask\n", dcl_nr, dcl_mask);
+	    } /* dcl_mask already starts with '.', hence "T%d%s" above */
+	    instr_out(data, hw_offset, i++, "%s: DCL T%d%s\n", instr_prefix,
+		      dcl_nr, dcl_mask);
+	} else {
+	    if (strcmp(dcl_mask, ".xz") == 0 ||
+		strcmp(dcl_mask, ".xw") == 0 ||
+		strcmp(dcl_mask, ".xzw") == 0)
+		fprintf(out, "errataed bad dcl mask %s\n", dcl_mask);
+
+	    if (dcl_nr <= 10) {
+		char *input_name = dcl_nr == 8 ? "DIFFUSE" :
+				   dcl_nr == 9 ? "SPECULAR" : "FOG";
+		instr_out(data, hw_offset, i++, "%s: DCL %s%s\n", instr_prefix,
+			  input_name, dcl_mask);
+	    } else {
+		/* Consume dword 0 so the trailing two keep their indices. */
+		instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n",
+			  instr_prefix, dcl_nr);
+	    }
+	}
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    case 3:
+	switch ((d0 >> 22) & 0x3) {
+	case 0:
+	    sampletype = "2D";
+	    break;
+	case 1:
+	    sampletype = "CUBE";
+	    break;
+	case 2:
+	    sampletype = "3D";
+	    break;
+	default:
+	    sampletype = "RESERVED";
+	    break;
+	}
+	instr_out(data, hw_offset, i++, "%s: DCL S%d %s\n", instr_prefix,
+		  dcl_nr, sampletype);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    default:
+	instr_out(data, hw_offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+    }
+}
+
+/**
+ * Decodes one i915 fragment program instruction starting at data[i].
+ *
+ * The opcode is read from bits 28:24 of the first DWORD and the instruction
+ * is handed to the matching ALU (1, 2 or 3 source), texture or declaration
+ * decoder.  NOP and opcodes without a decoder are printed here as three
+ * DWORDs.
+ *
+ * \param data batch buffer contents
+ * \param hw_offset hardware address, passed through to the printers
+ * \param i index of the instruction's first DWORD within data
+ * \param instr_prefix label printed in front of the instruction
+ */
+static void
+i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
+			int i, char *instr_prefix)
+{
+    switch ((data[i] >> 24) & 0x1f) {
+    case 0x0:
+	instr_out(data, hw_offset, i++, "%s: NOP\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    case 0x01:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "ADD");
+	break;
+    case 0x02:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOV");
+	break;
+    case 0x03:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MUL");
+	break;
+    case 0x04:
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "MAD");
+	break;
+    case 0x05:
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "DP2ADD");
+	break;
+    case 0x06:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP3");
+	break;
+    case 0x07:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "DP4");
+	break;
+    case 0x08:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "FRC");
+	break;
+    case 0x09:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "RCP");
+	break;
+    case 0x0a:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "RSQ");
+	break;
+    case 0x0b:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "EXP");
+	break;
+    case 0x0c:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "LOG");
+	break;
+    case 0x0d:
+	/* CMP is dst = (src0 >= 0.0) ? src1 : src2, i.e. three sources. */
+	i915_decode_alu3(data, hw_offset, i, instr_prefix, "CMP");
+	break;
+    case 0x0e:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MIN");
+	break;
+    case 0x0f:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "MAX");
+	break;
+    case 0x10:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "FLR");
+	break;
+    case 0x11:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "MOD");
+	break;
+    case 0x12:
+	i915_decode_alu1(data, hw_offset, i, instr_prefix, "TRC");
+	break;
+    case 0x13:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "SGE");
+	break;
+    case 0x14:
+	i915_decode_alu2(data, hw_offset, i, instr_prefix, "SLT");
+	break;
+    case 0x15:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLD");
+	break;
+    case 0x16:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDP");
+	break;
+    case 0x17:
+	i915_decode_tex(data, hw_offset, i, instr_prefix, "TEXLDB");
+	break;
+    case 0x19:
+	i915_decode_dcl(data, hw_offset, i, instr_prefix);
+	break;
+    default:
+	instr_out(data, hw_offset, i++, "%s: unknown\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	instr_out(data, hw_offset, i++, "%s\n", instr_prefix);
+	break;
+    }
+}
+
+/**
+ * Decodes one command of the 3D opcode group 0x1d (see the callers
+ * decode_3d() and decode_3d_i830()).
+ *
+ * The sub-opcode in bits 23:16 of data[0] selects a dedicated decoder for
+ * the variable-layout commands handled in the switch below; every other
+ * sub-opcode is looked up in opcodes_3d_1d and dumped as raw DWORDs.
+ * 3DSTATE_LOAD_STATE_IMMEDIATE_1 additionally records the S2 and S4 words
+ * in saved_s2/saved_s4 so that inline vertex data can be decoded later.
+ *
+ * \param data first DWORD of the command
+ * \param count number of DWORDs available starting at data
+ * \param hw_offset hardware address, passed through to instr_out()
+ * \param failures counter incremented for each decode error found
+ * \param i830 non-zero enables the i830-only table entries and skips the
+ *             3DSTATE_SAMPLER_STATE decoder for sub-opcode 0x01
+ * \return length of the command in DWORDs as computed from its header, or 1
+ *         for an unknown command (BUFFER_FAIL may return early on its own)
+ */
+static int
+decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+{
+    unsigned int len, i, c, opcode, word, map, sampler, instr;
+    char *format;
+
+    struct {
+	uint32_t opcode;
+	int i830_only;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d_1d[] = {
+	{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" },
+	{ 0x9c, 0, 1, 1, "3DSTATE_CLEAR_PARAMETERS" },
+	{ 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+	{ 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+	{ 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+	{ 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" },
+	{ 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+	{ 0x85, 0, 2, 2, "3DSTATE_DEST_BUFFER_VARIABLES" },
+	{ 0x80, 0, 5, 5, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x8e, 0, 3, 3, "3DSTATE_BUFFER_INFO" },
+	{ 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+	{ 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" },
+	{ 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" },
+	{ 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+	{ 0x81, 0, 3, 3, "3DSTATE_SCISSOR_RECTANGLE" },
+	{ 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+	{ 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" },
+	{ 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" },
+	{ 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" },
+	{ 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" },
+	{ 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830" },
+    };
+
+    switch ((data[0] & 0x00ff0000) >> 16) {
+    case 0x07:
+	/* This instruction is unusual.  A 0 length means just 1 DWORD instead of
+	 * 2.  The 0 length is specified in one place to be unsupported, but
+	 * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+	 * to cause no harm at least.
+	 */
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+	len = (data[0] & 0x000000ff) + 1;
+	i = 1;
+	/* Each bit in 13:8 announces one block of indirect state words. */
+	if (data[0] & (0x01 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SIS.0\n");
+	    instr_out(data, hw_offset, i++, "SIS.1\n");
+	}
+	if (data[0] & (0x02 << 8)) {
+	    if (i + 1 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "DIS.0\n");
+	}
+	if (data[0] & (0x04 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "SSB.0\n");
+	    instr_out(data, hw_offset, i++, "SSB.1\n");
+	}
+	if (data[0] & (0x08 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "MSB.0\n");
+	    instr_out(data, hw_offset, i++, "MSB.1\n");
+	}
+	if (data[0] & (0x10 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSP.0\n");
+	    instr_out(data, hw_offset, i++, "PSP.1\n");
+	}
+	if (data[0] & (0x20 << 8)) {
+	    if (i + 2 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_LOAD_INDIRECT");
+	    instr_out(data, hw_offset, i++, "PSC.0\n");
+	    instr_out(data, hw_offset, i++, "PSC.1\n");
+	}
+	/* The DWORDs implied by the flag bits must match the header length. */
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n");
+	    (*failures)++;
+	    return len;
+	}
+	return len;
+    case 0x04:
+	instr_out(data, hw_offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	len = (data[0] & 0x0000000f) + 2;
+	i = 1;
+	/* Bits 11:4 flag which of S0..S7 follow the header. */
+	for (word = 0; word <= 7; word++) {
+	    if (data[0] & (1 << (4 + word))) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_LOAD_STATE_IMMEDIATE_1");
+
+		/* save vertex state for decode */
+		if (word == 2) {
+		    saved_s2_set = 1;
+		    saved_s2 = data[i];
+		}
+		if (word == 4) {
+		    saved_s4_set = 1;
+		    saved_s4 = data[i];
+		}
+
+		instr_out(data, hw_offset, i++, "S%d\n", word);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x00:
+	instr_out(data, hw_offset, 0, "3DSTATE_MAP_STATE\n");
+	len = (data[0] & 0x0000003f) + 2;
+	instr_out(data, hw_offset, 1, "mask\n");
+
+	i = 2;
+	/* Three DWORDs follow for every map enabled in the mask. */
+	for (map = 0; map <= 15; map++) {
+	    if (data[1] & (1 << map)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_MAP_STATE");
+		instr_out(data, hw_offset, i++, "map %d MS2\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS3\n", map);
+		instr_out(data, hw_offset, i++, "map %d MS4\n", map);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n");
+	    (*failures)++;
+	    return len;
+	}
+	return len;
+    case 0x06:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+	len = (data[0] & 0x000000ff) + 2;
+
+	i = 2;
+	/* Four float DWORDs (X, Y, Z, W) follow for every constant in the mask. */
+	for (c = 0; c <= 31; c++) {
+	    if (data[1] & (1 << c)) {
+		if (i + 4 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_CONSTANTS");
+		instr_out(data, hw_offset, i, "C%d.X = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Y = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.Z = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+		instr_out(data, hw_offset, i, "C%d.W = %f\n",
+			  c, int_as_float(data[i]));
+		i++;
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x05:
+	instr_out(data, hw_offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	len = (data[0] & 0x000000ff) + 2;
+	/* One header DWORD followed by whole 3-DWORD instructions. */
+	if ((len - 1) % 3 != 0 || len > 370) {
+	    fprintf(out, "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n");
+	    (*failures)++;
+	}
+	i = 1;
+	for (instr = 0; instr < (len - 1) / 3; instr++) {
+	    char instr_prefix[10];
+
+	    if (i + 3 >= count)
+		BUFFER_FAIL(count, len, "3DSTATE_PIXEL_SHADER_PROGRAM");
+	    sprintf(instr_prefix, "PS%03d", instr);
+	    i915_decode_instruction(data, hw_offset, i, instr_prefix);
+	    i += 3;
+	}
+	return len;
+    case 0x01:
+	/* On i830 this sub-opcode is handled through the table below. */
+	if (i830)
+	    break;
+	instr_out(data, hw_offset, 0, "3DSTATE_SAMPLER_STATE\n");
+	instr_out(data, hw_offset, 1, "mask\n");
+	len = (data[0] & 0x0000003f) + 2;
+	i = 2;
+	/* Three DWORDs follow for every sampler enabled in the mask. */
+	for (sampler = 0; sampler <= 15; sampler++) {
+	    if (data[1] & (1 << sampler)) {
+		if (i + 3 >= count)
+		    BUFFER_FAIL(count, len, "3DSTATE_SAMPLER_STATE");
+		instr_out(data, hw_offset, i++, "sampler %d SS2\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS3\n",
+			  sampler);
+		instr_out(data, hw_offset, i++, "sampler %d SS4\n",
+			  sampler);
+	    }
+	}
+	if (len != i) {
+	    fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n");
+	    (*failures)++;
+	}
+	return len;
+    case 0x85:
+	len = (data[0] & 0x0000000f) + 2;
+
+	if (len != 2)
+	    fprintf(out, "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n");
+	if (count < 2)
+	    BUFFER_FAIL(count, len, "3DSTATE_DEST_BUFFER_VARIABLES");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+	/* Bits 11:8 of the second DWORD hold the destination format. */
+	switch ((data[1] >> 8) & 0xf) {
+	case 0x0: format = "g8"; break;
+	case 0x1: format = "x1r5g5b5"; break;
+	case 0x2: format = "r5g6b5"; break;
+	case 0x3: format = "a8r8g8b8"; break;
+	case 0x4: format = "ycrcb_swapy"; break;
+	case 0x5: format = "ycrcb_normal"; break;
+	case 0x6: format = "ycrcb_swapuv"; break;
+	case 0x7: format = "ycrcb_swapuvy"; break;
+	case 0x8: format = "a4r4g4b4"; break;
+	case 0x9: format = "a1r5g5b5"; break;
+	case 0xa: format = "a2r10g10b10"; break;
+	default: format = "BAD"; break;
+	}
+	instr_out(data, hw_offset, 1, "%s format, early Z %sabled\n",
+		  format,
+		  (data[1] & (1 << 31)) ? "en" : "dis");
+	return len;
+    }
+
+    /* Everything else: find the name in the table and dump raw DWORDs. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d_1d) / sizeof(opcodes_3d_1d[0]);
+	 opcode++)
+    {
+	if (opcodes_3d_1d[opcode].i830_only && !i830)
+	    continue;
+
+	if (((data[0] & 0x00ff0000) >> 16) == opcodes_3d_1d[opcode].opcode) {
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d_1d[opcode].name);
+	    if (opcodes_3d_1d[opcode].max_len > 1) {
+		len = (data[0] & 0x0000ffff) + 2;
+		if (len < opcodes_3d_1d[opcode].min_len ||
+		    len > opcodes_3d_1d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n",
+			    opcodes_3d_1d[opcode].name);
+		    (*failures)++;
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len,  opcodes_3d_1d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes a 3DPRIMITIVE command.
+ *
+ * Bit 23 of data[0] clear means the vertex data is inline.  It is then
+ * decoded using the S2/S4 words previously captured in saved_s2/saved_s4,
+ * or dumped as raw floats when those have not been seen.  Bit 23 set means
+ * indirect vertices: bit 17 selects an index list ("random" access, two
+ * 16-bit indices per DWORD starting at data[1]), otherwise a sequential
+ * range described by a single extra DWORD.
+ *
+ * \param data first DWORD of the command
+ * \param count number of DWORDs available starting at data
+ * \param hw_offset hardware address, passed through to instr_out()
+ * \param failures counter incremented for each decode error found
+ * \return number of DWORDs making up the command (BUFFER_FAIL may return
+ *         early on its own)
+ */
+static int
+decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
+		    int *failures)
+{
+    char immediate = (data[0] & (1 << 23)) == 0;
+    unsigned int len, i;
+    char *primtype;
+
+    switch ((data[0] >> 18) & 0xf) {
+    case 0x0: primtype = "TRILIST"; break;
+    case 0x1: primtype = "TRISTRIP"; break;
+    case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+    case 0x3: primtype = "TRIFAN"; break;
+    case 0x4: primtype = "POLYGON"; break;
+    case 0x5: primtype = "LINELIST"; break;
+    case 0x6: primtype = "LINESTRIP"; break;
+    case 0x7: primtype = "RECTLIST"; break;
+    case 0x8: primtype = "POINTLIST"; break;
+    case 0x9: primtype = "DIB"; break;
+    case 0xa: primtype = "CLEAR_RECT"; break;
+    default: primtype = "unknown"; break;
+    }
+
+    /* XXX: 3DPRIM_DIB not supported */
+    if (immediate) {
+	len = (data[0] & 0x0003ffff) + 2;
+	instr_out(data, hw_offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+	if (count < len)
+	    BUFFER_FAIL(count, len,  "3DPRIMITIVE inline");
+	if (!saved_s2_set || !saved_s4_set) {
+	    /* Without S2 and S4 the vertex layout is unknown: dump floats. */
+	    fprintf(out, "unknown vertex format\n");
+	    for (i = 1; i < len; i++) {
+		instr_out(data, hw_offset, i,
+			  "           vertex data (%f float)\n",
+			  int_as_float(data[i]));
+	    }
+	} else {
+	    unsigned int vertex = 0;
+	    for (i = 1; i < len;) {
+		unsigned int tc;
+
+/* Prints one vertex DWORD and advances i; reports missing data instead of
+ * reading past the end of the command.
+ */
+#define VERTEX_OUT(fmt, ...) do {					\
+    if (i < len)							\
+	instr_out(data, hw_offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+    else								\
+	fprintf(out, " missing data in V%d\n", vertex);			\
+    i++;								\
+} while (0)
+
+		VERTEX_OUT("X = %f", int_as_float(data[i]));
+		VERTEX_OUT("Y = %f", int_as_float(data[i]));
+		/* S4 bits 8:6 select which of Z and W accompany X/Y. */
+	        switch (saved_s4 >> 6 & 0x7) {
+		case 0x1:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    break;
+		case 0x2:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		case 0x3:
+		    break;
+		case 0x4:
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		default:
+		    fprintf(out, "bad S4 position mask\n");
+		}
+
+		if (saved_s4 & (1 << 10)) {
+		    VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 11)) {
+		    VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 12))
+		    VERTEX_OUT("width = 0x%08x)", data[i]);
+
+		/* S2 holds one 4-bit format per texture coordinate set. */
+		for (tc = 0; tc <= 7; tc++) {
+		    switch ((saved_s2 >> (tc * 4)) & 0xf) {
+		    case 0x0:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x1:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x2:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x3:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x4:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0x5:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0xf:
+			break;
+		    default:
+			fprintf(out, "bad S2.T%d format\n", tc);
+		    }
+		}
+		vertex++;
+	    }
+	}
+    } else {
+	/* indirect vertices */
+	len = data[0] & 0x0000ffff; /* index count */
+	if (data[0] & (1 << 17)) {
+	    /* random vertex access */
+	    if (count < (len + 1) / 2 + 1) {
+		BUFFER_FAIL(count, (len + 1) / 2 + 1,
+			    "3DPRIMITIVE random indirect");
+	    }
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+	    if (len == 0) {
+		/* vertex indices continue until 0xffff is found */
+		for (i = 1; i < count; i++) {
+		    if ((data[i] & 0xffff) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: (terminator)\n");
+			/* The terminator DWORD belongs to this command. */
+			return i + 1;
+		    } else if ((data[i] >> 16) == 0xffff) {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, "
+				  "(terminator)\n",
+				  data[i] & 0xffff);
+			/* The terminator DWORD belongs to this command. */
+			return i + 1;
+		    } else {
+			instr_out(data, hw_offset, i,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[i] & 0xffff, data[i] >> 16);
+		    }
+		}
+		fprintf(out,
+			"3DPRIMITIVE: no terminator found in index buffer\n");
+		(*failures)++;
+		return count;
+	    } else {
+		/* fixed size vertex index buffer: index i lives in DWORD
+		 * 1 + i / 2, low half first.
+		 */
+		for (i = 0; i < len; i += 2) {
+		    unsigned int dword = 1 + i / 2;
+
+		    if (i == len - 1) {
+			/* odd index count: only the low half is used */
+			instr_out(data, hw_offset, dword,
+				  "            indices: 0x%04x\n",
+				  data[dword] & 0xffff);
+		    } else {
+			instr_out(data, hw_offset, dword,
+				  "            indices: 0x%04x, 0x%04x\n",
+				  data[dword] & 0xffff, data[dword] >> 16);
+		    }
+		}
+	    }
+	    return (len + 1) / 2 + 1;
+	} else {
+	    /* sequential vertex access */
+	    if (count < 2)
+		BUFFER_FAIL(count, 2, "3DPRIMITIVE seq indirect");
+	    instr_out(data, hw_offset, 0,
+		      "3DPRIMITIVE sequential indirect %s, %d starting from "
+		      "%d\n", primtype, len, data[1] & 0xffff);
+	    instr_out(data, hw_offset, 1, "           start\n");
+	    return 2;
+	}
+    }
+
+    return len;
+}
+
+/**
+ * Decodes one 3D command for the devices intel_decode() routes here
+ * (IS_9XX but not IS_965).
+ *
+ * Opcode groups 0x1f, 0x1d and 0x1c (bits 28:24 of data[0]) are forwarded
+ * to their own decoders; the remaining known opcodes are single-DWORD state
+ * commands that are only named.
+ *
+ * \param data first DWORD of the command
+ * \param count number of DWORDs available starting at data
+ * \param hw_offset hardware address, passed through to instr_out()
+ * \param failures counter incremented when the command is not recognised
+ * \return number of DWORDs making up the command, 1 if it is unknown
+ */
+static int
+decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } simple_cmds[] = {
+	{ 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" },
+	{ 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" },
+	{ 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" },
+	{ 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" },
+	{ 0x0d, 1, 1, "3DSTATE_MODES_4" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+    };
+    const unsigned int num_cmds = sizeof(simple_cmds) / sizeof(simple_cmds[0]);
+    const uint32_t group = (data[0] >> 24) & 0x1f;
+    unsigned int idx;
+
+    /* Groups with a dedicated decoder. */
+    if (group == 0x1f)
+	return decode_3d_primitive(data, count, hw_offset, failures);
+    if (group == 0x1d)
+	return decode_3d_1d(data, count, hw_offset, failures, 0);
+    if (group == 0x1c)
+	return decode_3d_1c(data, count, hw_offset, failures);
+
+    for (idx = 0; idx < num_cmds; idx++) {
+	unsigned int len, i;
+
+	if (simple_cmds[idx].opcode != group)
+	    continue;
+
+	instr_out(data, hw_offset, 0, "%s\n", simple_cmds[idx].name);
+
+	/* Only multi-DWORD entries carry a length field. */
+	len = 1;
+	if (simple_cmds[idx].max_len > 1) {
+	    len = (data[0] & 0xff) + 2;
+	    if (len < simple_cmds[idx].min_len ||
+		len > simple_cmds[idx].max_len)
+	    {
+		fprintf(out, "Bad count in %s\n", simple_cmds[idx].name);
+	    }
+	}
+
+	for (i = 1; i < len; i++) {
+	    if (i >= count)
+		BUFFER_FAIL(count, len, simple_cmds[idx].name);
+	    instr_out(data, hw_offset, i, "dword %d\n", i);
+	}
+	return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Maps a 965 surface type value to a printable name.
+ *
+ * \param surfacetype surface type value
+ * \return static string; "unknown" for values without a name
+ */
+static const char *
+get_965_surfacetype(unsigned int surfacetype)
+{
+    if (surfacetype == 0)
+	return "1D";
+    if (surfacetype == 1)
+	return "2D";
+    if (surfacetype == 2)
+	return "3D";
+    if (surfacetype == 3)
+	return "CUBE";
+    if (surfacetype == 4)
+	return "BUFFER";
+    if (surfacetype == 7)
+	return "NULL";
+
+    return "unknown";
+}
+
+/**
+ * Maps a 965 depth buffer format value to a printable name.
+ *
+ * \param depthformat depth format value
+ * \return static string; "unknown" for values without a name
+ */
+static const char *
+get_965_depthformat(unsigned int depthformat)
+{
+    if (depthformat == 0)
+	return "s8_z24float";
+    if (depthformat == 1)
+	return "z32float";
+    if (depthformat == 2)
+	return "z24s8";
+    if (depthformat == 5)
+	return "z16";
+
+    return "unknown";
+}
+
+/**
+ * Describes the component control of one vertex element component.
+ *
+ * The 3-bit control for component N is taken from bits starting at
+ * 16 + (3 - N) * 4 of the vertex element DWORD.
+ *
+ * \param data second DWORD of a vertex element
+ * \param component component number, 0 (X) to 3 (W)
+ * \return static string naming what is stored; "fail" for values that have
+ *         no name here
+ */
+static const char *
+get_965_element_component(uint32_t data, int component)
+{
+    static const char *const axis[] = { "X", "Y", "Z", "W" };
+    const uint32_t control = (data >> (16 + (3 - component) * 4)) & 0x7;
+
+    if (control == 0)
+	return "nostore";
+
+    if (control == 1) {
+	/* Store the source component itself. */
+	if (component >= 0 && component <= 3)
+	    return axis[component];
+	return "fail";
+    }
+
+    if (control == 2)
+	return "0.0";
+    if (control == 3)
+	return "1.0";
+    if (control == 4)
+	return "0x1";
+    if (control == 5)
+	return "VID";
+
+    return "fail";
+}
+
+/**
+ * Names the primitive type encoded in bits 14:10 of a 965 3DPRIMITIVE
+ * header DWORD.
+ *
+ * \param data first DWORD of the 3DPRIMITIVE command
+ * \return static string; "fail" for values without a name
+ */
+static const char *
+get_965_prim_type(uint32_t data)
+{
+    /* Indexed by primitive type value; entry 0 has no name. */
+    static const char *const names[] = {
+	"fail",			/* 0x00 */
+	"point list",		/* 0x01 */
+	"line list",		/* 0x02 */
+	"line strip",		/* 0x03 */
+	"tri list",		/* 0x04 */
+	"tri strip",		/* 0x05 */
+	"tri fan",		/* 0x06 */
+	"quad list",		/* 0x07 */
+	"quad strip",		/* 0x08 */
+	"line list adj",	/* 0x09 */
+	"line strip adj",	/* 0x0a */
+	"tri list adj",		/* 0x0b */
+	"tri strip adj",	/* 0x0c */
+	"tri strip reverse",	/* 0x0d */
+	"polygon",		/* 0x0e */
+	"rect list",		/* 0x0f */
+	"line loop",		/* 0x10 */
+	"point list bf",	/* 0x11 */
+	"line strip cont",	/* 0x12 */
+	"line strip bf",	/* 0x13 */
+	"line strip cont bf",	/* 0x14 */
+	"tri fan no stipple",	/* 0x15 */
+    };
+    const uint32_t kind = (data >> 10) & 0x1f;
+
+    if (kind < sizeof(names) / sizeof(names[0]))
+	return names[kind];
+
+    return "fail";
+}
+
+/**
+ * Decodes one 3D command for devices matching IS_965 (see intel_decode()).
+ *
+ * The opcode is the upper 16 bits of data[0].  Commands with a dedicated
+ * case below get their DWORDs annotated; the remaining opcodes listed in
+ * opcodes_3d are named and dumped as raw DWORDs.
+ *
+ * A length field that disagrees with the expected size is only reported.
+ * The dedicated cases therefore check count against the number of DWORDs
+ * they actually read, and the grouped loops stop before a partial trailing
+ * group, so a malformed length never causes a read past count.
+ *
+ * \param data first DWORD of the command
+ * \param count number of DWORDs available starting at data
+ * \param hw_offset hardware address, passed through to instr_out()
+ * \param failures counter incremented when the command is not recognised
+ * \return length of the command in DWORDs as computed from its header, or 1
+ *         for an unknown command (BUFFER_FAIL may return early on its own)
+ */
+static int
+decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    unsigned int opcode, len;
+    int i;
+
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d[] = {
+	{ 0x6000, 3, 3, "URB_FENCE" },
+	{ 0x6001, 2, 2, "CS_URB_STATE" },
+	{ 0x6002, 2, 2, "CONSTANT_BUFFER" },
+	{ 0x6101, 6, 6, "STATE_BASE_ADDRESS" },
+	{ 0x6102, 2, 2 , "STATE_SIP" },
+	{ 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" },
+	{ 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" },
+	{ 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" },
+	{ 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" },
+	{ 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" },
+	{ 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" },
+	{ 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" },
+	{ 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" },
+	{ 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" },
+	{ 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" },
+	{ 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" },
+	{ 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" },
+	{ 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" },
+	{ 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" },
+	{ 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" },
+	{ 0x7b00, 6, 6, "3DPRIMITIVE" },
+    };
+
+    /* Default length; several cases below re-read it from the low byte. */
+    len = (data[0] & 0x0000ffff) + 2;
+
+    switch ((data[0] & 0xffff0000) >> 16) {
+    case 0x6101:
+	if (len != 6)
+	    fprintf(out, "Bad count in STATE_BASE_ADDRESS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "STATE_BASE_ADDRESS");
+
+	instr_out(data, hw_offset, 0,
+		  "STATE_BASE_ADDRESS\n");
+
+	/* Bit 0 of each DWORD says whether that address is being updated. */
+	if (data[1] & 1) {
+	    instr_out(data, hw_offset, 1, "General state at 0x%08x\n",
+		      data[1] & ~1);
+	} else
+	    instr_out(data, hw_offset, 1, "General state not updated\n");
+
+	if (data[2] & 1) {
+	    instr_out(data, hw_offset, 2, "Surface state at 0x%08x\n",
+		      data[2] & ~1);
+	} else
+	    instr_out(data, hw_offset, 2, "Surface state not updated\n");
+
+	if (data[3] & 1) {
+	    instr_out(data, hw_offset, 3, "Indirect state at 0x%08x\n",
+		      data[3] & ~1);
+	} else
+	    instr_out(data, hw_offset, 3, "Indirect state not updated\n");
+
+	if (data[4] & 1) {
+	    instr_out(data, hw_offset, 4, "General state upper bound 0x%08x\n",
+		      data[4] & ~1);
+	} else
+	    instr_out(data, hw_offset, 4, "General state not updated\n");
+
+	if (data[5] & 1) {
+	    instr_out(data, hw_offset, 5, "Indirect state upper bound 0x%08x\n",
+		      data[5] & ~1);
+	} else
+	    instr_out(data, hw_offset, 5, "Indirect state not updated\n");
+
+	return len;
+    case 0x7800:
+	if (len != 7)
+	    fprintf(out, "Bad count in 3DSTATE_PIPELINED_POINTERS\n");
+	if (count < 7)
+	    BUFFER_FAIL(count, len, "3DSTATE_PIPELINED_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_PIPELINED_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS state\n");
+	instr_out(data, hw_offset, 2, "GS state\n");
+	instr_out(data, hw_offset, 3, "Clip state\n");
+	instr_out(data, hw_offset, 4, "SF state\n");
+	instr_out(data, hw_offset, 5, "WM state\n");
+	instr_out(data, hw_offset, 6, "CC state\n");
+	return len;
+    case 0x7801:
+	if (len != 6)
+	    fprintf(out, "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n");
+	if (count < 6)
+	    BUFFER_FAIL(count, len, "3DSTATE_BINDING_TABLE_POINTERS");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_BINDING_TABLE_POINTERS\n");
+	instr_out(data, hw_offset, 1, "VS binding table\n");
+	instr_out(data, hw_offset, 2, "GS binding table\n");
+	instr_out(data, hw_offset, 3, "Clip binding table\n");
+	instr_out(data, hw_offset, 4, "SF binding table\n");
+	instr_out(data, hw_offset, 5, "WM binding table\n");
+
+	return len;
+
+    case 0x7808:
+	len = (data[0] & 0xff) + 2;
+	if ((len - 1) % 4 != 0)
+	    fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_VERTEX_BUFFERS");
+	instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_BUFFERS\n");
+
+	/* Four DWORDs per buffer; a partial trailing group is not read. */
+	for (i = 1; i + 3 < len;) {
+	    instr_out(data, hw_offset, i, "buffer %d: %s, pitch %db\n",
+		      data[i] >> 27,
+		      data[i] & (1 << 26) ? "random" : "sequential",
+		      data[i] & 0x07ff);
+	    i++;
+	    instr_out(data, hw_offset, i++, "buffer address\n");
+	    instr_out(data, hw_offset, i++, "max index\n");
+	    instr_out(data, hw_offset, i++, "mbz\n");
+	}
+	return len;
+
+    case 0x7809:
+	len = (data[0] & 0xff) + 2;
+	if ((len + 1) % 2 != 0)
+	    fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n");
+	if (count < len)
+	    BUFFER_FAIL(count, len, "3DSTATE_VERTEX_ELEMENTS");
+	instr_out(data, hw_offset, 0, "3DSTATE_VERTEX_ELEMENTS\n");
+
+	/* Two DWORDs per element; a partial trailing group is not read. */
+	for (i = 1; i + 1 < len;) {
+	    instr_out(data, hw_offset, i, "buffer %d: %svalid, type 0x%04x, "
+		      "src offset 0x%04x bytes\n",
+		      data[i] >> 27,
+		      data[i] & (1 << 26) ? "" : "in",
+		      (data[i] >> 16) & 0x1ff,
+		      data[i] & 0x07ff);
+	    i++;
+	    instr_out(data, hw_offset, i, "(%s, %s, %s, %s), "
+		      "dst offset 0x%02x bytes\n",
+		      get_965_element_component(data[i], 0),
+		      get_965_element_component(data[i], 1),
+		      get_965_element_component(data[i], 2),
+		      get_965_element_component(data[i], 3),
+		      (data[i] & 0xff) * 4);
+	    i++;
+	}
+	return len;
+
+    case 0x780a:
+	len = (data[0] & 0xff) + 2;
+	if (len != 3)
+	    fprintf(out, "Bad count in 3DSTATE_INDEX_BUFFER\n");
+	/* DWORDs 0..2 are printed regardless of the length field. */
+	if (count < len || count < 3)
+	    BUFFER_FAIL(count, len, "3DSTATE_INDEX_BUFFER");
+	instr_out(data, hw_offset, 0, "3DSTATE_INDEX_BUFFER\n");
+	instr_out(data, hw_offset, 1, "beginning buffer address\n");
+	instr_out(data, hw_offset, 2, "ending buffer address\n");
+	return len;
+
+    case 0x7900:
+	if (len != 4)
+	    fprintf(out, "Bad count in 3DSTATE_DRAWING_RECTANGLE\n");
+	if (count < 4)
+	    BUFFER_FAIL(count, len, "3DSTATE_DRAWING_RECTANGLE");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	instr_out(data, hw_offset, 1, "top left: %d,%d\n",
+		  data[1] & 0xffff,
+		  (data[1] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 2, "bottom right: %d,%d\n",
+		  data[2] & 0xffff,
+		  (data[2] >> 16) & 0xffff);
+	instr_out(data, hw_offset, 3, "origin: %d,%d\n",
+		  (int)data[3] & 0xffff,
+		  ((int)data[3] >> 16) & 0xffff);
+
+	return len;
+
+    case 0x7905:
+	if (len != 5 && len != 6)
+	    fprintf(out, "Bad count in 3DSTATE_DEPTH_BUFFER\n");
+	/* DWORDs 0..4 are printed regardless of the length field. */
+	if (count < len || count < 5)
+	    BUFFER_FAIL(count, len, "3DSTATE_DEPTH_BUFFER");
+
+	instr_out(data, hw_offset, 0,
+		  "3DSTATE_DEPTH_BUFFER\n");
+	instr_out(data, hw_offset, 1, "%s, %s, pitch = %d bytes, %stiled\n",
+		  get_965_surfacetype(data[1] >> 29),
+		  get_965_depthformat((data[1] >> 18) & 0x7),
+		  (data[1] & 0x0001ffff) + 1,
+		  data[1] & (1 << 27) ? "" : "not ");
+	instr_out(data, hw_offset, 2, "depth offset\n");
+	instr_out(data, hw_offset, 3, "%dx%d\n",
+		  ((data[3] & 0x0007ffc0) >> 6) + 1,
+		  ((data[3] & 0xfff80000) >> 19) + 1);
+	instr_out(data, hw_offset, 4, "volume depth\n");
+	if (len == 6)
+	    instr_out(data, hw_offset, 5, "\n");
+
+	return len;
+
+    case 0x7b00:
+	len = (data[0] & 0xff) + 2;
+	if (len != 6)
+	    fprintf(out, "Bad count in 3DPRIMITIVE\n");
+	/* DWORDs 0..5 are printed regardless of the length field. */
+	if (count < len || count < 6)
+	    BUFFER_FAIL(count, len, "3DPRIMITIVE");
+
+	instr_out(data, hw_offset, 0,
+		  "3DPRIMITIVE: %s %s\n",
+		  get_965_prim_type(data[0]),
+		  (data[0] & (1 << 15)) ? "random" : "sequential");
+	instr_out(data, hw_offset, 1, "vertex count\n");
+	instr_out(data, hw_offset, 2, "start vertex\n");
+	instr_out(data, hw_offset, 3, "instance count\n");
+	instr_out(data, hw_offset, 4, "start instance\n");
+	instr_out(data, hw_offset, 5, "index bias\n");
+	return len;
+    }
+
+    /* Everything else: find the name in the table and dump raw DWORDs. */
+    for (opcode = 0; opcode < sizeof(opcodes_3d) / sizeof(opcodes_3d[0]);
+	 opcode++) {
+	if ((data[0] & 0xffff0000) >> 16 == opcodes_3d[opcode].opcode) {
+	    unsigned int i;
+	    len = 1;
+
+	    instr_out(data, hw_offset, 0, "%s\n", opcodes_3d[opcode].name);
+	    if (opcodes_3d[opcode].max_len > 1) {
+		len = (data[0] & 0xff) + 2;
+		if (len < opcodes_3d[opcode].min_len ||
+		    len > opcodes_3d[opcode].max_len)
+		{
+		    fprintf(out, "Bad count in %s\n", opcodes_3d[opcode].name);
+		}
+	    }
+
+	    for (i = 1; i < len; i++) {
+		if (i >= count)
+		    BUFFER_FAIL(count, len, opcodes_3d[opcode].name);
+		instr_out(data, hw_offset, i, "dword %d\n", i);
+	    }
+	    return len;
+	}
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes one 3D command for the devices intel_decode() routes here
+ * (neither IS_965 nor IS_9XX).
+ *
+ * Opcode groups 0x1f, 0x1d and 0x1c (bits 28:24 of data[0]) are forwarded
+ * to their own decoders, with the i830 flag set for group 0x1d; the
+ * remaining known opcodes are single-DWORD state commands that are only
+ * named.
+ *
+ * \param data first DWORD of the command
+ * \param count number of DWORDs available starting at data
+ * \param hw_offset hardware address, passed through to instr_out()
+ * \param failures counter incremented when the command is not recognised
+ * \return number of DWORDs making up the command, 1 if it is unknown
+ */
+static int
+decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+{
+    struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } simple_cmds[] = {
+	{ 0x02, 1, 1, "3DSTATE_MODES_3" },
+	{ 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+	{ 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+	{ 0x05, 1, 1, "3DSTATE_VFT0"},
+	{ 0x06, 1, 1, "3DSTATE_AA"},
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+	{ 0x08, 1, 1, "3DSTATE_MODES_1" },
+	{ 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+	{ 0x0a, 1, 1, "3DSTATE_VFT1"},
+	{ 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+	{ 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+	{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x16, 1, 1, "3DSTATE_MODES_4" },
+    };
+    const unsigned int num_cmds = sizeof(simple_cmds) / sizeof(simple_cmds[0]);
+    const uint32_t group = (data[0] >> 24) & 0x1f;
+    unsigned int idx;
+
+    /* Groups with a dedicated decoder. */
+    if (group == 0x1f)
+	return decode_3d_primitive(data, count, hw_offset, failures);
+    if (group == 0x1d)
+	return decode_3d_1d(data, count, hw_offset, failures, 1);
+    if (group == 0x1c)
+	return decode_3d_1c(data, count, hw_offset, failures);
+
+    for (idx = 0; idx < num_cmds; idx++) {
+	unsigned int len, i;
+
+	if (simple_cmds[idx].opcode != group)
+	    continue;
+
+	instr_out(data, hw_offset, 0, "%s\n", simple_cmds[idx].name);
+
+	/* Only multi-DWORD entries carry a length field. */
+	len = 1;
+	if (simple_cmds[idx].max_len > 1) {
+	    len = (data[0] & 0xff) + 2;
+	    if (len < simple_cmds[idx].min_len ||
+		len > simple_cmds[idx].max_len)
+	    {
+		fprintf(out, "Bad count in %s\n", simple_cmds[idx].name);
+	    }
+	}
+
+	for (i = 1; i < len; i++) {
+	    if (i >= count)
+		BUFFER_FAIL(count, len, simple_cmds[idx].name);
+	    instr_out(data, hw_offset, i, "dword %d\n", i);
+	}
+	return len;
+    }
+
+    instr_out(data, hw_offset, 0, "3D UNKNOWN\n");
+    (*failures)++;
+    return 1;
+}
+
+/**
+ * Decodes a batch buffer, writing the output to stderr.
+ *
+ * Bits 31:29 of each command's first DWORD select the decoder: 0x0 goes to
+ * decode_mi(), 0x2 to decode_2d() and 0x3 to one of the 3D decoders chosen
+ * from devid (IS_965 first, then IS_9XX, otherwise the i830 decoder).  Any
+ * other value is printed as UNKNOWN and counted as a failure.
+ *
+ * \param data batch buffer contents
+ * \param count number of DWORDs to decode in the batch buffer
+ * \param hw_offset hardware address for the buffer
+ * \param devid device ID, used only to pick the 3D decoder
+ * \return number of decode failures counted
+ */
+int
+intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+{
+    int index = 0;
+    int failures = 0;
+
+    out = stderr;
+
+    while (index < count) {
+	/* Each decoder returns how many DWORDs to advance by. */
+	switch ((data[index] & 0xe0000000) >> 29) {
+	case 0x0:
+	    index += decode_mi(data + index, count - index,
+			       hw_offset + index * 4, &failures);
+	    break;
+	case 0x2:
+	    index += decode_2d(data + index, count - index,
+			       hw_offset + index * 4, &failures);
+	    break;
+	case 0x3:
+	    if (IS_965(devid)) {
+		index += decode_3d_965(data + index, count - index,
+				       hw_offset + index * 4, &failures);
+	    } else if (IS_9XX(devid)) {
+		index += decode_3d(data + index, count - index,
+				   hw_offset + index * 4, &failures);
+	    } else {
+		index += decode_3d_i830(data + index, count - index,
+					hw_offset + index * 4, &failures);
+	    }
+	    break;
+	default:
+	    instr_out(data, hw_offset, index, "UNKNOWN\n");
+	    failures++;
+	    index++;
+	    break;
+	}
+	fflush(out);
+    }
+
+    return failures;
+}
+
+/**
+ * Forgets the S2 and S4 words captured while decoding
+ * 3DSTATE_LOAD_STATE_IMMEDIATE_1.
+ *
+ * Both flags are cleared so that inline vertex data decoded afterwards is
+ * reported as having an unknown vertex format until new S2 and S4 values
+ * have been seen, instead of being interpreted with a stale saved_s4.
+ */
+void intel_decode_context_reset(void)
+{
+    saved_s2_set = 0;
+    saved_s4_set = 0;
+}
+
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
new file mode 100644
index 0000000000..c50644a46b
--- /dev/null
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright © 2007 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *
+ */
+
+int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+void intel_decode_context_reset(void);
-- 
cgit v1.2.3


From cc8105d7402511c7d0ea8a07faaa8d149d9249f2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:09:23 +0000
Subject: i965g: hook up some vertex state funcs

---
 src/gallium/drivers/i965/brw_context.h     |  4 ++--
 src/gallium/drivers/i965/brw_pipe_vertex.c | 38 ++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index e32452f49a..d033cb0f91 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -501,12 +501,12 @@ struct brw_context
       const struct brw_depth_stencil_state *zstencil;
 
       const struct brw_sampler *sampler[PIPE_MAX_SAMPLERS];
-      const struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-      unsigned num_vertex_elements;
       unsigned num_samplers;
 
       struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
       struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+      struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+      unsigned num_vertex_elements;
       unsigned num_textures;
       unsigned num_vertex_buffers;
 
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 0b69718fd8..97e9a23688 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -1,9 +1,47 @@
 #include "brw_context.h"
 
 
+static void brw_set_vertex_elements( struct pipe_context *pipe,
+				     unsigned count,
+				     const struct pipe_vertex_element *elements )
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   memcpy(brw->curr.vertex_element, elements, count * sizeof(elements[0]));
+   brw->curr.num_vertex_elements = count;
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_ELEMENT;
+}
+
+
+static void brw_set_vertex_buffers(struct pipe_context *pipe,
+				   unsigned count,
+				   const struct pipe_vertex_buffer *buffers)
+{
+   struct brw_context *brw = brw_context(pipe);
+
+   /* XXX: don't we need to take some references here?  It's a bit
+    * awkward to do so, though.
+    */
+   memcpy(brw->curr.vertex_buffer, buffers, count * sizeof(buffers[0]));
+   brw->curr.num_vertex_buffers = count;
+
+   brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
+}
+
+static void brw_set_edgeflags( struct pipe_context *pipe,
+			       const unsigned *bitfield )
+{
+   /* XXX: not implemented -- edge flags are currently ignored */
+}
+
+
 void 
 brw_pipe_vertex_init( struct brw_context *brw )
 {
+   brw->base.set_vertex_buffers = brw_set_vertex_buffers;
+   brw->base.set_vertex_elements = brw_set_vertex_elements;
+   brw->base.set_edgeflags = brw_set_edgeflags;
 }
 
 
-- 
cgit v1.2.3


From 1b611f99b430333e840908b42471a721689b2529 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:18:07 +0000
Subject: i965g: hook up some missing vertex shader code

---
 src/gallium/drivers/i965/brw_pipe_shader.c | 11 +++++++++--
 src/gallium/drivers/i965/brw_vs_emit.c     |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 8e10edb459..2833f2bce0 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -128,12 +128,19 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
    vs->id = brw->program_id++;
    //vs->has_flow_control = brw_wm_has_flow_control(vs);
 
-   /* Tell the draw module about this shader:
-    */
+   vs->tokens = tgsi_dup_tokens(shader->tokens);
+   if (vs->tokens == NULL)
+      goto fail;
+
+   tgsi_scan_shader(vs->tokens, &vs->info);
    
    /* Done:
     */
    return (void *)vs;
+
+fail:
+   FREE(vs);
+   return NULL;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index bcc5c5f713..95e2b8e2cb 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -90,7 +90,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
       /* XXX: immediates can go elsewhere if necessary:
        */
       assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] +
-	     c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF);
+	     c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 <= BRW_MAX_GRF);
 
       c->vp->use_const_buffer = GL_FALSE;
    }
-- 
cgit v1.2.3


From bf4a518cf27910fe2bb828fd43de5472e5e51760 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:22:48 +0000
Subject: i965g: clean up wm init_registers func

---
 src/gallium/drivers/i965/brw_wm_pass2.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index d3d678a5e6..a5574bd1a3 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -69,30 +69,32 @@ static void prealloc_reg(struct brw_wm_compile *c,
  */
 static void init_registers( struct brw_wm_compile *c )
 {
-   GLuint nr_interp_regs = 0;
-   GLuint i = 0;
+   GLuint reg = 0;
    GLuint j;
 
    for (j = 0; j < c->grf_limit; j++) 
       c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
 
+   /* Pre-allocate incoming payload regs:
+    */
    for (j = 0; j < c->key.nr_depth_regs; j++) 
-      prealloc_reg(c, &c->payload.depth[j], i++);
+      prealloc_reg(c, &c->payload.depth[j], reg++);
 
    for (j = 0; j < c->nr_creg; j++) 
-      prealloc_reg(c, &c->creg[j], i++);
+      prealloc_reg(c, &c->creg[j], reg++);
 
-   for (j = 0; j < c->key.vp_nr_outputs; j++) {
-      prealloc_reg(c, &c->payload.input_interp[j], i++);
-   }
+   for (j = 0; j < c->key.vp_nr_outputs; j++)
+      prealloc_reg(c, &c->payload.input_interp[j], reg++);
 
-   assert(nr_interp_regs >= 1);
+   assert(c->key.vp_nr_outputs >= 1);
 
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
    c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2;
    c->prog_data.curb_read_length = c->nr_creg * 2;
 
-   c->max_wm_grf = i * 2;
+   /* Note this allocation:
+    */
+   c->max_wm_grf = reg * 2;
 }
 
 
-- 
cgit v1.2.3


From 518171a887437e4d3fc2c8cea871862afb63c11c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:27:30 +0000
Subject: i965g: init pointer to null, avoid segfault

---
 src/gallium/drivers/i965/brw_draw_upload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index f0b7c741c0..6e12e8f4b3 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -202,7 +202,7 @@ static int brw_prepare_vertices(struct brw_context *brw)
    for (i = 0; i < brw->curr.num_vertex_buffers; i++) {
       struct pipe_vertex_buffer *vb = &brw->curr.vertex_buffer[i];
       struct brw_winsys_buffer *bo;
-      struct pipe_buffer *upload_buf;
+      struct pipe_buffer *upload_buf = NULL;
       unsigned offset;
       
       if (BRW_DEBUG & DEBUG_VERTS)
-- 
cgit v1.2.3


From e3e084c66089704a36f28dfb2bc4b17e5c5ce046 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:27:50 +0000
Subject: i965g: initialize winsys pointer in surface cache

---
 src/gallium/drivers/i965/brw_state_cache.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 1cb1b5e721..071a942e5c 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -462,6 +462,7 @@ brw_init_surface_cache(struct brw_context *brw)
    struct brw_cache *cache = &brw->surface_cache;
 
    cache->brw = brw;
+   cache->sws = brw->sws;
 
    cache->size = 7;
    cache->n_items = 0;
-- 
cgit v1.2.3


From 7a49bd6d15d7778db637340d695095dafb43a7fe Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:30:52 +0000
Subject: i965g: remove redundant screen pointer in brw context struct

---
 src/gallium/drivers/i965/brw_context.h    | 1 -
 src/gallium/drivers/i965/brw_curbe.c      | 2 +-
 src/gallium/drivers/i965/brw_draw.c       | 4 ++--
 src/gallium/drivers/i965/brw_pipe_flush.c | 6 ++++--
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index d033cb0f91..0c1dcf8a14 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -483,7 +483,6 @@ struct brw_context
    struct pipe_context base;
    struct brw_chipset chipset;
 
-   struct brw_screen *brw_screen;   
    struct brw_winsys_screen *sws;
 
    struct brw_batchbuffer *batch;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 5763173bca..f62b0b0d5e 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -224,7 +224,7 @@ static int prepare_curbe_buffer(struct brw_context *brw)
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT];
-      struct pipe_screen *screen = &brw->brw_screen->base;
+      struct pipe_screen *screen = brw->base.screen;
 
       const GLfloat *value = screen->buffer_map( screen,
 						 brw->curr.vertex_constants,
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index a2bed6256b..6d6b1c7c5c 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -258,14 +258,14 @@ boolean brw_draw_init( struct brw_context *brw )
 
    /* Create helpers for uploading data in user buffers:
     */
-   brw->vb.upload_vertex = u_upload_create( &brw->brw_screen->base,
+   brw->vb.upload_vertex = u_upload_create( brw->base.screen,
 					    128 * 1024,
 					    64,
 					    PIPE_BUFFER_USAGE_VERTEX );
    if (brw->vb.upload_vertex == NULL)
       return FALSE;
 
-   brw->vb.upload_index = u_upload_create( &brw->brw_screen->base,
+   brw->vb.upload_index = u_upload_create( brw->base.screen,
 					   128 * 1024,
 					   64,
 					   PIPE_BUFFER_USAGE_INDEX );
diff --git a/src/gallium/drivers/i965/brw_pipe_flush.c b/src/gallium/drivers/i965/brw_pipe_flush.c
index 9dff2beeb1..fdc4814b22 100644
--- a/src/gallium/drivers/i965/brw_pipe_flush.c
+++ b/src/gallium/drivers/i965/brw_pipe_flush.c
@@ -50,8 +50,9 @@ static unsigned brw_is_buffer_referenced(struct pipe_context *pipe,
                                   struct pipe_buffer *buffer)
 {
    struct brw_context *brw = brw_context(pipe);
+   struct brw_screen *bscreen = brw_screen(brw->base.screen);
 
-   return brw_is_buffer_referenced_by_bo( brw->brw_screen,
+   return brw_is_buffer_referenced_by_bo( bscreen,
                                           buffer,
                                           brw->batch->buf );
 }
@@ -62,8 +63,9 @@ static unsigned brw_is_texture_referenced(struct pipe_context *pipe,
                                    unsigned level)
 {
    struct brw_context *brw = brw_context(pipe);
+   struct brw_screen *bscreen = brw_screen(brw->base.screen);
 
-   return brw_is_texture_referenced_by_bo( brw->brw_screen,
+   return brw_is_texture_referenced_by_bo( bscreen,
                                            texture, face, level,
                                            brw->batch->buf );
 }
-- 
cgit v1.2.3


From e84e86ecb2e83b756a0153d315f946d60d695a54 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:33:08 +0000
Subject: i965g: fix some asserts

---
 src/gallium/drivers/i965/brw_state_upload.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index 4132c6ac69..a71af4d2b9 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -193,8 +193,8 @@ enum pipe_error brw_validate_state( struct brw_context *brw )
    {
       const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
       if (fp) {
-         assert(fp->info.file_max[TGSI_FILE_SAMPLER] < brw->curr.num_samplers &&
-		fp->info.texture_max < brw->curr.num_textures);
+         assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers);
+	 assert(fp->info.texture_max <= brw->curr.num_textures);
       }
    }
 
-- 
cgit v1.2.3


From 220566d8dc4ff023ef833fd6519ab7b187e598d2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:37:52 +0000
Subject: i965g: consolidate some includes

---
 src/gallium/drivers/i965/brw_batchbuffer.c   | 2 --
 src/gallium/drivers/i965/brw_batchbuffer.h   | 2 --
 src/gallium/drivers/i965/brw_draw_upload.c   | 1 -
 src/gallium/drivers/i965/brw_pipe_clear.c    | 1 -
 src/gallium/drivers/i965/brw_winsys.h        | 7 ++++---
 src/gallium/drivers/i965/brw_wm.c            | 2 --
 src/gallium/drivers/i965/brw_wm_fp.c         | 1 -
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 5 +++--
 8 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 64d6754df5..673bd1ed44 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -27,8 +27,6 @@
 
 #include "util/u_memory.h"
 
-#include "pipe/p_error.h"
-
 #include "brw_batchbuffer.h"
 //#include "brw_decode.h"
 #include "brw_reg.h"
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index b051638296..781cd698e4 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -3,8 +3,6 @@
 
 #include "util/u_debug.h"
 
-#include "pipe/p_error.h"
-
 #include "brw_types.h"
 #include "brw_winsys.h"
 #include "brw_reg.h"
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 6e12e8f4b3..acebd44080 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -26,7 +26,6 @@
  **************************************************************************/
 
 #include "pipe/p_context.h"
-#include "pipe/p_error.h"
 
 #include "util/u_upload_mgr.h"
 #include "util/u_math.h"
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index 69bc95e51a..34cad62977 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -27,7 +27,6 @@
 
 #include "util/u_pack_color.h"
 
-#include "pipe/p_error.h"
 #include "pipe/p_state.h"
 
 #include "brw_batchbuffer.h"
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index b2ba3e86f9..f5ce9d13d7 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -27,6 +27,7 @@
 #define BRW_WINSYS_H
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_error.h"
 
 struct brw_winsys;
 struct pipe_fence_handle;
@@ -123,9 +124,9 @@ struct brw_winsys_screen {
    /* XXX: couldn't this be handled by returning true/false on
     * bo_emit_reloc?
     */
-   boolean (*check_aperture_space)( struct brw_winsys_screen *iws,
-				    struct brw_winsys_buffer **buffers,
-				    unsigned count );
+   enum pipe_error (*check_aperture_space)( struct brw_winsys_screen *iws,
+					    struct brw_winsys_buffer **buffers,
+					    unsigned count );
 
    /**
     * Map a buffer.
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 90780272da..815ae8c51a 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -28,8 +28,6 @@
   * Authors:
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
-#include "pipe/p_error.h"
-
 #include "tgsi/tgsi_info.h"
 
 #include "brw_context.h"
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 58f1d35b7d..bba448815b 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -31,7 +31,6 @@
                
 
 #include "pipe/p_shader_tokens.h"
-#include "pipe/p_error.h"
 
 #include "util/u_math.h"
 #include "util/u_memory.h"
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 08fce4b20b..71d8f4bafc 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -35,6 +35,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "pipe/p_error.h"
 #include "pipe/p_context.h"
 
 #include "xm_winsys.h"
@@ -226,7 +227,7 @@ xlib_brw_bo_references(struct brw_winsys_buffer *a,
    return TRUE;
 }
 
-static boolean 
+static enum pipe_error
 xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
                                 struct brw_winsys_buffer **buffers,
                                 unsigned count )
@@ -241,7 +242,7 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
                 __FUNCTION__, count, 
                 (tot_size + 1023) / 1024);
 
-   return TRUE;
+   return PIPE_OK;
 }
 
 static void *
-- 
cgit v1.2.3


From c2e51effe6228aa2fe6610c695b494e86490bc80 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:41:30 +0000
Subject: i965g: fix order of calculation of brw->wm.nr_surfaces

---
 src/gallium/drivers/i965/brw_wm_surface_state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index 6c29db045f..b055dde20c 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -215,14 +215,14 @@ static int prepare_wm_surfaces(struct brw_context *brw )
    }
 #endif
 
-   brw->sws->bo_unreference(brw->wm.bind_bo);
-   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
-
    if (brw->wm.nr_surfaces != nr_surfaces) {
       brw->wm.nr_surfaces = nr_surfaces;
       brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
    }
 
+   brw->sws->bo_unreference(brw->wm.bind_bo);
+   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
+
    return 0;
 }
 
-- 
cgit v1.2.3


From ffc24463913071bbb0fa9cab9a05ea7a089c56a0 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 4 Nov 2009 23:44:16 +0000
Subject: i965g: enable line stipple packet emit

With this change, trivial/tri manages to build and emit
a fairly unconvincing command buffer (to the debug winsys),
and then crashes.
---
 src/gallium/drivers/i965/brw_misc_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index e786ea1100..b562eb7152 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -37,6 +37,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 #include "brw_screen.h"
+#include "brw_pipe_rast.h"
 
 
@@ -342,7 +343,7 @@ const struct brw_tracked_state brw_polygon_stipple = {
 
 static int upload_line_stipple(struct brw_context *brw)
 {
-   struct brw_line_stipple *bls = NULL; //brw->curr.rast->bls;
+   struct brw_line_stipple *bls = &brw->curr.rast->bls;
    BRW_CACHED_BATCH_STRUCT(brw, bls);
    return 0;
 }
-- 
cgit v1.2.3


From 643bb3419d7d342436cc54603e51467153d5d030 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Thu, 5 Nov 2009 00:38:51 +0000
Subject: i965g: Fix debug check

---
 src/gallium/drivers/i965/brw_draw_upload.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index acebd44080..4fa7d549eb 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -426,7 +426,7 @@ static int brw_prepare_indices(struct brw_context *brw)
    if (index_buffer == NULL)
       return 0;
 
-   if (DEBUG & DEBUG_VERTS)
+   if (BRW_DEBUG & DEBUG_VERTS)
       debug_printf("%s: index_size:%d index_buffer->size:%d\n",
 		   __FUNCTION__,
 		   brw->curr.index_size,
-- 
cgit v1.2.3


From 47cbbb7253f89ff165c4953758efaaca19adf16f Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Thu, 5 Nov 2009 00:42:30 +0000
Subject: i965g: Builds with scons

But there are some missing symbols, "nm -u i965_dri.so"
[SNIP]
                 U brw_surface_bo
                 U brw_surface_pitch
                 U brw_texture_blanket_winsys_buffer
                 U brw_texture_get_winsys_buffer
                 U brw_update_dirty_counts
[SNIP]
---
 SConstruct                          |  8 ++--
 src/gallium/drivers/i965/SConscript | 77 +++++++++++++++++++++++++++++++++++++
 src/gallium/winsys/drm/SConscript   |  5 +++
 3 files changed, 86 insertions(+), 4 deletions(-)
 create mode 100644 src/gallium/drivers/i965/SConscript

(limited to 'src/gallium/drivers')

diff --git a/SConstruct b/SConstruct
index d53f4401e5..d4db812db5 100644
--- a/SConstruct
+++ b/SConstruct
@@ -32,10 +32,10 @@ import common
 default_statetrackers = 'mesa'
 
 if common.default_platform in ('linux', 'freebsd', 'darwin'):
-	default_drivers = 'softpipe,failover,i915,trace,identity,llvmpipe'
+	default_drivers = 'softpipe,failover,i915,i965,trace,identity,llvmpipe'
 	default_winsys = 'xlib'
 elif common.default_platform in ('winddk',):
-	default_drivers = 'softpipe,i915,trace,identity'
+	default_drivers = 'softpipe,i915,i965,trace,identity'
 	default_winsys = 'all'
 else:
 	default_drivers = 'all'
@@ -46,9 +46,9 @@ common.AddOptions(opts)
 opts.Add(ListVariable('statetrackers', 'state trackers to build', default_statetrackers,
                      ['mesa', 'python', 'xorg']))
 opts.Add(ListVariable('drivers', 'pipe drivers to build', default_drivers,
-                     ['softpipe', 'failover', 'i915', 'cell', 'trace', 'r300', 'identity', 'llvmpipe']))
+                     ['softpipe', 'failover', 'i915', 'i965', 'cell', 'trace', 'r300', 'identity', 'llvmpipe']))
 opts.Add(ListVariable('winsys', 'winsys drivers to build', default_winsys,
-                     ['xlib', 'intel', 'gdi', 'radeon']))
+                     ['xlib', 'intel', 'i965', 'gdi', 'radeon']))
 
 opts.Add(EnumVariable('MSVS_VERSION', 'MS Visual C++ version', None, allowed_values=('7.1', '8.0', '9.0')))
 
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
new file mode 100644
index 0000000000..c517b08ec5
--- /dev/null
+++ b/src/gallium/drivers/i965/SConscript
@@ -0,0 +1,77 @@
+Import('*')
+
+env = env.Clone()
+
+i965 = env.ConvenienceLibrary(
+	target = 'i965',
+	source = [
+		'brw_batchbuffer.c',
+		'brw_cc.c',
+		'brw_clip.c',
+		'brw_clip_line.c',
+		'brw_clip_point.c',
+		'brw_clip_state.c',
+		'brw_clip_tri.c',
+		'brw_clip_unfilled.c',
+		'brw_clip_util.c',
+		'brw_context.c',
+		'brw_curbe.c',
+		'brw_disasm.c',
+		'brw_draw.c',
+		'brw_draw_upload.c',
+		'brw_eu.c',
+		'brw_eu_debug.c',
+		'brw_eu_emit.c',
+		'brw_eu_util.c',
+		'brw_gs.c',
+		'brw_gs_emit.c',
+		'brw_gs_state.c',
+		'brw_misc_state.c',
+		'brw_pipe_blend.c',
+		'brw_pipe_clear.c',
+		'brw_pipe_depth.c',
+		'brw_pipe_fb.c',
+		'brw_pipe_flush.c',
+		'brw_pipe_misc.c',
+		'brw_pipe_query.c',
+		'brw_pipe_rast.c',
+		'brw_pipe_sampler.c',
+		'brw_pipe_shader.c',
+		'brw_pipe_vertex.c',
+		'brw_screen_buffers.c',
+		'brw_screen.c',
+		'brw_screen_surface.c',
+		'brw_screen_tex_layout.c',
+		'brw_screen_texture.c',
+		'brw_sf.c',
+		'brw_sf_emit.c',
+		'brw_sf_state.c',
+		'brw_state_batch.c',
+		'brw_state_cache.c',
+#		'brw_state_debug.c',
+		'brw_state_dump.c',
+		'brw_state_upload.c',
+		'brw_swtnl.c',
+		'brw_urb.c',
+		'brw_util.c',
+		'brw_vs.c',
+		'brw_vs_emit.c',
+		'brw_vs_state.c',
+		'brw_vs_surface_state.c',
+		'brw_wm.c',
+#		'brw_wm_constant_buffer.c',
+		'brw_wm_debug.c',
+		'brw_wm_emit.c',
+		'brw_wm_fp.c',
+#		'brw_wm_glsl.c',
+		'brw_wm_iz.c',
+		'brw_wm_pass0.c',
+		'brw_wm_pass1.c',
+		'brw_wm_pass2.c',
+		'brw_wm_sampler_state.c',
+		'brw_wm_state.c',
+		'brw_wm_surface_state.c',
+		'intel_decode.c',
+	])
+
+Export('i965')
diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript
index a9e9f2682a..ba389d8ed3 100644
--- a/src/gallium/winsys/drm/SConscript
+++ b/src/gallium/winsys/drm/SConscript
@@ -53,6 +53,11 @@ if env['dri']:
 			'intel/SConscript',
 		])
 
+	if 'i965' in env['winsys']:
+		SConscript([
+			'i965/SConscript',
+		])
+
 	if 'radeon' in env['winsys']:
 		SConscript([
 			'radeon/SConscript',
-- 
cgit v1.2.3


From ad96c0d851f6c3696fa6ae0c1f6ad56e849bc739 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 3 Nov 2009 16:48:48 +0100
Subject: r300g: add color channel masking

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_context.h         |  1 +
 src/gallium/drivers/r300/r300_emit.c            |  6 +++---
 src/gallium/drivers/r300/r300_state.c           | 14 ++++++++++++++
 src/gallium/drivers/r300/r300_state_invariant.c |  3 +--
 4 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index ae7015634c..8c65c04d01 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -34,6 +34,7 @@ struct r300_vertex_shader;
 struct r300_blend_state {
     uint32_t blend_control;       /* R300_RB3D_CBLEND: 0x4e04 */
     uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */
+    uint32_t color_channel_mask;  /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
     uint32_t rop;                 /* R300_RB3D_ROPCNTL: 0x4e18 */
     uint32_t dither;              /* R300_RB3D_DITHER_CTL: 0x4e50 */
 };
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 79972dbb49..8fe9a68886 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -38,10 +38,11 @@ void r300_emit_blend_state(struct r300_context* r300,
                            struct r300_blend_state* blend)
 {
     CS_LOCALS(r300);
-    BEGIN_CS(7);
-    OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2);
+    BEGIN_CS(8);
+    OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
     OUT_CS(blend->blend_control);
     OUT_CS(blend->alpha_blend_control);
+    OUT_CS(blend->color_channel_mask);
     OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
     OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
     END_CS;
@@ -313,7 +314,6 @@ void r300_emit_fb_state(struct r300_context* r300,
         tex = (struct r300_texture*)surf->texture;
         assert(tex && tex->buffer && "cbuf is marked, but NULL!");
 
-        /* XXX I still need to figure out how to set the mipmap level here */
         OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
         OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index af063d4b20..242ec9f365 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -100,6 +100,20 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
                 (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
     }
 
+    /* Color Channel Mask */
+    if (state->colormask & PIPE_MASK_R) {
+        blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_RED_MASK0;
+    }
+    if (state->colormask & PIPE_MASK_G) {
+        blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0;
+    }
+    if (state->colormask & PIPE_MASK_B) {
+        blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0;
+    }
+    if (state->colormask & PIPE_MASK_A) {
+        blend->color_channel_mask |= RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0;
+    }
+
     if (state->dither) {
         blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
                 R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 4865f16058..7e4d5c7c72 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(64 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+    BEGIN_CS(62 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
     /* Flush PVS. */
     OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
 
@@ -125,7 +125,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
     OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
     OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
-    OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
     OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
     if (caps->is_r500) {
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
-- 
cgit v1.2.3


From 07190888bdc41f53bf8ea30c9e2ee4a61b42d802 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 3 Nov 2009 16:50:09 +0100
Subject: r300g: set the correct offset in a colorbuffer surface

Suggested by Joakim Sindholt.

Also, put flushing of colorbuffers _before_ the framebuffer state setup,
suggested by docs.

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_emit.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 8fe9a68886..fc823ad31f 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -309,13 +309,20 @@ void r300_emit_fb_state(struct r300_context* r300,
     CS_LOCALS(r300);
 
     BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4);
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
     for (i = 0; i < fb->nr_cbufs; i++) {
         surf = fb->cbufs[i];
         tex = (struct r300_texture*)surf->texture;
         assert(tex && tex->buffer && "cbuf is marked, but NULL!");
 
         OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
-        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+        OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
         OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
         OUT_CS_RELOC(tex->buffer, tex->pitch[surf->level] |
@@ -332,7 +339,7 @@ void r300_emit_fb_state(struct r300_context* r300,
         assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
 
         OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
-        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+        OUT_CS_RELOC(tex->buffer, surf->offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
 
         OUT_CS_REG(R300_ZB_FORMAT, r300_translate_zsformat(tex->tex.format));
 
@@ -341,12 +348,6 @@ void r300_emit_fb_state(struct r300_context* r300,
                      RADEON_GEM_DOMAIN_VRAM, 0);
     }
 
-    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
-        R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
-        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
-    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
-        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
-        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
     END_CS;
 }
 
-- 
cgit v1.2.3


From 4671005a4317fa37aea8786740470a40906fbfa7 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 3 Nov 2009 16:58:39 +0100
Subject: r300g: fix the size of CS when emitting the fragprog constant buffer

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index fc823ad31f..6415c59c2d 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -285,7 +285,7 @@ void r500_emit_fs_constant_buffer(struct r300_context* r300,
     if (constants->Count == 0)
         return;
 
-    BEGIN_CS(constants->Count * 4 + 2);
+    BEGIN_CS(constants->Count * 4 + 3);
     OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
     OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, constants->Count * 4);
     for (i = 0; i < constants->Count; i++) {
-- 
cgit v1.2.3


From c2e47191d72e16aaa1fae4f47bbed7639c2ff201 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 4 Nov 2009 10:56:44 +0100
Subject: r300g: add polygon mode

Signed-off-by: Corbin Simpson <MostAwesomeDude@gmail.com>
---
 src/gallium/drivers/r300/r300_context.h         |  1 +
 src/gallium/drivers/r300/r300_emit.c            |  3 ++-
 src/gallium/drivers/r300/r300_state.c           | 27 +++++++++++++++++++
 src/gallium/drivers/r300/r300_state_inlines.h   | 36 +++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_state_invariant.c |  3 +--
 5 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 8c65c04d01..850e5a41c9 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -78,6 +78,7 @@ struct r300_rs_state {
     uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
     uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
     uint32_t color_control;         /* R300_GA_COLOR_CONTROL: 0x4278 */
+    uint32_t polygon_mode;          /* R300_GA_POLY_MODE: 0x4288 */
 };
 
 struct r300_rs_block {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 6415c59c2d..69ce5966e8 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -483,7 +483,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(20);
+    BEGIN_CS(22);
     OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
     OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
     OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2);
@@ -499,6 +499,7 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
     OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
     OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
     OUT_CS_REG(R300_GA_COLOR_CONTROL, rs->color_control);
+    OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode);
     END_CS;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 242ec9f365..658a8cba13 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -398,25 +398,52 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
     rs->line_control = pack_float_16_6x(state->line_width) |
         R300_GA_LINE_CNTL_END_TYPE_COMP;
 
+    /* XXX I think there is something wrong with the polygon mode,
+     * XXX re-test when r300g is in a better shape */
+
+    /* Enable polygon mode */
+    if (state->fill_cw != PIPE_POLYGON_MODE_FILL ||
+        state->fill_ccw != PIPE_POLYGON_MODE_FILL) {
+        rs->polygon_mode = R300_GA_POLY_MODE_DUAL;
+    }
+
     /* Radeons don't think in "CW/CCW", they think in "front/back". */
     if (state->front_winding == PIPE_WINDING_CW) {
         rs->cull_mode = R300_FRONT_FACE_CW;
 
+        /* Polygon offset */
         if (state->offset_cw) {
             rs->polygon_offset_enable |= R300_FRONT_ENABLE;
         }
         if (state->offset_ccw) {
             rs->polygon_offset_enable |= R300_BACK_ENABLE;
         }
+
+        /* Polygon mode */
+        if (rs->polygon_mode) {
+            rs->polygon_mode |=
+                r300_translate_polygon_mode_front(state->fill_cw);
+            rs->polygon_mode |=
+                r300_translate_polygon_mode_back(state->fill_ccw);
+        }
     } else {
         rs->cull_mode = R300_FRONT_FACE_CCW;
 
+        /* Polygon offset */
         if (state->offset_ccw) {
             rs->polygon_offset_enable |= R300_FRONT_ENABLE;
         }
         if (state->offset_cw) {
             rs->polygon_offset_enable |= R300_BACK_ENABLE;
         }
+
+        /* Polygon mode */
+        if (rs->polygon_mode) {
+            rs->polygon_mode |=
+                r300_translate_polygon_mode_front(state->fill_ccw);
+            rs->polygon_mode |=
+                r300_translate_polygon_mode_back(state->fill_cw);
+        }
     }
     if (state->front_winding & state->cull_mode) {
         rs->cull_mode |= R300_CULL_FRONT;
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 176e59f281..52b9650fc1 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -504,4 +504,40 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
         (0xf << R300_WRITE_ENA_SHIFT));
 }
 
+static INLINE uint32_t
+r300_translate_polygon_mode_front(unsigned mode) {
+    switch (mode)
+    {
+        case PIPE_POLYGON_MODE_FILL:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+        case PIPE_POLYGON_MODE_LINE:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+        case PIPE_POLYGON_MODE_POINT:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+
+        default:
+            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+                __FUNCTION__);
+            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+    }
+}
+
+static INLINE uint32_t
+r300_translate_polygon_mode_back(unsigned mode) {
+    switch (mode)
+    {
+        case PIPE_POLYGON_MODE_FILL:
+            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+        case PIPE_POLYGON_MODE_LINE:
+            return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+        case PIPE_POLYGON_MODE_POINT:
+            return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+
+        default:
+            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+                __FUNCTION__);
+            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+    }
+}
+
 #endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 7e4d5c7c72..c07e6ae676 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(62 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+    BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
     /* Flush PVS. */
     OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
 
@@ -114,7 +114,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     /* XXX this big chunk should be refactored into rs_state */
     OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
     OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
-    OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000);
     OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
     OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
     OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
-- 
cgit v1.2.3


From 2475e5db679a70c4a3868dc07037d009865a6694 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 08:00:33 +0000
Subject: i965g: fix compiler warning

---
 src/gallium/drivers/i965/brw_misc_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index b562eb7152..8e35f9ad1d 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -343,7 +343,7 @@ const struct brw_tracked_state brw_polygon_stipple = {
 
 static int upload_line_stipple(struct brw_context *brw)
 {
-   struct brw_line_stipple *bls = &brw->curr.rast->bls;
+   const struct brw_line_stipple *bls = &brw->curr.rast->bls;
    BRW_CACHED_BATCH_STRUCT(brw, bls);
    return 0;
 }
-- 
cgit v1.2.3


From b2bf5f98d923b8d52473e069576fc6514c0ffd0a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 08:01:48 +0000
Subject: i965g: use pipe_error return value for brw_batchbuffer_require_space

trivial/tri runs without crashing (on debug winsys) but still produces
obviously incorrect command buffers.
---
 src/gallium/drivers/i965/brw_batchbuffer.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 781cd698e4..1f04826aea 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -103,19 +103,19 @@ brw_batchbuffer_emit_dword(struct brw_batchbuffer *batch, GLuint dword)
    batch->ptr += 4;
 }
 
-static INLINE boolean
+static INLINE enum pipe_error
 brw_batchbuffer_require_space(struct brw_batchbuffer *batch,
                                 GLuint sz)
 {
    assert(sz < batch->size - 8);
    if (brw_batchbuffer_space(batch) < sz) {
       assert(0);
-      return FALSE;
+      return PIPE_ERROR_OUT_OF_MEMORY;
    }
 #ifdef DEBUG
    batch->emit.end_ptr = batch->ptr + sz;
 #endif
-   return TRUE;
+   return 0;
 }
 
 /* Here are the crusty old macros, to be removed:
-- 
cgit v1.2.3


From 6ac38232ee1ebde5ed390e3ccc22cba59ad00854 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 10:59:02 +0000
Subject: i965g: add data type tags to aid dumping/decoding

---
 src/gallium/drivers/i965/brw_batchbuffer.c    | 17 +++--
 src/gallium/drivers/i965/brw_context.h        | 40 ++++++-----
 src/gallium/drivers/i965/brw_curbe.c          |  1 +
 src/gallium/drivers/i965/brw_pipe_query.c     |  2 +-
 src/gallium/drivers/i965/brw_screen_buffers.c | 13 ++--
 src/gallium/drivers/i965/brw_screen_texture.c | 18 ++---
 src/gallium/drivers/i965/brw_state_cache.c    | 23 +++----
 src/gallium/drivers/i965/brw_state_dump.c     | 14 ++--
 src/gallium/drivers/i965/brw_winsys.h         | 69 ++++++++++++-------
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c  | 95 +++++++++++++++++----------
 10 files changed, 182 insertions(+), 110 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 673bd1ed44..ca612e5ed0 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -53,7 +53,9 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
    if (batch->malloc_buffer)
       batch->map = batch->malloc_buffer;
    else 
-      batch->map = batch->sws->bo_map(batch->buf, GL_TRUE);
+      batch->map = batch->sws->bo_map(batch->buf,
+                                      BRW_DATA_OTHER,
+                                      GL_TRUE);
 
    batch->size = BRW_BATCH_SIZE;
    batch->ptr = batch->map;
@@ -132,7 +134,10 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
    used = batch->ptr - batch->map;
 
    if (batch->use_malloc_buffer) {
-      batch->sws->bo_subdata(batch->buf, 0, used, batch->map );
+      batch->sws->bo_subdata(batch->buf, 
+                             BRW_DATA_OTHER,
+                             0, used,
+                             batch->map );
       batch->map = NULL;
    }
    else {
@@ -145,7 +150,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
    batch->sws->bo_exec(batch->buf, used );
 
    if (1 /*BRW_DEBUG & DEBUG_BATCH*/) {
-      void *ptr = batch->sws->bo_map(batch->buf, GL_FALSE);
+      void *ptr = batch->sws->bo_map(batch->buf,
+                                     BRW_DATA_OTHER,
+                                     GL_FALSE);
 
       intel_decode(ptr,
 		   used / 4, 
@@ -162,7 +169,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
        * interface.
        */
       debug_printf("waiting for idle\n");
-      batch->sws->bo_map(batch->buf, GL_TRUE);
+      batch->sws->bo_map(batch->buf,
+                         BRW_DATA_OTHER,
+                         GL_TRUE);
       batch->sws->bo_unmap(batch->buf);
    }
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 0c1dcf8a14..09d34615c7 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -347,25 +347,27 @@ struct brw_vs_ouput_sizes {
 #define SURF_INDEX_VERT_CONST_BUFFER 0
 
 
+/* Bit of a hack to align these with the winsys buffer_data_type enum.
+ */
 enum brw_cache_id {
-   BRW_CC_VP,
-   BRW_CC_UNIT,
-   BRW_WM_PROG,
-   BRW_SAMPLER_DEFAULT_COLOR,
-   BRW_SAMPLER,
-   BRW_WM_UNIT,
-   BRW_SF_PROG,
-   BRW_SF_VP,
-   BRW_SF_UNIT,
-   BRW_VS_UNIT,
-   BRW_VS_PROG,
-   BRW_GS_UNIT,
-   BRW_GS_PROG,
-   BRW_CLIP_VP,
-   BRW_CLIP_UNIT,
-   BRW_CLIP_PROG,
-   BRW_SS_SURFACE,
-   BRW_SS_SURF_BIND,
+   BRW_CC_VP         = BRW_DATA_GS_CC_VP,
+   BRW_CC_UNIT       = BRW_DATA_GS_CC_UNIT,
+   BRW_WM_PROG       = BRW_DATA_GS_WM_PROG,
+   BRW_SAMPLER_DEFAULT_COLOR    = BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+   BRW_SAMPLER       = BRW_DATA_GS_SAMPLER,
+   BRW_WM_UNIT       = BRW_DATA_GS_WM_UNIT,
+   BRW_SF_PROG       = BRW_DATA_GS_SF_PROG,
+   BRW_SF_VP         = BRW_DATA_GS_SF_VP,
+   BRW_SF_UNIT       = BRW_DATA_GS_SF_UNIT,
+   BRW_VS_UNIT       = BRW_DATA_GS_VS_UNIT,
+   BRW_VS_PROG       = BRW_DATA_GS_VS_PROG,
+   BRW_GS_UNIT       = BRW_DATA_GS_GS_UNIT,
+   BRW_GS_PROG       = BRW_DATA_GS_GS_PROG,
+   BRW_CLIP_VP       = BRW_DATA_GS_CLIP_VP,
+   BRW_CLIP_UNIT     = BRW_DATA_GS_CLIP_UNIT,
+   BRW_CLIP_PROG     = BRW_DATA_GS_CLIP_PROG,
+   BRW_SS_SURFACE    = BRW_DATA_SS_SURFACE,
+   BRW_SS_SURF_BIND  = BRW_DATA_SS_SURF_BIND,
 
    BRW_MAX_CACHE
 };
@@ -399,6 +401,8 @@ struct brw_cache {
    struct brw_cache_item **items;
    GLuint size, n_items;
 
+   enum brw_buffer_type buffer_type;
+
    GLuint key_size[BRW_MAX_CACHE];		/* for fixed-size keys */
    GLuint aux_size[BRW_MAX_CACHE];
    char *name[BRW_MAX_CACHE];
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index f62b0b0d5e..1e2e232204 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -289,6 +289,7 @@ static int prepare_curbe_buffer(struct brw_context *brw)
        */
       brw->sws->bo_subdata(brw->curbe.curbe_bo,
 			   brw->curbe.curbe_offset,
+                           BRW_DATA_OTHER,
 			   bufsz,
 			   buf);
    }
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index d3e173f5ec..3370ebd262 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -63,7 +63,7 @@ brw_query_get_result(struct pipe_context *pipe,
       if (brw->sws->bo_is_busy(query->bo) && !wait)
 	 return FALSE;
       
-      map = brw->sws->bo_map(query->bo, GL_FALSE);
+      map = brw->sws->bo_map(query->bo, BRW_DATA_OTHER, GL_FALSE);
       if (map == NULL)
 	 return FALSE;
       
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
index c0f19d64aa..ba54740225 100644
--- a/src/gallium/drivers/i965/brw_screen_buffers.c
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -24,6 +24,7 @@ brw_buffer_map( struct pipe_screen *screen,
       return buf->user_buffer;
 
    return sws->bo_map( buf->bo, 
+                       BRW_DATA_OTHER,
                        (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE );
 }
 
@@ -64,7 +65,7 @@ brw_buffer_create(struct pipe_screen *screen,
    struct brw_screen *bscreen = brw_screen(screen);
    struct brw_winsys_screen *sws = bscreen->sws;
    struct brw_buffer *buf;
-   unsigned usage_type;
+   unsigned buffer_type;
    
    buf = CALLOC_STRUCT(brw_buffer);
    if (!buf)
@@ -84,24 +85,24 @@ brw_buffer_create(struct pipe_screen *screen,
    case PIPE_BUFFER_USAGE_VERTEX:
    case PIPE_BUFFER_USAGE_INDEX:
    case (PIPE_BUFFER_USAGE_VERTEX|PIPE_BUFFER_USAGE_INDEX):
-      usage_type = BRW_BUFFER_TYPE_VERTEX;
+      buffer_type = BRW_BUFFER_TYPE_VERTEX;
       break;
       
    case PIPE_BUFFER_USAGE_PIXEL:
-      usage_type = BRW_BUFFER_TYPE_PIXEL;
+      buffer_type = BRW_BUFFER_TYPE_PIXEL;
       break;
 
    case PIPE_BUFFER_USAGE_CONSTANT:
-      usage_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
+      buffer_type = BRW_BUFFER_TYPE_SHADER_CONSTANTS;
       break;
 
    default:
-      usage_type = BRW_BUFFER_TYPE_GENERIC;
+      buffer_type = BRW_BUFFER_TYPE_GENERIC;
       break;
    }
    
    buf->bo = sws->bo_alloc( sws,
-                            usage_type,
+                            buffer_type,
                             size,
                             alignment );
       
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index c318b07f97..ba6dc7dfde 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -186,6 +186,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
 {  
    struct brw_screen *bscreen = brw_screen(screen);
    struct brw_texture *tex;
+   enum brw_buffer_type buffer_type;
    
    tex = CALLOC_STRUCT(brw_texture);
    if (tex == NULL)
@@ -226,21 +227,16 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
       goto fail;
 
    
-   if (templ->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
-   } 
-   else if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
-                            PIPE_TEXTURE_USAGE_PRIMARY)) {
-   }
-   else if (templ->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
-   }
-   else if (templ->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+   if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                           PIPE_TEXTURE_USAGE_PRIMARY)) {
+      buffer_type = BRW_BUFFER_TYPE_SCANOUT;
    }
-   
-   if (templ->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) {
+   else {
+      buffer_type = BRW_BUFFER_TYPE_TEXTURE;
    }
 
    tex->bo = bscreen->sws->bo_alloc( bscreen->sws,
-                                     BRW_USAGE_SAMPLER,
+                                     buffer_type,
                                      tex->pitch * tex->total_height * tex->cpp,
                                      64 );
 
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index 071a942e5c..cbd1f02d77 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -228,7 +228,7 @@ brw_upload_cache( struct brw_cache *cache,
     * these various entities.
     */
    bo = cache->sws->bo_alloc(cache->sws,
-			     BRW_BUFFER_TYPE_STATE_CACHE, 
+                             cache->buffer_type,
 			     data_size, 1 << 6);
 
 
@@ -273,7 +273,9 @@ brw_upload_cache( struct brw_cache *cache,
 		   data_size, cache_id);
 
    /* Copy data to the buffer */
-   cache->sws->bo_subdata(bo, 0, data_size, data);
+   cache->sws->bo_subdata(bo, 
+                          cache_id,
+                          0, data_size, data);
 
    update_cache_last(cache, cache_id, bo);
 
@@ -332,11 +334,6 @@ brw_cache_data(struct brw_cache *cache,
 			    reloc_bufs, nr_reloc_bufs);
 }
 
-enum pool_type {
-   DW_SURFACE_STATE,
-   DW_GENERAL_STATE
-};
-
 
 static void
 brw_init_cache_id(struct brw_cache *cache,
@@ -352,13 +349,15 @@ brw_init_cache_id(struct brw_cache *cache,
 
 
 static void
-brw_init_non_surface_cache(struct brw_context *brw)
+brw_init_general_state_cache(struct brw_context *brw)
 {
    struct brw_cache *cache = &brw->cache;
 
    cache->brw = brw;
    cache->sws = brw->sws;
 
+   cache->buffer_type = BRW_BUFFER_TYPE_GENERAL_STATE;
+
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
@@ -457,13 +456,15 @@ brw_init_non_surface_cache(struct brw_context *brw)
 
 
 static void
-brw_init_surface_cache(struct brw_context *brw)
+brw_init_surface_state_cache(struct brw_context *brw)
 {
    struct brw_cache *cache = &brw->surface_cache;
 
    cache->brw = brw;
    cache->sws = brw->sws;
 
+   cache->buffer_type = BRW_BUFFER_TYPE_SURFACE_STATE;
+
    cache->size = 7;
    cache->n_items = 0;
    cache->items = (struct brw_cache_item **)
@@ -486,8 +487,8 @@ brw_init_surface_cache(struct brw_context *brw)
 void
 brw_init_caches(struct brw_context *brw)
 {
-   brw_init_non_surface_cache(brw);
-   brw_init_surface_cache(brw);
+   brw_init_general_state_cache(brw);
+   brw_init_surface_state_cache(brw);
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
index 345e42a6b2..388331ee62 100644
--- a/src/gallium/drivers/i965/brw_state_dump.c
+++ b/src/gallium/drivers/i965/brw_state_dump.c
@@ -65,7 +65,7 @@ state_struct_out(struct brw_winsys_screen *sws,
    if (buffer == NULL)
       return;
 
-   data = sws->bo_map(buffer, GL_FALSE);
+   data = sws->bo_map(buffer, BRW_DATA_OTHER, GL_FALSE);
    for (i = 0; i < state_size / 4; i++) {
       state_out(name, data, buffer->offset[0], i,
 		"dword %d\n", i);
@@ -114,7 +114,9 @@ static void dump_wm_surface_state(struct brw_context *brw)
 	 debug_printf("  WM SS%d: NULL\n", i);
 	 continue;
       }
-      surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, GL_FALSE);
+      surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, 
+                                                          BRW_DATA_OTHER,
+                                                          GL_FALSE);
       surfoff = surf_bo->offset[0];
 
       sprintf(name, "WM SS%d", i);
@@ -144,7 +146,9 @@ static void dump_sf_viewport_state(struct brw_context *brw)
    if (brw->sf.vp_bo == NULL)
       return;
 
-   vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo, GL_FALSE);
+   vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo,
+                                                   BRW_DATA_OTHER,
+                                                   GL_FALSE);
    vp_off = brw->sf.vp_bo->offset[0];
 
    state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
@@ -172,7 +176,9 @@ static void brw_debug_prog(struct brw_winsys_screen *sws,
    if (prog == NULL)
       return;
 
-   data = (uint32_t *)sws->bo_map(prog, GL_FALSE);
+   data = (uint32_t *)sws->bo_map(prog,
+                                  BRW_DATA_OTHER,
+                                  GL_FALSE);
 
    for (i = 0; i < prog->size / 4 / 4; i++) {
       debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index f5ce9d13d7..d941fbcebe 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -44,21 +44,6 @@ struct brw_winsys_buffer {
    unsigned size;
 };
 
-/* Describe the usage of a particular buffer in a relocation.  The DRM
- * winsys will translate these back to GEM read/write domain flags.
- */
-enum brw_buffer_usage {
-   BRW_USAGE_STATE,		/* INSTRUCTION, 0 */
-   BRW_USAGE_QUERY_RESULT,	/* INSTRUCTION, INSTRUCTION */
-   BRW_USAGE_RENDER_TARGET,	/* RENDER,      0 */
-   BRW_USAGE_DEPTH_BUFFER,	/* RENDER,      RENDER */
-   BRW_USAGE_BLIT_SOURCE,	/* RENDER,      0 */
-   BRW_USAGE_BLIT_DEST,         /* RENDER,      RENDER */
-   BRW_USAGE_SAMPLER,		/* SAMPLER,     0 */
-   BRW_USAGE_VERTEX,		/* VERTEX,      0 */
-   BRW_USAGE_SCRATCH,		/* 0,           0 */
-   BRW_USAGE_MAX
-};
 
 /* Should be possible to validate usages above against buffer creation
  * types, below:
@@ -73,12 +58,53 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_SHADER_CONSTANTS,
    BRW_BUFFER_TYPE_SHADER_SCRATCH,
    BRW_BUFFER_TYPE_BATCH,
-   BRW_BUFFER_TYPE_STATE_CACHE,
+   BRW_BUFFER_TYPE_GENERAL_STATE,
+   BRW_BUFFER_TYPE_SURFACE_STATE,
    BRW_BUFFER_TYPE_PIXEL,       /* image uploads, pbo's, etc */
    BRW_BUFFER_TYPE_GENERIC,     /* unknown */
    BRW_BUFFER_TYPE_MAX		/* Count of possible values */
 };
 
+
+/* Describe the usage of a particular buffer in a relocation.  The DRM
+ * winsys will translate these back to GEM read/write domain flags.
+ */
+enum brw_buffer_usage {
+   BRW_USAGE_STATE,         /* INSTRUCTION, 0 */
+   BRW_USAGE_QUERY_RESULT,	 /* INSTRUCTION, INSTRUCTION */
+   BRW_USAGE_RENDER_TARGET, /* RENDER,      0 */
+   BRW_USAGE_DEPTH_BUFFER,	 /* RENDER,      RENDER */
+   BRW_USAGE_BLIT_SOURCE,	 /* RENDER,      0 */
+   BRW_USAGE_BLIT_DEST,     /* RENDER,      RENDER */
+   BRW_USAGE_SAMPLER,	 /* SAMPLER,     0 */
+   BRW_USAGE_VERTEX,	 /* VERTEX,      0 */
+   BRW_USAGE_SCRATCH,	 /* 0,           0 */
+   BRW_USAGE_MAX
+};
+
+enum brw_buffer_data_type {
+   BRW_DATA_GS_CC_VP,
+   BRW_DATA_GS_CC_UNIT,
+   BRW_DATA_GS_WM_PROG,
+   BRW_DATA_GS_SAMPLER_DEFAULT_COLOR,
+   BRW_DATA_GS_SAMPLER,
+   BRW_DATA_GS_WM_UNIT,
+   BRW_DATA_GS_SF_PROG,
+   BRW_DATA_GS_SF_VP,
+   BRW_DATA_GS_SF_UNIT,
+   BRW_DATA_GS_VS_UNIT,
+   BRW_DATA_GS_VS_PROG,
+   BRW_DATA_GS_GS_UNIT,
+   BRW_DATA_GS_GS_PROG,
+   BRW_DATA_GS_CLIP_VP,
+   BRW_DATA_GS_CLIP_UNIT,
+   BRW_DATA_GS_CLIP_PROG,
+   BRW_DATA_SS_SURFACE,
+   BRW_DATA_SS_SURF_BIND,
+   BRW_DATA_OTHER,
+   BRW_DATA_MAX
+};
+
 struct brw_winsys_screen {
 
 
@@ -113,9 +139,10 @@ struct brw_winsys_screen {
 		   unsigned bytes_used );
 
    int (*bo_subdata)(struct brw_winsys_buffer *buffer,
-		      size_t offset,
-		      size_t size,
-		      const void *data);
+                     enum brw_buffer_data_type data_type,
+                     size_t offset,
+                     size_t size,
+                     const void *data);
 
    boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
    boolean (*bo_references)(struct brw_winsys_buffer *a,
@@ -132,6 +159,7 @@ struct brw_winsys_screen {
     * Map a buffer.
     */
    void *(*bo_map)(struct brw_winsys_buffer *buffer,
+                   enum brw_buffer_data_type data_type,
 		   boolean write);
 
    /**
@@ -140,9 +168,6 @@ struct brw_winsys_screen {
    void (*bo_unmap)(struct brw_winsys_buffer *buffer);
    /*@}*/
 
-
-
-
    /**
     * Destroy the winsys.
     */
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 71d8f4bafc..4fe5db4033 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -82,31 +82,57 @@ xlib_brw_buffer( struct brw_winsys_buffer *buffer )
 
 
 const char *names[BRW_BUFFER_TYPE_MAX] = {
-   "texture",
-   "scanout",
-   "vertex",
-   "curbe",
-   "query",
-   "shader_constants",
-   "wm_scratch",
-   "batch",
-   "state_cache",
-   "pixel",
-   "generic",
+   "TEXTURE",
+   "SCANOUT",
+   "VERTEX",
+   "CURBE",
+   "QUERY",
+   "SHADER_CONSTANTS",
+   "WM_SCRATCH",
+   "BATCH",
+   "GENERAL_STATE",
+   "SURFACE_STATE",
+   "PIXEL",
+   "GENERIC",
 };
 
 const char *usages[BRW_USAGE_MAX] = {
-   "state",
-   "query_result",
-   "render_target",
-   "depth_buffer",
-   "blit_source",
-   "blit_dest",
-   "sampler",
-   "vertex",
-   "scratch"
+   "STATE",
+   "QUERY_RESULT",
+   "RENDER_TARGET",
+   "DEPTH_BUFFER",
+   "BLIT_SOURCE",
+   "BLIT_DEST",
+   "SAMPLER",
+   "VERTEX",
+   "SCRATCH"
 };
 
+
+const char *data_types[BRW_DATA_MAX] =
+{
+   "GS: CC_VP",
+   "GS: CC_UNIT",
+   "GS: WM_PROG",
+   "GS: SAMPLER_DEFAULT_COLOR",
+   "GS: SAMPLER",
+   "GS: WM_UNIT",
+   "GS: SF_PROG",
+   "GS: SF_VP",
+   "GS: SF_UNIT",
+   "GS: VS_UNIT",
+   "GS: VS_PROG",
+   "GS: GS_UNIT",
+   "GS: GS_PROG",
+   "GS: CLIP_VP",
+   "GS: CLIP_UNIT",
+   "GS: CLIP_PROG",
+   "SS: SURFACE",
+   "SS: SURF_BIND",
+   "(untyped)"
+};
+
+
 static struct brw_winsys_buffer *
 xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
 		      enum brw_buffer_type type,
@@ -116,8 +142,8 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
    struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
    struct xlib_brw_buffer *buf;
 
-   debug_printf("%s type %d sz %d align %d\n",
-                __FUNCTION__, type, size, alignment );
+   debug_printf("%s type %s sz %d align %d\n",
+                __FUNCTION__, names[type], size, alignment );
 
    buf = CALLOC_STRUCT(xlib_brw_buffer);
    if (!buf)
@@ -168,10 +194,10 @@ xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer )
 
 static int 
 xlib_brw_bo_emit_reloc( struct brw_winsys_buffer *buffer,
-			   enum brw_buffer_usage usage,
-			   unsigned delta,
-			   unsigned offset,
-			   struct brw_winsys_buffer *buffer2)
+                        enum brw_buffer_usage usage,
+                        unsigned delta,
+                        unsigned offset,
+                        struct brw_winsys_buffer *buffer2)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
    struct xlib_brw_buffer *buf2 = xlib_brw_buffer(buffer2);
@@ -197,15 +223,16 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer,
 
 static int
 xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
-		       size_t offset,
-		       size_t size,
-		       const void *data)
+                    enum brw_buffer_data_type data_type,
+                    size_t offset,
+                    size_t size,
+                    const void *data)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
-   debug_printf("%s buf %p off %d sz %d data %p\n", 
+   debug_printf("%s buf %p off %d sz %d data %p %s\n", 
                 __FUNCTION__, 
-                (void *)buffer, offset, size, data);
+                (void *)buffer, offset, size, data, data_types[data_type]);
 
    memcpy(buf->virtual + offset, data, size);
    return 0;
@@ -247,12 +274,14 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
 
 static void *
 xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
+                enum brw_buffer_data_type data_type,
 		   boolean write)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
-   debug_printf("%s %p %s\n", __FUNCTION__, (void *)buffer, 
-                write ? "read/write" : "read");
+   debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer, 
+                write ? "read/write" : "read",
+                write ? data_types[data_type] : "");
 
    buf->map_count++;
    return buf->virtual;
-- 
cgit v1.2.3


From 9069c791d065e513e05611e60f19305a850fb2f5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 5 Nov 2009 12:22:01 +0000
Subject: i965g: Dumper for i965 structures.

---
 src/gallium/drivers/i965/Makefile            |    1 +
 src/gallium/drivers/i965/SConscript          |    1 +
 src/gallium/drivers/i965/brw_structs_dump.c  | 1511 ++++++++++++++++++++++++++
 src/gallium/drivers/i965/brw_structs_dump.h  |  276 +++++
 src/gallium/drivers/i965/brw_structs_dump.py |  284 +++++
 5 files changed, 2073 insertions(+)
 create mode 100644 src/gallium/drivers/i965/brw_structs_dump.c
 create mode 100644 src/gallium/drivers/i965/brw_structs_dump.h
 create mode 100755 src/gallium/drivers/i965/brw_structs_dump.py

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 2188a1d4bc..6c0d3541d7 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -43,6 +43,7 @@ C_SOURCES = \
 	brw_state_cache.c \
 	brw_state_dump.c \
 	brw_state_upload.c \
+	brw_structs_dump.c \
 	brw_swtnl.c \
 	brw_urb.c \
 	brw_util.c \
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
index c517b08ec5..d38ad6fe7e 100644
--- a/src/gallium/drivers/i965/SConscript
+++ b/src/gallium/drivers/i965/SConscript
@@ -43,6 +43,7 @@ i965 = env.ConvenienceLibrary(
 		'brw_screen_surface.c',
 		'brw_screen_tex_layout.c',
 		'brw_screen_texture.c',
+		'brw_structs_dump.c',
 		'brw_sf.c',
 		'brw_sf_emit.c',
 		'brw_sf_state.c',
diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
new file mode 100644
index 0000000000..a8b96c6418
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -0,0 +1,1511 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#include "util/u_debug.h"
+
+#include "brw_types.h"
+#include "brw_structs.h"
+#include "brw_structs_dump.h"
+
+/** Print the fields of a struct brw_3d_control via debug_printf, one per line. */
+void brw_dump_3d_control(const struct brw_3d_control *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.notify_enable = 0x%x\n", ptr->header.notify_enable);
+   debug_printf("\t\t.header.pad = 0x%x\n", ptr->header.pad);
+   debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", ptr->header.wc_flush_enable);
+   debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", ptr->header.depth_stall_enable);
+   debug_printf("\t\t.header.operation = 0x%x\n", ptr->header.operation);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.dest.pad = 0x%x\n", ptr->dest.pad);
+   debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", ptr->dest.dest_addr_type);
+   debug_printf("\t\t.dest.dest_addr = 0x%x\n", ptr->dest.dest_addr);
+   debug_printf("\t\t.dword2 = 0x%x\n", ptr->dword2);
+   debug_printf("\t\t.dword3 = 0x%x\n", ptr->dword3);
+}
+
+/** Print the fields of a struct brw_3d_primitive via debug_printf, one per line. */
+void brw_dump_3d_primitive(const struct brw_3d_primitive *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.pad = 0x%x\n", ptr->header.pad);
+   debug_printf("\t\t.header.topology = 0x%x\n", ptr->header.topology);
+   debug_printf("\t\t.header.indexed = 0x%x\n", ptr->header.indexed);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.verts_per_instance = 0x%x\n", ptr->verts_per_instance);
+   debug_printf("\t\t.start_vert_location = 0x%x\n", ptr->start_vert_location);
+   debug_printf("\t\t.instance_count = 0x%x\n", ptr->instance_count);
+   debug_printf("\t\t.start_instance_location = 0x%x\n", ptr->start_instance_location);
+   debug_printf("\t\t.base_vert_location = 0x%x\n", ptr->base_vert_location);
+}
+
+/** Print the fields of a struct brw_aa_line_parameters via debug_printf, one per line. */
+void brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", ptr->bits0.aa_coverage_scope);
+   debug_printf("\t\t.bits0.pad0 = 0x%x\n", ptr->bits0.pad0);
+   debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", ptr->bits0.aa_coverage_bias);
+   debug_printf("\t\t.bits0.pad1 = 0x%x\n", ptr->bits0.pad1);
+   debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", ptr->bits1.aa_coverage_endcap_slope);
+   debug_printf("\t\t.bits1.pad0 = 0x%x\n", ptr->bits1.pad0);
+   debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", ptr->bits1.aa_coverage_endcap_bias);
+   debug_printf("\t\t.bits1.pad1 = 0x%x\n", ptr->bits1.pad1);
+}
+
+/** Print the fields of a struct brw_binding_table_pointers via debug_printf, one per line. */
+void brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.vs = 0x%x\n", ptr->vs);
+   debug_printf("\t\t.gs = 0x%x\n", ptr->gs);
+   debug_printf("\t\t.clp = 0x%x\n", ptr->clp);
+   debug_printf("\t\t.sf = 0x%x\n", ptr->sf);
+   debug_printf("\t\t.wm = 0x%x\n", ptr->wm);
+}
+
+/** Print the fields of a struct brw_blend_constant_color via debug_printf, one per line. */
+void brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.blend_constant_color[0] = %f\n", ptr->blend_constant_color[0]);
+   debug_printf("\t\t.blend_constant_color[1] = %f\n", ptr->blend_constant_color[1]);
+   debug_printf("\t\t.blend_constant_color[2] = %f\n", ptr->blend_constant_color[2]);
+   debug_printf("\t\t.blend_constant_color[3] = %f\n", ptr->blend_constant_color[3]);
+}
+
+/** Print the fields of a struct brw_cc0 via debug_printf, one per line. */
+void brw_dump_cc0(const struct brw_cc0 *ptr)
+{
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", ptr->bf_stencil_pass_depth_pass_op);
+   debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", ptr->bf_stencil_pass_depth_fail_op);
+   debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", ptr->bf_stencil_fail_op);
+   debug_printf("\t\t.bf_stencil_func = 0x%x\n", ptr->bf_stencil_func);
+   debug_printf("\t\t.bf_stencil_enable = 0x%x\n", ptr->bf_stencil_enable);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.stencil_write_enable = 0x%x\n", ptr->stencil_write_enable);
+   debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", ptr->stencil_pass_depth_pass_op);
+   debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", ptr->stencil_pass_depth_fail_op);
+   debug_printf("\t\t.stencil_fail_op = 0x%x\n", ptr->stencil_fail_op);
+   debug_printf("\t\t.stencil_func = 0x%x\n", ptr->stencil_func);
+   debug_printf("\t\t.stencil_enable = 0x%x\n", ptr->stencil_enable);
+}
+
+/** Print the fields of a struct brw_cc1 via debug_printf, one per line. */
+void brw_dump_cc1(const struct brw_cc1 *ptr)
+{
+   debug_printf("\t\t.bf_stencil_ref = 0x%x\n", ptr->bf_stencil_ref);
+   debug_printf("\t\t.stencil_write_mask = 0x%x\n", ptr->stencil_write_mask);
+   debug_printf("\t\t.stencil_test_mask = 0x%x\n", ptr->stencil_test_mask);
+   debug_printf("\t\t.stencil_ref = 0x%x\n", ptr->stencil_ref);
+}
+
+/** Print the fields of a struct brw_cc2 via debug_printf, one per line. */
+void brw_dump_cc2(const struct brw_cc2 *ptr)
+{
+   debug_printf("\t\t.logicop_enable = 0x%x\n", ptr->logicop_enable);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.depth_write_enable = 0x%x\n", ptr->depth_write_enable);
+   debug_printf("\t\t.depth_test_function = 0x%x\n", ptr->depth_test_function);
+   debug_printf("\t\t.depth_test = 0x%x\n", ptr->depth_test);
+   debug_printf("\t\t.bf_stencil_write_mask = 0x%x\n", ptr->bf_stencil_write_mask);
+   debug_printf("\t\t.bf_stencil_test_mask = 0x%x\n", ptr->bf_stencil_test_mask);
+}
+
+/** Print the fields of a struct brw_cc3 via debug_printf, one per line. */
+void brw_dump_cc3(const struct brw_cc3 *ptr)
+{
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.alpha_test_func = 0x%x\n", ptr->alpha_test_func);
+   debug_printf("\t\t.alpha_test = 0x%x\n", ptr->alpha_test);
+   debug_printf("\t\t.blend_enable = 0x%x\n", ptr->blend_enable);
+   debug_printf("\t\t.ia_blend_enable = 0x%x\n", ptr->ia_blend_enable);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.alpha_test_format = 0x%x\n", ptr->alpha_test_format);
+   debug_printf("\t\t.pad2 = 0x%x\n", ptr->pad2);
+}
+
+/** Print the fields of a struct brw_cc4 via debug_printf, one per line. */
+void brw_dump_cc4(const struct brw_cc4 *ptr)
+{
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", ptr->cc_viewport_state_offset);
+}
+
+/** Print the fields of a struct brw_cc5 via debug_printf, one per line. */
+void brw_dump_cc5(const struct brw_cc5 *ptr)
+{
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", ptr->ia_dest_blend_factor);
+   debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", ptr->ia_src_blend_factor);
+   debug_printf("\t\t.ia_blend_function = 0x%x\n", ptr->ia_blend_function);
+   debug_printf("\t\t.statistics_enable = 0x%x\n", ptr->statistics_enable);
+   debug_printf("\t\t.logicop_func = 0x%x\n", ptr->logicop_func);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.dither_enable = 0x%x\n", ptr->dither_enable);
+}
+
+/** Print the fields of a struct brw_cc6 via debug_printf, one per line. */
+void brw_dump_cc6(const struct brw_cc6 *ptr)
+{
+   debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", ptr->clamp_post_alpha_blend);
+   debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", ptr->clamp_pre_alpha_blend);
+   debug_printf("\t\t.clamp_range = 0x%x\n", ptr->clamp_range);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.y_dither_offset = 0x%x\n", ptr->y_dither_offset);
+   debug_printf("\t\t.x_dither_offset = 0x%x\n", ptr->x_dither_offset);
+   debug_printf("\t\t.dest_blend_factor = 0x%x\n", ptr->dest_blend_factor);
+   debug_printf("\t\t.src_blend_factor = 0x%x\n", ptr->src_blend_factor);
+   debug_printf("\t\t.blend_function = 0x%x\n", ptr->blend_function);
+}
+
+/** Print the fields of a struct brw_cc7 via debug_printf, one per line. */
+void brw_dump_cc7(const struct brw_cc7 *ptr)
+{
+   debug_printf("\t\t.alpha_ref.f = %f\n", ptr->alpha_ref.f);
+   debug_printf("\t\t.alpha_ref.ub[0] = 0x%x\n", ptr->alpha_ref.ub[0]);
+   debug_printf("\t\t.alpha_ref.ub[1] = 0x%x\n", ptr->alpha_ref.ub[1]);
+   debug_printf("\t\t.alpha_ref.ub[2] = 0x%x\n", ptr->alpha_ref.ub[2]);
+   debug_printf("\t\t.alpha_ref.ub[3] = 0x%x\n", ptr->alpha_ref.ub[3]);
+}
+
+/** Print the cc0..cc7 fields of a struct brw_cc_unit_state via debug_printf, one per line. */
+void brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr)
+{
+   debug_printf("\t\t.cc0.pad0 = 0x%x\n", ptr->cc0.pad0);
+   debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", ptr->cc0.bf_stencil_pass_depth_pass_op);
+   debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", ptr->cc0.bf_stencil_pass_depth_fail_op);
+   debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", ptr->cc0.bf_stencil_fail_op);
+   debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", ptr->cc0.bf_stencil_func);
+   debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", ptr->cc0.bf_stencil_enable);
+   debug_printf("\t\t.cc0.pad1 = 0x%x\n", ptr->cc0.pad1);
+   debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", ptr->cc0.stencil_write_enable);
+   debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", ptr->cc0.stencil_pass_depth_pass_op);
+   debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", ptr->cc0.stencil_pass_depth_fail_op);
+   debug_printf("\t\t.cc0.stencil_fail_op = 0x%x\n", ptr->cc0.stencil_fail_op);
+   debug_printf("\t\t.cc0.stencil_func = 0x%x\n", ptr->cc0.stencil_func);
+   debug_printf("\t\t.cc0.stencil_enable = 0x%x\n", ptr->cc0.stencil_enable);
+   debug_printf("\t\t.cc1.bf_stencil_ref = 0x%x\n", ptr->cc1.bf_stencil_ref);
+   debug_printf("\t\t.cc1.stencil_write_mask = 0x%x\n", ptr->cc1.stencil_write_mask);
+   debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", ptr->cc1.stencil_test_mask);
+   debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", ptr->cc1.stencil_ref);
+   debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", ptr->cc2.logicop_enable);
+   debug_printf("\t\t.cc2.pad0 = 0x%x\n", ptr->cc2.pad0);
+   debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", ptr->cc2.depth_write_enable);
+   debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", ptr->cc2.depth_test_function);
+   debug_printf("\t\t.cc2.depth_test = 0x%x\n", ptr->cc2.depth_test);
+   debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", ptr->cc2.bf_stencil_write_mask);
+   debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", ptr->cc2.bf_stencil_test_mask);
+   debug_printf("\t\t.cc3.pad0 = 0x%x\n", ptr->cc3.pad0);
+   debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", ptr->cc3.alpha_test_func);
+   debug_printf("\t\t.cc3.alpha_test = 0x%x\n", ptr->cc3.alpha_test);
+   debug_printf("\t\t.cc3.blend_enable = 0x%x\n", ptr->cc3.blend_enable);
+   debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", ptr->cc3.ia_blend_enable);
+   debug_printf("\t\t.cc3.pad1 = 0x%x\n", ptr->cc3.pad1);
+   debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", ptr->cc3.alpha_test_format);
+   debug_printf("\t\t.cc3.pad2 = 0x%x\n", ptr->cc3.pad2);
+   debug_printf("\t\t.cc4.pad0 = 0x%x\n", ptr->cc4.pad0);
+   debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", ptr->cc4.cc_viewport_state_offset);
+   debug_printf("\t\t.cc5.pad0 = 0x%x\n", ptr->cc5.pad0);
+   debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", ptr->cc5.ia_dest_blend_factor);
+   debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", ptr->cc5.ia_src_blend_factor);
+   debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", ptr->cc5.ia_blend_function);
+   debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", ptr->cc5.statistics_enable);
+   debug_printf("\t\t.cc5.logicop_func = 0x%x\n", ptr->cc5.logicop_func);
+   debug_printf("\t\t.cc5.pad1 = 0x%x\n", ptr->cc5.pad1);
+   debug_printf("\t\t.cc5.dither_enable = 0x%x\n", ptr->cc5.dither_enable);
+   debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", ptr->cc6.clamp_post_alpha_blend);
+   debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", ptr->cc6.clamp_pre_alpha_blend);
+   debug_printf("\t\t.cc6.clamp_range = 0x%x\n", ptr->cc6.clamp_range);
+   debug_printf("\t\t.cc6.pad0 = 0x%x\n", ptr->cc6.pad0);
+   debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", ptr->cc6.y_dither_offset);
+   debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", ptr->cc6.x_dither_offset);
+   debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", ptr->cc6.dest_blend_factor);
+   debug_printf("\t\t.cc6.src_blend_factor = 0x%x\n", ptr->cc6.src_blend_factor);
+   debug_printf("\t\t.cc6.blend_function = 0x%x\n", ptr->cc6.blend_function);
+   debug_printf("\t\t.cc7.alpha_ref.f = %f\n", ptr->cc7.alpha_ref.f);
+   debug_printf("\t\t.cc7.alpha_ref.ub[0] = 0x%x\n", ptr->cc7.alpha_ref.ub[0]);
+   debug_printf("\t\t.cc7.alpha_ref.ub[1] = 0x%x\n", ptr->cc7.alpha_ref.ub[1]);
+   debug_printf("\t\t.cc7.alpha_ref.ub[2] = 0x%x\n", ptr->cc7.alpha_ref.ub[2]);
+   debug_printf("\t\t.cc7.alpha_ref.ub[3] = 0x%x\n", ptr->cc7.alpha_ref.ub[3]);
+}
+
+/** Print the fields of a struct brw_cc_viewport via debug_printf, one per line. */
+void brw_dump_cc_viewport(const struct brw_cc_viewport *ptr)
+{
+   debug_printf("\t\t.min_depth = %f\n", ptr->min_depth);
+   debug_printf("\t\t.max_depth = %f\n", ptr->max_depth);
+}
+
+/** Print the fields of a struct brw_clip_unit_state via debug_printf, one per line. */
+void brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr)
+{
+   debug_printf("\t\t.thread0.pad0 = 0x%x\n", ptr->thread0.pad0);
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.pad1 = 0x%x\n", ptr->thread0.pad1);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.pad0 = 0x%x\n", ptr->thread1.pad0);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.pad1 = 0x%x\n", ptr->thread1.pad1);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.pad2 = 0x%x\n", ptr->thread1.pad2);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.pad3 = 0x%x\n", ptr->thread1.pad3);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.pad4 = 0x%x\n", ptr->thread1.pad4);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.pad0 = 0x%x\n", ptr->thread2.pad0);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad0 = 0x%x\n", ptr->thread3.pad0);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad1 = 0x%x\n", ptr->thread3.pad1);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad2 = 0x%x\n", ptr->thread3.pad2);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad3 = 0x%x\n", ptr->thread3.pad3);
+   debug_printf("\t\t.thread4.pad0 = 0x%x\n", ptr->thread4.pad0);
+   debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", ptr->thread4.gs_output_stats);
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.pad1 = 0x%x\n", ptr->thread4.pad1);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.pad2 = 0x%x\n", ptr->thread4.pad2);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+   debug_printf("\t\t.thread4.pad3 = 0x%x\n", ptr->thread4.pad3);
+   debug_printf("\t\t.clip5.pad0 = 0x%x\n", ptr->clip5.pad0);
+   debug_printf("\t\t.clip5.clip_mode = 0x%x\n", ptr->clip5.clip_mode);
+   debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", ptr->clip5.userclip_enable_flags);
+   debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", ptr->clip5.userclip_must_clip);
+   debug_printf("\t\t.clip5.negative_w_clip_test = 0x%x\n", ptr->clip5.negative_w_clip_test);
+   debug_printf("\t\t.clip5.guard_band_enable = 0x%x\n", ptr->clip5.guard_band_enable);
+   debug_printf("\t\t.clip5.viewport_z_clip_enable = 0x%x\n", ptr->clip5.viewport_z_clip_enable);
+   debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", ptr->clip5.viewport_xy_clip_enable);
+   debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", ptr->clip5.vertex_position_space);
+   debug_printf("\t\t.clip5.api_mode = 0x%x\n", ptr->clip5.api_mode);
+   debug_printf("\t\t.clip5.pad2 = 0x%x\n", ptr->clip5.pad2);
+   debug_printf("\t\t.clip6.pad0 = 0x%x\n", ptr->clip6.pad0);
+   debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", ptr->clip6.clipper_viewport_state_ptr);
+   debug_printf("\t\t.viewport_xmin = %f\n", ptr->viewport_xmin);
+   debug_printf("\t\t.viewport_xmax = %f\n", ptr->viewport_xmax);
+   debug_printf("\t\t.viewport_ymin = %f\n", ptr->viewport_ymin);
+   debug_printf("\t\t.viewport_ymax = %f\n", ptr->viewport_ymax);
+}
+
+/** Print the fields of a struct brw_clipper_viewport via debug_printf, one per line. */
+void brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr)
+{
+   debug_printf("\t\t.xmin = %f\n", ptr->xmin);
+   debug_printf("\t\t.xmax = %f\n", ptr->xmax);
+   debug_printf("\t\t.ymin = %f\n", ptr->ymin);
+   debug_printf("\t\t.ymax = %f\n", ptr->ymax);
+}
+
+/** Print the fields of a struct brw_constant_buffer via debug_printf, one per line. */
+void brw_dump_constant_buffer(const struct brw_constant_buffer *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.valid = 0x%x\n", ptr->header.valid);
+   debug_printf("\t\t.header.pad = 0x%x\n", ptr->header.pad);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.buffer_length = 0x%x\n", ptr->bits0.buffer_length);
+   debug_printf("\t\t.bits0.buffer_address = 0x%x\n", ptr->bits0.buffer_address);
+}
+
+/** Print the fields of a struct brw_cs_urb_state via debug_printf, one per line. */
+void brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", ptr->bits0.nr_urb_entries);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", ptr->bits0.urb_entry_size);
+   debug_printf("\t\t.bits0.pad0 = 0x%x\n", ptr->bits0.pad0);
+}
+
+/** Print the fields of a struct brw_depthbuffer via debug_printf, one per line. */
+void brw_dump_depthbuffer(const struct brw_depthbuffer *ptr)
+{
+   debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+   debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+   debug_printf("\t\t.header.dword = 0x%x\n", ptr->header.dword);
+   debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", ptr->dword1.bits.pitch);
+   debug_printf("\t\t.dword1.bits.format = 0x%x\n", ptr->dword1.bits.format);
+   debug_printf("\t\t.dword1.bits.pad = 0x%x\n", ptr->dword1.bits.pad);
+   debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", ptr->dword1.bits.software_tiled_rendering_mode);
+   debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", ptr->dword1.bits.depth_offset_disable);
+   debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", ptr->dword1.bits.tile_walk);
+   debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", ptr->dword1.bits.tiled_surface);
+   debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", ptr->dword1.bits.pad2);
+   debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", ptr->dword1.bits.surface_type);
+   debug_printf("\t\t.dword1.dword = 0x%x\n", ptr->dword1.dword);
+   debug_printf("\t\t.dword2_base_addr = 0x%x\n", ptr->dword2_base_addr);
+   debug_printf("\t\t.dword3.bits.pad = 0x%x\n", ptr->dword3.bits.pad);
+   debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", ptr->dword3.bits.mipmap_layout);
+   debug_printf("\t\t.dword3.bits.lod = 0x%x\n", ptr->dword3.bits.lod);
+   debug_printf("\t\t.dword3.bits.width = 0x%x\n", ptr->dword3.bits.width);
+   debug_printf("\t\t.dword3.bits.height = 0x%x\n", ptr->dword3.bits.height);
+   debug_printf("\t\t.dword3.dword = 0x%x\n", ptr->dword3.dword);
+   debug_printf("\t\t.dword4.bits.pad = 0x%x\n", ptr->dword4.bits.pad);
+   debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", ptr->dword4.bits.min_array_element);
+   debug_printf("\t\t.dword4.bits.depth = 0x%x\n", ptr->dword4.bits.depth);
+   debug_printf("\t\t.dword4.dword = 0x%x\n", ptr->dword4.dword);
+}
+
+/** Print the fields of a struct brw_depthbuffer_g4x via debug_printf, one per line. */
+void brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr)
+{
+   debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+   debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+   debug_printf("\t\t.header.dword = 0x%x\n", ptr->header.dword);
+   debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", ptr->dword1.bits.pitch);
+   debug_printf("\t\t.dword1.bits.format = 0x%x\n", ptr->dword1.bits.format);
+   debug_printf("\t\t.dword1.bits.pad = 0x%x\n", ptr->dword1.bits.pad);
+   debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", ptr->dword1.bits.software_tiled_rendering_mode);
+   debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", ptr->dword1.bits.depth_offset_disable);
+   debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", ptr->dword1.bits.tile_walk);
+   debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", ptr->dword1.bits.tiled_surface);
+   debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", ptr->dword1.bits.pad2);
+   debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", ptr->dword1.bits.surface_type);
+   debug_printf("\t\t.dword1.dword = 0x%x\n", ptr->dword1.dword);
+   debug_printf("\t\t.dword2_base_addr = 0x%x\n", ptr->dword2_base_addr);
+   debug_printf("\t\t.dword3.bits.pad = 0x%x\n", ptr->dword3.bits.pad);
+   debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", ptr->dword3.bits.mipmap_layout);
+   debug_printf("\t\t.dword3.bits.lod = 0x%x\n", ptr->dword3.bits.lod);
+   debug_printf("\t\t.dword3.bits.width = 0x%x\n", ptr->dword3.bits.width);
+   debug_printf("\t\t.dword3.bits.height = 0x%x\n", ptr->dword3.bits.height);
+   debug_printf("\t\t.dword3.dword = 0x%x\n", ptr->dword3.dword);
+   debug_printf("\t\t.dword4.bits.pad = 0x%x\n", ptr->dword4.bits.pad);
+   debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", ptr->dword4.bits.min_array_element);
+   debug_printf("\t\t.dword4.bits.depth = 0x%x\n", ptr->dword4.bits.depth);
+   debug_printf("\t\t.dword4.dword = 0x%x\n", ptr->dword4.dword);
+   debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", ptr->dword5.bits.xoffset);
+   debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", ptr->dword5.bits.yoffset);
+   debug_printf("\t\t.dword5.dword = 0x%x\n", ptr->dword5.dword);
+}
+
+/** Print the fields of a struct brw_drawrect via debug_printf, one per line. */
+void brw_dump_drawrect(const struct brw_drawrect *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.xmin = 0x%x\n", ptr->xmin);
+   debug_printf("\t\t.ymin = 0x%x\n", ptr->ymin);
+   debug_printf("\t\t.xmax = 0x%x\n", ptr->xmax);
+   debug_printf("\t\t.ymax = 0x%x\n", ptr->ymax);
+   debug_printf("\t\t.xorg = 0x%x\n", ptr->xorg);
+   debug_printf("\t\t.yorg = 0x%x\n", ptr->yorg);
+}
+
+/** Print the fields of a struct brw_global_depth_offset_clamp via debug_printf, one per line. */
+void brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.depth_offset_clamp = %f\n", ptr->depth_offset_clamp);
+}
+
+/** Print the fields of a struct brw_gs_unit_state via debug_printf, one per line. */
+void brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr)
+{
+   debug_printf("\t\t.thread0.pad0 = 0x%x\n", ptr->thread0.pad0);
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.pad1 = 0x%x\n", ptr->thread0.pad1);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.pad0 = 0x%x\n", ptr->thread1.pad0);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.pad1 = 0x%x\n", ptr->thread1.pad1);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.pad3 = 0x%x\n", ptr->thread1.pad3);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.pad0 = 0x%x\n", ptr->thread2.pad0);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad0 = 0x%x\n", ptr->thread3.pad0);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad1 = 0x%x\n", ptr->thread3.pad1);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad2 = 0x%x\n", ptr->thread3.pad2);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad3 = 0x%x\n", ptr->thread3.pad3);
+   debug_printf("\t\t.thread4.pad0 = 0x%x\n", ptr->thread4.pad0);
+   debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", ptr->thread4.rendering_enable);
+   debug_printf("\t\t.thread4.pad4 = 0x%x\n", ptr->thread4.pad4);
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.pad1 = 0x%x\n", ptr->thread4.pad1);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.pad2 = 0x%x\n", ptr->thread4.pad2);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+   debug_printf("\t\t.thread4.pad3 = 0x%x\n", ptr->thread4.pad3);
+   debug_printf("\t\t.gs5.sampler_count = 0x%x\n", ptr->gs5.sampler_count);
+   debug_printf("\t\t.gs5.pad0 = 0x%x\n", ptr->gs5.pad0);
+   debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", ptr->gs5.sampler_state_pointer);
+   debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", ptr->gs6.max_vp_index);
+   debug_printf("\t\t.gs6.pad0 = 0x%x\n", ptr->gs6.pad0);
+   debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", ptr->gs6.svbi_post_inc_value);
+   debug_printf("\t\t.gs6.pad1 = 0x%x\n", ptr->gs6.pad1);
+   debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", ptr->gs6.svbi_post_inc_enable);
+   debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", ptr->gs6.svbi_payload);
+   debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", ptr->gs6.discard_adjaceny);
+   debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", ptr->gs6.reorder_enable);
+   debug_printf("\t\t.gs6.pad2 = 0x%x\n", ptr->gs6.pad2);
+}
+
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr)
+{  /* One debug_printf line per field, in hex; header is shown both as .bits.* and as .dword. */
+   debug_printf("\t\t.header.bits.length = 0x%x\n", ptr->header.bits.length);
+   debug_printf("\t\t.header.bits.index_format = 0x%x\n", ptr->header.bits.index_format);
+   debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", ptr->header.bits.cut_index_enable);
+   debug_printf("\t\t.header.bits.pad = 0x%x\n", ptr->header.bits.pad);
+   debug_printf("\t\t.header.bits.opcode = 0x%x\n", ptr->header.bits.opcode);
+   debug_printf("\t\t.header.dword = 0x%x\n", ptr->header.dword);
+   debug_printf("\t\t.buffer_start = 0x%x\n", ptr->buffer_start);
+   debug_printf("\t\t.buffer_end = 0x%x\n", ptr->buffer_end);
+}
+
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr)
+{  /* One debug_printf line per field (header, bits0, bits1), values in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.pattern = 0x%x\n", ptr->bits0.pattern);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits1.repeat_count = 0x%x\n", ptr->bits1.repeat_count);
+   debug_printf("\t\t.bits1.pad = 0x%x\n", ptr->bits1.pad);
+   debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", ptr->bits1.inverse_repeat_count);
+}
+
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr)
+{  /* Prints the three members (flags, pad, opcode) in hex, one per line. */
+   debug_printf("\t\t.flags = 0x%x\n", ptr->flags);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+}
+
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr)
+{  /* One debug_printf line per field: header.*, bits1.*, then data0 and data1, all in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.notify_enable = 0x%x\n", ptr->header.notify_enable);
+   debug_printf("\t\t.header.texture_cache_flush_enable = 0x%x\n", ptr->header.texture_cache_flush_enable);
+   debug_printf("\t\t.header.indirect_state_pointers_disable = 0x%x\n", ptr->header.indirect_state_pointers_disable);
+   debug_printf("\t\t.header.instruction_state_cache_flush_enable = 0x%x\n", ptr->header.instruction_state_cache_flush_enable);
+   debug_printf("\t\t.header.write_cache_flush_enable = 0x%x\n", ptr->header.write_cache_flush_enable);
+   debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", ptr->header.depth_stall_enable);
+   debug_printf("\t\t.header.post_sync_operation = 0x%x\n", ptr->header.post_sync_operation);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits1.pad = 0x%x\n", ptr->bits1.pad);
+   debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", ptr->bits1.dest_addr_type);
+   debug_printf("\t\t.bits1.dest_addr = 0x%x\n", ptr->bits1.dest_addr);
+   debug_printf("\t\t.data0 = 0x%x\n", ptr->data0);
+   debug_printf("\t\t.data1 = 0x%x\n", ptr->data1);
+}
+
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr)
+{  /* Prints the three header members in hex, one per line. */
+   debug_printf("\t\t.header.pipeline_select = 0x%x\n", ptr->header.pipeline_select);
+   debug_printf("\t\t.header.pad = 0x%x\n", ptr->header.pad);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+}
+
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr)
+{  /* Prints header, then the vs/gs/clp/sf/wm/cc sub-structs member by member, in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.vs.pad = 0x%x\n", ptr->vs.pad);
+   debug_printf("\t\t.vs.offset = 0x%x\n", ptr->vs.offset);
+   debug_printf("\t\t.gs.enable = 0x%x\n", ptr->gs.enable);
+   debug_printf("\t\t.gs.pad = 0x%x\n", ptr->gs.pad);
+   debug_printf("\t\t.gs.offset = 0x%x\n", ptr->gs.offset);
+   debug_printf("\t\t.clp.enable = 0x%x\n", ptr->clp.enable);
+   debug_printf("\t\t.clp.pad = 0x%x\n", ptr->clp.pad);
+   debug_printf("\t\t.clp.offset = 0x%x\n", ptr->clp.offset);
+   debug_printf("\t\t.sf.pad = 0x%x\n", ptr->sf.pad);
+   debug_printf("\t\t.sf.offset = 0x%x\n", ptr->sf.offset);
+   debug_printf("\t\t.wm.pad = 0x%x\n", ptr->wm.pad);
+   debug_printf("\t\t.wm.offset = 0x%x\n", ptr->wm.offset);
+   debug_printf("\t\t.cc.pad = 0x%x\n", ptr->cc.pad);
+   debug_printf("\t\t.cc.offset = 0x%x\n", ptr->cc.offset);
+}
+
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr)
+{  /* Prints header.length/opcode, then stipple[0] through stipple[31], each in hex on its own line. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.stipple[0] = 0x%x\n", ptr->stipple[0]);
+   debug_printf("\t\t.stipple[1] = 0x%x\n", ptr->stipple[1]);
+   debug_printf("\t\t.stipple[2] = 0x%x\n", ptr->stipple[2]);
+   debug_printf("\t\t.stipple[3] = 0x%x\n", ptr->stipple[3]);
+   debug_printf("\t\t.stipple[4] = 0x%x\n", ptr->stipple[4]);
+   debug_printf("\t\t.stipple[5] = 0x%x\n", ptr->stipple[5]);
+   debug_printf("\t\t.stipple[6] = 0x%x\n", ptr->stipple[6]);
+   debug_printf("\t\t.stipple[7] = 0x%x\n", ptr->stipple[7]);
+   debug_printf("\t\t.stipple[8] = 0x%x\n", ptr->stipple[8]);
+   debug_printf("\t\t.stipple[9] = 0x%x\n", ptr->stipple[9]);
+   debug_printf("\t\t.stipple[10] = 0x%x\n", ptr->stipple[10]);
+   debug_printf("\t\t.stipple[11] = 0x%x\n", ptr->stipple[11]);
+   debug_printf("\t\t.stipple[12] = 0x%x\n", ptr->stipple[12]);
+   debug_printf("\t\t.stipple[13] = 0x%x\n", ptr->stipple[13]);
+   debug_printf("\t\t.stipple[14] = 0x%x\n", ptr->stipple[14]);
+   debug_printf("\t\t.stipple[15] = 0x%x\n", ptr->stipple[15]);
+   debug_printf("\t\t.stipple[16] = 0x%x\n", ptr->stipple[16]);
+   debug_printf("\t\t.stipple[17] = 0x%x\n", ptr->stipple[17]);
+   debug_printf("\t\t.stipple[18] = 0x%x\n", ptr->stipple[18]);
+   debug_printf("\t\t.stipple[19] = 0x%x\n", ptr->stipple[19]);
+   debug_printf("\t\t.stipple[20] = 0x%x\n", ptr->stipple[20]);
+   debug_printf("\t\t.stipple[21] = 0x%x\n", ptr->stipple[21]);
+   debug_printf("\t\t.stipple[22] = 0x%x\n", ptr->stipple[22]);
+   debug_printf("\t\t.stipple[23] = 0x%x\n", ptr->stipple[23]);
+   debug_printf("\t\t.stipple[24] = 0x%x\n", ptr->stipple[24]);
+   debug_printf("\t\t.stipple[25] = 0x%x\n", ptr->stipple[25]);
+   debug_printf("\t\t.stipple[26] = 0x%x\n", ptr->stipple[26]);
+   debug_printf("\t\t.stipple[27] = 0x%x\n", ptr->stipple[27]);
+   debug_printf("\t\t.stipple[28] = 0x%x\n", ptr->stipple[28]);
+   debug_printf("\t\t.stipple[29] = 0x%x\n", ptr->stipple[29]);
+   debug_printf("\t\t.stipple[30] = 0x%x\n", ptr->stipple[30]);
+   debug_printf("\t\t.stipple[31] = 0x%x\n", ptr->stipple[31]);
+}
+
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr)
+{  /* One debug_printf line per header.* and bits0.* member, values in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.y_offset = 0x%x\n", ptr->bits0.y_offset);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.x_offset = 0x%x\n", ptr->bits0.x_offset);
+   debug_printf("\t\t.bits0.pad0 = 0x%x\n", ptr->bits0.pad0);
+}
+
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr)
+{  /* Prints color[0]..color[3] with %f, one component per line. */
+   debug_printf("\t\t.color[0] = %f\n", ptr->color[0]);
+   debug_printf("\t\t.color[1] = %f\n", ptr->color[1]);
+   debug_printf("\t\t.color[2] = %f\n", ptr->color[2]);
+   debug_printf("\t\t.color[3] = %f\n", ptr->color[3]);
+}
+
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr)
+{  /* Walks ss0, ss1, ss2 and ss3 in order, printing every member in hex on its own line. */
+   debug_printf("\t\t.ss0.shadow_function = 0x%x\n", ptr->ss0.shadow_function);
+   debug_printf("\t\t.ss0.lod_bias = 0x%x\n", ptr->ss0.lod_bias);
+   debug_printf("\t\t.ss0.min_filter = 0x%x\n", ptr->ss0.min_filter);
+   debug_printf("\t\t.ss0.mag_filter = 0x%x\n", ptr->ss0.mag_filter);
+   debug_printf("\t\t.ss0.mip_filter = 0x%x\n", ptr->ss0.mip_filter);
+   debug_printf("\t\t.ss0.base_level = 0x%x\n", ptr->ss0.base_level);
+   debug_printf("\t\t.ss0.pad = 0x%x\n", ptr->ss0.pad);
+   debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", ptr->ss0.lod_preclamp);
+   debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", ptr->ss0.default_color_mode);
+   debug_printf("\t\t.ss0.pad0 = 0x%x\n", ptr->ss0.pad0);
+   debug_printf("\t\t.ss0.disable = 0x%x\n", ptr->ss0.disable);
+   debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", ptr->ss1.r_wrap_mode);
+   debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", ptr->ss1.t_wrap_mode);
+   debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", ptr->ss1.s_wrap_mode);
+   debug_printf("\t\t.ss1.pad = 0x%x\n", ptr->ss1.pad);
+   debug_printf("\t\t.ss1.max_lod = 0x%x\n", ptr->ss1.max_lod);
+   debug_printf("\t\t.ss1.min_lod = 0x%x\n", ptr->ss1.min_lod);
+   debug_printf("\t\t.ss2.pad = 0x%x\n", ptr->ss2.pad);
+   debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", ptr->ss2.default_color_pointer);
+   debug_printf("\t\t.ss3.pad = 0x%x\n", ptr->ss3.pad);
+   debug_printf("\t\t.ss3.max_aniso = 0x%x\n", ptr->ss3.max_aniso);
+   debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", ptr->ss3.chroma_key_mode);
+   debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", ptr->ss3.chroma_key_index);
+   debug_printf("\t\t.ss3.chroma_key_enable = 0x%x\n", ptr->ss3.chroma_key_enable);
+   debug_printf("\t\t.ss3.monochrome_filter_width = 0x%x\n", ptr->ss3.monochrome_filter_width);
+   debug_printf("\t\t.ss3.monochrome_filter_height = 0x%x\n", ptr->ss3.monochrome_filter_height);
+}
+
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr)
+{  /* Prints thread0..thread4 followed by sf5..sf7, one hex-formatted debug_printf line per member. */
+   debug_printf("\t\t.thread0.pad0 = 0x%x\n", ptr->thread0.pad0);
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.pad1 = 0x%x\n", ptr->thread0.pad1);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.pad0 = 0x%x\n", ptr->thread1.pad0);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.pad1 = 0x%x\n", ptr->thread1.pad1);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.pad3 = 0x%x\n", ptr->thread1.pad3);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.pad0 = 0x%x\n", ptr->thread2.pad0);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad0 = 0x%x\n", ptr->thread3.pad0);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad1 = 0x%x\n", ptr->thread3.pad1);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad2 = 0x%x\n", ptr->thread3.pad2);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad3 = 0x%x\n", ptr->thread3.pad3);
+   debug_printf("\t\t.thread4.pad0 = 0x%x\n", ptr->thread4.pad0);
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.pad1 = 0x%x\n", ptr->thread4.pad1);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.pad2 = 0x%x\n", ptr->thread4.pad2);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+   debug_printf("\t\t.thread4.pad3 = 0x%x\n", ptr->thread4.pad3);
+   debug_printf("\t\t.sf5.front_winding = 0x%x\n", ptr->sf5.front_winding);
+   debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", ptr->sf5.viewport_transform);
+   debug_printf("\t\t.sf5.pad0 = 0x%x\n", ptr->sf5.pad0);
+   debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", ptr->sf5.sf_viewport_state_offset);
+   debug_printf("\t\t.sf6.pad0 = 0x%x\n", ptr->sf6.pad0);
+   debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", ptr->sf6.dest_org_vbias);
+   debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", ptr->sf6.dest_org_hbias);
+   debug_printf("\t\t.sf6.scissor = 0x%x\n", ptr->sf6.scissor);
+   debug_printf("\t\t.sf6.disable_2x2_trifilter = 0x%x\n", ptr->sf6.disable_2x2_trifilter);
+   debug_printf("\t\t.sf6.disable_zero_pix_trifilter = 0x%x\n", ptr->sf6.disable_zero_pix_trifilter);
+   debug_printf("\t\t.sf6.point_rast_rule = 0x%x\n", ptr->sf6.point_rast_rule);
+   debug_printf("\t\t.sf6.line_endcap_aa_region_width = 0x%x\n", ptr->sf6.line_endcap_aa_region_width);
+   debug_printf("\t\t.sf6.line_width = 0x%x\n", ptr->sf6.line_width);
+   debug_printf("\t\t.sf6.fast_scissor_disable = 0x%x\n", ptr->sf6.fast_scissor_disable);
+   debug_printf("\t\t.sf6.cull_mode = 0x%x\n", ptr->sf6.cull_mode);
+   debug_printf("\t\t.sf6.aa_enable = 0x%x\n", ptr->sf6.aa_enable);
+   debug_printf("\t\t.sf7.point_size = 0x%x\n", ptr->sf7.point_size);
+   debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", ptr->sf7.use_point_size_state);
+   debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", ptr->sf7.subpixel_precision);
+   debug_printf("\t\t.sf7.sprite_point = 0x%x\n", ptr->sf7.sprite_point);
+   debug_printf("\t\t.sf7.pad0 = 0x%x\n", ptr->sf7.pad0);
+   debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", ptr->sf7.aa_line_distance_mode);
+   debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", ptr->sf7.trifan_pv);
+   debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", ptr->sf7.linestrip_pv);
+   debug_printf("\t\t.sf7.tristrip_pv = 0x%x\n", ptr->sf7.tristrip_pv);
+   debug_printf("\t\t.sf7.line_last_pixel_enable = 0x%x\n", ptr->sf7.line_last_pixel_enable);
+}
+
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr)
+{  /* viewport.m* members are printed with %f; scissor bounds are printed in hex. */
+   debug_printf("\t\t.viewport.m00 = %f\n", ptr->viewport.m00);
+   debug_printf("\t\t.viewport.m11 = %f\n", ptr->viewport.m11);
+   debug_printf("\t\t.viewport.m22 = %f\n", ptr->viewport.m22);
+   debug_printf("\t\t.viewport.m30 = %f\n", ptr->viewport.m30);
+   debug_printf("\t\t.viewport.m31 = %f\n", ptr->viewport.m31);
+   debug_printf("\t\t.viewport.m32 = %f\n", ptr->viewport.m32);
+   debug_printf("\t\t.scissor.xmin = 0x%x\n", ptr->scissor.xmin);
+   debug_printf("\t\t.scissor.ymin = 0x%x\n", ptr->scissor.ymin);
+   debug_printf("\t\t.scissor.xmax = 0x%x\n", ptr->scissor.xmax);
+   debug_printf("\t\t.scissor.ymax = 0x%x\n", ptr->scissor.ymax);
+}
+
+void
+brw_dump_ss0(const struct brw_ss0 *ptr)
+{  /* One debug_printf line per brw_ss0 member, values in hex. */
+   debug_printf("\t\t.shadow_function = 0x%x\n", ptr->shadow_function);
+   debug_printf("\t\t.lod_bias = 0x%x\n", ptr->lod_bias);
+   debug_printf("\t\t.min_filter = 0x%x\n", ptr->min_filter);
+   debug_printf("\t\t.mag_filter = 0x%x\n", ptr->mag_filter);
+   debug_printf("\t\t.mip_filter = 0x%x\n", ptr->mip_filter);
+   debug_printf("\t\t.base_level = 0x%x\n", ptr->base_level);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.lod_preclamp = 0x%x\n", ptr->lod_preclamp);
+   debug_printf("\t\t.default_color_mode = 0x%x\n", ptr->default_color_mode);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.disable = 0x%x\n", ptr->disable);
+}
+
+void
+brw_dump_ss1(const struct brw_ss1 *ptr)
+{  /* One debug_printf line per brw_ss1 member, values in hex. */
+   debug_printf("\t\t.r_wrap_mode = 0x%x\n", ptr->r_wrap_mode);
+   debug_printf("\t\t.t_wrap_mode = 0x%x\n", ptr->t_wrap_mode);
+   debug_printf("\t\t.s_wrap_mode = 0x%x\n", ptr->s_wrap_mode);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.max_lod = 0x%x\n", ptr->max_lod);
+   debug_printf("\t\t.min_lod = 0x%x\n", ptr->min_lod);
+}
+
+void
+brw_dump_ss2(const struct brw_ss2 *ptr)
+{  /* Prints pad and default_color_pointer in hex. */
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.default_color_pointer = 0x%x\n", ptr->default_color_pointer);
+}
+
+void
+brw_dump_ss3(const struct brw_ss3 *ptr)
+{  /* One debug_printf line per brw_ss3 member, values in hex. */
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.max_aniso = 0x%x\n", ptr->max_aniso);
+   debug_printf("\t\t.chroma_key_mode = 0x%x\n", ptr->chroma_key_mode);
+   debug_printf("\t\t.chroma_key_index = 0x%x\n", ptr->chroma_key_index);
+   debug_printf("\t\t.chroma_key_enable = 0x%x\n", ptr->chroma_key_enable);
+   debug_printf("\t\t.monochrome_filter_width = 0x%x\n", ptr->monochrome_filter_width);
+   debug_printf("\t\t.monochrome_filter_height = 0x%x\n", ptr->monochrome_filter_height);
+}
+
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr)
+{  /* Prints header, then bits0..bits4 (modify_enable, pad and the address/bound member of each), in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.modify_enable = 0x%x\n", ptr->bits0.modify_enable);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.general_state_address = 0x%x\n", ptr->bits0.general_state_address);
+   debug_printf("\t\t.bits1.modify_enable = 0x%x\n", ptr->bits1.modify_enable);
+   debug_printf("\t\t.bits1.pad = 0x%x\n", ptr->bits1.pad);
+   debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", ptr->bits1.surface_state_address);
+   debug_printf("\t\t.bits2.modify_enable = 0x%x\n", ptr->bits2.modify_enable);
+   debug_printf("\t\t.bits2.pad = 0x%x\n", ptr->bits2.pad);
+   debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", ptr->bits2.indirect_object_state_address);
+   debug_printf("\t\t.bits3.modify_enable = 0x%x\n", ptr->bits3.modify_enable);
+   debug_printf("\t\t.bits3.pad = 0x%x\n", ptr->bits3.pad);
+   debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", ptr->bits3.general_state_upper_bound);
+   debug_printf("\t\t.bits4.modify_enable = 0x%x\n", ptr->bits4.modify_enable);
+   debug_printf("\t\t.bits4.pad = 0x%x\n", ptr->bits4.pad);
+   debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", ptr->bits4.indirect_object_state_upper_bound);
+}
+
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr)
+{  /* One debug_printf line per header.* and bits0.* member, values in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", ptr->bits0.prefetch_count);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", ptr->bits0.prefetch_pointer);
+}
+
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
+{  /* One debug_printf line per brw_surf_ss0 member, values in hex. */
+   debug_printf("\t\t.cube_pos_z = 0x%x\n", ptr->cube_pos_z);
+   debug_printf("\t\t.cube_neg_z = 0x%x\n", ptr->cube_neg_z);
+   debug_printf("\t\t.cube_pos_y = 0x%x\n", ptr->cube_pos_y);
+   debug_printf("\t\t.cube_neg_y = 0x%x\n", ptr->cube_neg_y);
+   debug_printf("\t\t.cube_pos_x = 0x%x\n", ptr->cube_pos_x);
+   debug_printf("\t\t.cube_neg_x = 0x%x\n", ptr->cube_neg_x);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", ptr->mipmap_layout_mode);
+   debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", ptr->vert_line_stride_ofs);
+   debug_printf("\t\t.vert_line_stride = 0x%x\n", ptr->vert_line_stride);
+   debug_printf("\t\t.color_blend = 0x%x\n", ptr->color_blend);
+   debug_printf("\t\t.writedisable_blue = 0x%x\n", ptr->writedisable_blue);
+   debug_printf("\t\t.writedisable_green = 0x%x\n", ptr->writedisable_green);
+   debug_printf("\t\t.writedisable_red = 0x%x\n", ptr->writedisable_red);
+   debug_printf("\t\t.writedisable_alpha = 0x%x\n", ptr->writedisable_alpha);
+   debug_printf("\t\t.surface_format = 0x%x\n", ptr->surface_format);
+   debug_printf("\t\t.data_return_format = 0x%x\n", ptr->data_return_format);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.surface_type = 0x%x\n", ptr->surface_type);
+}
+
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr)
+{  /* Prints the single base_addr member in hex. */
+   debug_printf("\t\t.base_addr = 0x%x\n", ptr->base_addr);
+}
+
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr)
+{  /* Prints pad, mip_count, width and height in hex, one per line. */
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.mip_count = 0x%x\n", ptr->mip_count);
+   debug_printf("\t\t.width = 0x%x\n", ptr->width);
+   debug_printf("\t\t.height = 0x%x\n", ptr->height);
+}
+
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr)
+{  /* One debug_printf line per brw_surf_ss3 member, values in hex. */
+   debug_printf("\t\t.tile_walk = 0x%x\n", ptr->tile_walk);
+   debug_printf("\t\t.tiled_surface = 0x%x\n", ptr->tiled_surface);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.pitch = 0x%x\n", ptr->pitch);
+   debug_printf("\t\t.depth = 0x%x\n", ptr->depth);
+}
+
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
+{  /* One debug_printf line per brw_surf_ss4 member, values in hex. */
+   debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", ptr->multisample_position_palette_index);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.num_multisamples = 0x%x\n", ptr->num_multisamples);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.render_target_view_extent = 0x%x\n", ptr->render_target_view_extent);
+   debug_printf("\t\t.min_array_elt = 0x%x\n", ptr->min_array_elt);
+   debug_printf("\t\t.min_lod = 0x%x\n", ptr->min_lod);
+}
+
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr)
+{  /* One debug_printf line per brw_surf_ss5 member, values in hex. */
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.llc_mapping = 0x%x\n", ptr->llc_mapping);
+   debug_printf("\t\t.mlc_mapping = 0x%x\n", ptr->mlc_mapping);
+   debug_printf("\t\t.gfdt = 0x%x\n", ptr->gfdt);
+   debug_printf("\t\t.gfdt_src = 0x%x\n", ptr->gfdt_src);
+   debug_printf("\t\t.y_offset = 0x%x\n", ptr->y_offset);
+   debug_printf("\t\t.pad0 = 0x%x\n", ptr->pad0);
+   debug_printf("\t\t.x_offset = 0x%x\n", ptr->x_offset);
+}
+
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr)
+{  /* Walks ss0 through ss5 in order, printing every member in hex on its own line. */
+   debug_printf("\t\t.ss0.cube_pos_z = 0x%x\n", ptr->ss0.cube_pos_z);
+   debug_printf("\t\t.ss0.cube_neg_z = 0x%x\n", ptr->ss0.cube_neg_z);
+   debug_printf("\t\t.ss0.cube_pos_y = 0x%x\n", ptr->ss0.cube_pos_y);
+   debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", ptr->ss0.cube_neg_y);
+   debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", ptr->ss0.cube_pos_x);
+   debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", ptr->ss0.cube_neg_x);
+   debug_printf("\t\t.ss0.pad = 0x%x\n", ptr->ss0.pad);
+   debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", ptr->ss0.mipmap_layout_mode);
+   debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", ptr->ss0.vert_line_stride_ofs);
+   debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", ptr->ss0.vert_line_stride);
+   debug_printf("\t\t.ss0.color_blend = 0x%x\n", ptr->ss0.color_blend);
+   debug_printf("\t\t.ss0.writedisable_blue = 0x%x\n", ptr->ss0.writedisable_blue);
+   debug_printf("\t\t.ss0.writedisable_green = 0x%x\n", ptr->ss0.writedisable_green);
+   debug_printf("\t\t.ss0.writedisable_red = 0x%x\n", ptr->ss0.writedisable_red);
+   debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", ptr->ss0.writedisable_alpha);
+   debug_printf("\t\t.ss0.surface_format = 0x%x\n", ptr->ss0.surface_format);
+   debug_printf("\t\t.ss0.data_return_format = 0x%x\n", ptr->ss0.data_return_format);
+   debug_printf("\t\t.ss0.pad0 = 0x%x\n", ptr->ss0.pad0);
+   debug_printf("\t\t.ss0.surface_type = 0x%x\n", ptr->ss0.surface_type);
+   debug_printf("\t\t.ss1.base_addr = 0x%x\n", ptr->ss1.base_addr);
+   debug_printf("\t\t.ss2.pad = 0x%x\n", ptr->ss2.pad);
+   debug_printf("\t\t.ss2.mip_count = 0x%x\n", ptr->ss2.mip_count);
+   debug_printf("\t\t.ss2.width = 0x%x\n", ptr->ss2.width);
+   debug_printf("\t\t.ss2.height = 0x%x\n", ptr->ss2.height);
+   debug_printf("\t\t.ss3.tile_walk = 0x%x\n", ptr->ss3.tile_walk);
+   debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", ptr->ss3.tiled_surface);
+   debug_printf("\t\t.ss3.pad = 0x%x\n", ptr->ss3.pad);
+   debug_printf("\t\t.ss3.pitch = 0x%x\n", ptr->ss3.pitch);
+   debug_printf("\t\t.ss3.depth = 0x%x\n", ptr->ss3.depth);
+   debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", ptr->ss4.multisample_position_palette_index);
+   debug_printf("\t\t.ss4.pad1 = 0x%x\n", ptr->ss4.pad1);
+   debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", ptr->ss4.num_multisamples);
+   debug_printf("\t\t.ss4.pad0 = 0x%x\n", ptr->ss4.pad0);
+   debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", ptr->ss4.render_target_view_extent);
+   debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", ptr->ss4.min_array_elt);
+   debug_printf("\t\t.ss4.min_lod = 0x%x\n", ptr->ss4.min_lod);
+   debug_printf("\t\t.ss5.pad1 = 0x%x\n", ptr->ss5.pad1);
+   debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", ptr->ss5.llc_mapping);
+   debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", ptr->ss5.mlc_mapping);
+   debug_printf("\t\t.ss5.gfdt = 0x%x\n", ptr->ss5.gfdt);
+   debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", ptr->ss5.gfdt_src);
+   debug_printf("\t\t.ss5.y_offset = 0x%x\n", ptr->ss5.y_offset);
+   debug_printf("\t\t.ss5.pad0 = 0x%x\n", ptr->ss5.pad0);
+   debug_printf("\t\t.ss5.x_offset = 0x%x\n", ptr->ss5.x_offset);
+}
+
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr)
+{  /* One debug_printf line per header.* and bits0.* member, values in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", ptr->bits0.system_instruction_pointer);
+}
+
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr)
+{  /* Prints header (length, *_realloc flags, pad, opcode), then the bits0 and bits1 fence members, in hex. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.vs_realloc = 0x%x\n", ptr->header.vs_realloc);
+   debug_printf("\t\t.header.gs_realloc = 0x%x\n", ptr->header.gs_realloc);
+   debug_printf("\t\t.header.clp_realloc = 0x%x\n", ptr->header.clp_realloc);
+   debug_printf("\t\t.header.sf_realloc = 0x%x\n", ptr->header.sf_realloc);
+   debug_printf("\t\t.header.vfe_realloc = 0x%x\n", ptr->header.vfe_realloc);
+   debug_printf("\t\t.header.cs_realloc = 0x%x\n", ptr->header.cs_realloc);
+   debug_printf("\t\t.header.pad = 0x%x\n", ptr->header.pad);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.bits0.vs_fence = 0x%x\n", ptr->bits0.vs_fence);
+   debug_printf("\t\t.bits0.gs_fence = 0x%x\n", ptr->bits0.gs_fence);
+   debug_printf("\t\t.bits0.clp_fence = 0x%x\n", ptr->bits0.clp_fence);
+   debug_printf("\t\t.bits0.pad = 0x%x\n", ptr->bits0.pad);
+   debug_printf("\t\t.bits1.sf_fence = 0x%x\n", ptr->bits1.sf_fence);
+   debug_printf("\t\t.bits1.vf_fence = 0x%x\n", ptr->bits1.vf_fence);
+   debug_printf("\t\t.bits1.cs_fence = 0x%x\n", ptr->bits1.cs_fence);
+   debug_printf("\t\t.bits1.pad = 0x%x\n", ptr->bits1.pad);
+}
+
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr)
+{  /* One debug_printf line per brw_urb_immediate member, values in hex. */
+   debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+   debug_printf("\t\t.offset = 0x%x\n", ptr->offset);
+   debug_printf("\t\t.swizzle_control = 0x%x\n", ptr->swizzle_control);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.allocate = 0x%x\n", ptr->allocate);
+   debug_printf("\t\t.used = 0x%x\n", ptr->used);
+   debug_printf("\t\t.complete = 0x%x\n", ptr->complete);
+   debug_printf("\t\t.response_length = 0x%x\n", ptr->response_length);
+   debug_printf("\t\t.msg_length = 0x%x\n", ptr->msg_length);
+   debug_printf("\t\t.msg_target = 0x%x\n", ptr->msg_target);
+   debug_printf("\t\t.pad1 = 0x%x\n", ptr->pad1);
+   debug_printf("\t\t.end_of_thread = 0x%x\n", ptr->end_of_thread);
+}
+
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr)
+{
+   debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
+   debug_printf("\t\t.vb[0].vb0.pitch = 0x%x\n", (*ptr).vb[0].vb0.pitch);
+   debug_printf("\t\t.vb[0].vb0.pad = 0x%x\n", (*ptr).vb[0].vb0.pad);
+   debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type);
+   debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index);
+   debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr);
+   debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index);
+   debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate);
+   debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch);
+   debug_printf("\t\t.vb[1].vb0.pad = 0x%x\n", (*ptr).vb[1].vb0.pad);
+   debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type);
+   debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", (*ptr).vb[1].vb0.vb_index);
+   debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr);
+   debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index);
+   debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate);
+   debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch);
+   debug_printf("\t\t.vb[2].vb0.pad = 0x%x\n", (*ptr).vb[2].vb0.pad);
+   debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type);
+   debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index);
+   debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr);
+   debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index);
+   debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate);
+   debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch);
+   debug_printf("\t\t.vb[3].vb0.pad = 0x%x\n", (*ptr).vb[3].vb0.pad);
+   debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type);
+   debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index);
+   debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr);
+   debug_printf("\t\t.vb[3].max_index = 0x%x\n", (*ptr).vb[3].max_index);
+   debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate);
+   debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch);
+   debug_printf("\t\t.vb[4].vb0.pad = 0x%x\n", (*ptr).vb[4].vb0.pad);
+   debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type);
+   debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index);
+   debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr);
+   debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index);
+   debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate);
+   debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", (*ptr).vb[5].vb0.pitch);
+   debug_printf("\t\t.vb[5].vb0.pad = 0x%x\n", (*ptr).vb[5].vb0.pad);
+   debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type);
+   debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index);
+   debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr);
+   debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index);
+   debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate);
+   debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch);
+   debug_printf("\t\t.vb[6].vb0.pad = 0x%x\n", (*ptr).vb[6].vb0.pad);
+   debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type);
+   debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index);
+   debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr);
+   debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index);
+   debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate);
+   debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch);
+   debug_printf("\t\t.vb[7].vb0.pad = 0x%x\n", (*ptr).vb[7].vb0.pad);
+   debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type);
+   debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index);
+   debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr);
+   debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index);
+   debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate);
+   debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch);
+   debug_printf("\t\t.vb[8].vb0.pad = 0x%x\n", (*ptr).vb[8].vb0.pad);
+   debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type);
+   debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index);
+   debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr);
+   debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index);
+   debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate);
+   debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch);
+   debug_printf("\t\t.vb[9].vb0.pad = 0x%x\n", (*ptr).vb[9].vb0.pad);
+   debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type);
+   debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index);
+   debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr);
+   debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index);
+   debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate);
+   debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch);
+   debug_printf("\t\t.vb[10].vb0.pad = 0x%x\n", (*ptr).vb[10].vb0.pad);
+   debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type);
+   debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index);
+   debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr);
+   debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index);
+   debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate);
+   debug_printf("\t\t.vb[11].vb0.pitch = 0x%x\n", (*ptr).vb[11].vb0.pitch);
+   debug_printf("\t\t.vb[11].vb0.pad = 0x%x\n", (*ptr).vb[11].vb0.pad);
+   debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type);
+   debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index);
+   debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr);
+   debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index);
+   debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate);
+   debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch);
+   debug_printf("\t\t.vb[12].vb0.pad = 0x%x\n", (*ptr).vb[12].vb0.pad);
+   debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type);
+   debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index);
+   debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr);
+   debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index);
+   debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate);
+   debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch);
+   debug_printf("\t\t.vb[13].vb0.pad = 0x%x\n", (*ptr).vb[13].vb0.pad);
+   debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type);
+   debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index);
+   debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr);
+   debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index);
+   debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate);
+   debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch);
+   debug_printf("\t\t.vb[14].vb0.pad = 0x%x\n", (*ptr).vb[14].vb0.pad);
+   debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type);
+   debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index);
+   debug_printf("\t\t.vb[14].start_addr = 0x%x\n", (*ptr).vb[14].start_addr);
+   debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index);
+   debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate);
+   debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch);
+   debug_printf("\t\t.vb[15].vb0.pad = 0x%x\n", (*ptr).vb[15].vb0.pad);
+   debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type);
+   debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index);
+   debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr);
+   debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index);
+   debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate);
+   debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch);
+   debug_printf("\t\t.vb[16].vb0.pad = 0x%x\n", (*ptr).vb[16].vb0.pad);
+   debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type);
+   debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index);
+   debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr);
+   debug_printf("\t\t.vb[16].max_index = 0x%x\n", (*ptr).vb[16].max_index);
+   debug_printf("\t\t.vb[16].instance_data_step_rate = 0x%x\n", (*ptr).vb[16].instance_data_step_rate);
+}
+
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr)
+{  /* Emit one debug_printf line, in hex, for each member of *ptr listed below. */
+   debug_printf("\t\t.vb0.pitch = 0x%x\n", ptr->vb0.pitch);
+   debug_printf("\t\t.vb0.pad = 0x%x\n", ptr->vb0.pad);
+   debug_printf("\t\t.vb0.access_type = 0x%x\n", ptr->vb0.access_type);
+   debug_printf("\t\t.vb0.vb_index = 0x%x\n", ptr->vb0.vb_index);
+   debug_printf("\t\t.start_addr = 0x%x\n", ptr->start_addr);
+   debug_printf("\t\t.max_index = 0x%x\n", ptr->max_index);
+   debug_printf("\t\t.instance_data_step_rate = 0x%x\n", ptr->instance_data_step_rate);
+}
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr)
+{  /* Dump header.length/opcode, then the ve0/ve1 members of ve[0]..ve[17], one hex line each. */
+   debug_printf("\t\t.header.length = 0x%x\n", ptr->header.length);
+   debug_printf("\t\t.header.opcode = 0x%x\n", ptr->header.opcode);
+   debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", ptr->ve[0].ve0.src_offset);
+   debug_printf("\t\t.ve[0].ve0.pad = 0x%x\n", ptr->ve[0].ve0.pad);
+   debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", ptr->ve[0].ve0.src_format);
+   debug_printf("\t\t.ve[0].ve0.pad0 = 0x%x\n", ptr->ve[0].ve0.pad0);
+   debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", ptr->ve[0].ve0.valid);
+   debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[0].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", ptr->ve[0].ve1.dst_offset);
+   debug_printf("\t\t.ve[0].ve1.pad = 0x%x\n", ptr->ve[0].ve1.pad);
+   debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", ptr->ve[0].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", ptr->ve[0].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", ptr->ve[0].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", ptr->ve[0].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", ptr->ve[1].ve0.src_offset);
+   debug_printf("\t\t.ve[1].ve0.pad = 0x%x\n", ptr->ve[1].ve0.pad);
+   debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", ptr->ve[1].ve0.src_format);
+   debug_printf("\t\t.ve[1].ve0.pad0 = 0x%x\n", ptr->ve[1].ve0.pad0);
+   debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", ptr->ve[1].ve0.valid);
+   debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[1].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", ptr->ve[1].ve1.dst_offset);
+   debug_printf("\t\t.ve[1].ve1.pad = 0x%x\n", ptr->ve[1].ve1.pad);
+   debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", ptr->ve[1].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", ptr->ve[1].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", ptr->ve[1].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", ptr->ve[1].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", ptr->ve[2].ve0.src_offset);
+   debug_printf("\t\t.ve[2].ve0.pad = 0x%x\n", ptr->ve[2].ve0.pad);
+   debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", ptr->ve[2].ve0.src_format);
+   debug_printf("\t\t.ve[2].ve0.pad0 = 0x%x\n", ptr->ve[2].ve0.pad0);
+   debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", ptr->ve[2].ve0.valid);
+   debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[2].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", ptr->ve[2].ve1.dst_offset);
+   debug_printf("\t\t.ve[2].ve1.pad = 0x%x\n", ptr->ve[2].ve1.pad);
+   debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", ptr->ve[2].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", ptr->ve[2].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", ptr->ve[2].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", ptr->ve[2].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", ptr->ve[3].ve0.src_offset);
+   debug_printf("\t\t.ve[3].ve0.pad = 0x%x\n", ptr->ve[3].ve0.pad);
+   debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", ptr->ve[3].ve0.src_format);
+   debug_printf("\t\t.ve[3].ve0.pad0 = 0x%x\n", ptr->ve[3].ve0.pad0);
+   debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", ptr->ve[3].ve0.valid);
+   debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[3].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", ptr->ve[3].ve1.dst_offset);
+   debug_printf("\t\t.ve[3].ve1.pad = 0x%x\n", ptr->ve[3].ve1.pad);
+   debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", ptr->ve[3].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", ptr->ve[3].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", ptr->ve[3].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", ptr->ve[3].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", ptr->ve[4].ve0.src_offset);
+   debug_printf("\t\t.ve[4].ve0.pad = 0x%x\n", ptr->ve[4].ve0.pad);
+   debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", ptr->ve[4].ve0.src_format);
+   debug_printf("\t\t.ve[4].ve0.pad0 = 0x%x\n", ptr->ve[4].ve0.pad0);
+   debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", ptr->ve[4].ve0.valid);
+   debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[4].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", ptr->ve[4].ve1.dst_offset);
+   debug_printf("\t\t.ve[4].ve1.pad = 0x%x\n", ptr->ve[4].ve1.pad);
+   debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", ptr->ve[4].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", ptr->ve[4].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", ptr->ve[4].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", ptr->ve[4].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", ptr->ve[5].ve0.src_offset);
+   debug_printf("\t\t.ve[5].ve0.pad = 0x%x\n", ptr->ve[5].ve0.pad);
+   debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", ptr->ve[5].ve0.src_format);
+   debug_printf("\t\t.ve[5].ve0.pad0 = 0x%x\n", ptr->ve[5].ve0.pad0);
+   debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", ptr->ve[5].ve0.valid);
+   debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[5].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", ptr->ve[5].ve1.dst_offset);
+   debug_printf("\t\t.ve[5].ve1.pad = 0x%x\n", ptr->ve[5].ve1.pad);
+   debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", ptr->ve[5].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", ptr->ve[5].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", ptr->ve[5].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", ptr->ve[5].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", ptr->ve[6].ve0.src_offset);
+   debug_printf("\t\t.ve[6].ve0.pad = 0x%x\n", ptr->ve[6].ve0.pad);
+   debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", ptr->ve[6].ve0.src_format);
+   debug_printf("\t\t.ve[6].ve0.pad0 = 0x%x\n", ptr->ve[6].ve0.pad0);
+   debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", ptr->ve[6].ve0.valid);
+   debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[6].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", ptr->ve[6].ve1.dst_offset);
+   debug_printf("\t\t.ve[6].ve1.pad = 0x%x\n", ptr->ve[6].ve1.pad);
+   debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", ptr->ve[6].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", ptr->ve[6].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", ptr->ve[6].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", ptr->ve[6].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", ptr->ve[7].ve0.src_offset);
+   debug_printf("\t\t.ve[7].ve0.pad = 0x%x\n", ptr->ve[7].ve0.pad);
+   debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", ptr->ve[7].ve0.src_format);
+   debug_printf("\t\t.ve[7].ve0.pad0 = 0x%x\n", ptr->ve[7].ve0.pad0);
+   debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", ptr->ve[7].ve0.valid);
+   debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[7].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", ptr->ve[7].ve1.dst_offset);
+   debug_printf("\t\t.ve[7].ve1.pad = 0x%x\n", ptr->ve[7].ve1.pad);
+   debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", ptr->ve[7].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", ptr->ve[7].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", ptr->ve[7].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", ptr->ve[7].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", ptr->ve[8].ve0.src_offset);
+   debug_printf("\t\t.ve[8].ve0.pad = 0x%x\n", ptr->ve[8].ve0.pad);
+   debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", ptr->ve[8].ve0.src_format);
+   debug_printf("\t\t.ve[8].ve0.pad0 = 0x%x\n", ptr->ve[8].ve0.pad0);
+   debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", ptr->ve[8].ve0.valid);
+   debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[8].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", ptr->ve[8].ve1.dst_offset);
+   debug_printf("\t\t.ve[8].ve1.pad = 0x%x\n", ptr->ve[8].ve1.pad);
+   debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", ptr->ve[8].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", ptr->ve[8].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", ptr->ve[8].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", ptr->ve[8].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", ptr->ve[9].ve0.src_offset);
+   debug_printf("\t\t.ve[9].ve0.pad = 0x%x\n", ptr->ve[9].ve0.pad);
+   debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", ptr->ve[9].ve0.src_format);
+   debug_printf("\t\t.ve[9].ve0.pad0 = 0x%x\n", ptr->ve[9].ve0.pad0);
+   debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", ptr->ve[9].ve0.valid);
+   debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[9].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", ptr->ve[9].ve1.dst_offset);
+   debug_printf("\t\t.ve[9].ve1.pad = 0x%x\n", ptr->ve[9].ve1.pad);
+   debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", ptr->ve[9].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", ptr->ve[9].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", ptr->ve[9].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", ptr->ve[9].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", ptr->ve[10].ve0.src_offset);
+   debug_printf("\t\t.ve[10].ve0.pad = 0x%x\n", ptr->ve[10].ve0.pad);
+   debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", ptr->ve[10].ve0.src_format);
+   debug_printf("\t\t.ve[10].ve0.pad0 = 0x%x\n", ptr->ve[10].ve0.pad0);
+   debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", ptr->ve[10].ve0.valid);
+   debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[10].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", ptr->ve[10].ve1.dst_offset);
+   debug_printf("\t\t.ve[10].ve1.pad = 0x%x\n", ptr->ve[10].ve1.pad);
+   debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", ptr->ve[10].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", ptr->ve[10].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", ptr->ve[10].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", ptr->ve[10].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", ptr->ve[11].ve0.src_offset);
+   debug_printf("\t\t.ve[11].ve0.pad = 0x%x\n", ptr->ve[11].ve0.pad);
+   debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", ptr->ve[11].ve0.src_format);
+   debug_printf("\t\t.ve[11].ve0.pad0 = 0x%x\n", ptr->ve[11].ve0.pad0);
+   debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", ptr->ve[11].ve0.valid);
+   debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[11].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", ptr->ve[11].ve1.dst_offset);
+   debug_printf("\t\t.ve[11].ve1.pad = 0x%x\n", ptr->ve[11].ve1.pad);
+   debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", ptr->ve[11].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", ptr->ve[11].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", ptr->ve[11].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", ptr->ve[11].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", ptr->ve[12].ve0.src_offset);
+   debug_printf("\t\t.ve[12].ve0.pad = 0x%x\n", ptr->ve[12].ve0.pad);
+   debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", ptr->ve[12].ve0.src_format);
+   debug_printf("\t\t.ve[12].ve0.pad0 = 0x%x\n", ptr->ve[12].ve0.pad0);
+   debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", ptr->ve[12].ve0.valid);
+   debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[12].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", ptr->ve[12].ve1.dst_offset);
+   debug_printf("\t\t.ve[12].ve1.pad = 0x%x\n", ptr->ve[12].ve1.pad);
+   debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", ptr->ve[12].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", ptr->ve[12].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", ptr->ve[12].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", ptr->ve[12].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", ptr->ve[13].ve0.src_offset);
+   debug_printf("\t\t.ve[13].ve0.pad = 0x%x\n", ptr->ve[13].ve0.pad);
+   debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", ptr->ve[13].ve0.src_format);
+   debug_printf("\t\t.ve[13].ve0.pad0 = 0x%x\n", ptr->ve[13].ve0.pad0);
+   debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", ptr->ve[13].ve0.valid);
+   debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[13].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", ptr->ve[13].ve1.dst_offset);
+   debug_printf("\t\t.ve[13].ve1.pad = 0x%x\n", ptr->ve[13].ve1.pad);
+   debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", ptr->ve[13].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", ptr->ve[13].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", ptr->ve[13].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", ptr->ve[13].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", ptr->ve[14].ve0.src_offset);
+   debug_printf("\t\t.ve[14].ve0.pad = 0x%x\n", ptr->ve[14].ve0.pad);
+   debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", ptr->ve[14].ve0.src_format);
+   debug_printf("\t\t.ve[14].ve0.pad0 = 0x%x\n", ptr->ve[14].ve0.pad0);
+   debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", ptr->ve[14].ve0.valid);
+   debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[14].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", ptr->ve[14].ve1.dst_offset);
+   debug_printf("\t\t.ve[14].ve1.pad = 0x%x\n", ptr->ve[14].ve1.pad);
+   debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", ptr->ve[14].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", ptr->ve[14].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", ptr->ve[14].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", ptr->ve[14].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", ptr->ve[15].ve0.src_offset);
+   debug_printf("\t\t.ve[15].ve0.pad = 0x%x\n", ptr->ve[15].ve0.pad);
+   debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", ptr->ve[15].ve0.src_format);
+   debug_printf("\t\t.ve[15].ve0.pad0 = 0x%x\n", ptr->ve[15].ve0.pad0);
+   debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", ptr->ve[15].ve0.valid);
+   debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[15].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", ptr->ve[15].ve1.dst_offset);
+   debug_printf("\t\t.ve[15].ve1.pad = 0x%x\n", ptr->ve[15].ve1.pad);
+   debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", ptr->ve[15].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", ptr->ve[15].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", ptr->ve[15].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", ptr->ve[15].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", ptr->ve[16].ve0.src_offset);
+   debug_printf("\t\t.ve[16].ve0.pad = 0x%x\n", ptr->ve[16].ve0.pad);
+   debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", ptr->ve[16].ve0.src_format);
+   debug_printf("\t\t.ve[16].ve0.pad0 = 0x%x\n", ptr->ve[16].ve0.pad0);
+   debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", ptr->ve[16].ve0.valid);
+   debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[16].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", ptr->ve[16].ve1.dst_offset);
+   debug_printf("\t\t.ve[16].ve1.pad = 0x%x\n", ptr->ve[16].ve1.pad);
+   debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", ptr->ve[16].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", ptr->ve[16].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", ptr->ve[16].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", ptr->ve[16].ve1.vfcomponent0);
+   debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", ptr->ve[17].ve0.src_offset);
+   debug_printf("\t\t.ve[17].ve0.pad = 0x%x\n", ptr->ve[17].ve0.pad);
+   debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", ptr->ve[17].ve0.src_format);
+   debug_printf("\t\t.ve[17].ve0.pad0 = 0x%x\n", ptr->ve[17].ve0.pad0);
+   debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", ptr->ve[17].ve0.valid);
+   debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", ptr->ve[17].ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", ptr->ve[17].ve1.dst_offset);
+   debug_printf("\t\t.ve[17].ve1.pad = 0x%x\n", ptr->ve[17].ve1.pad);
+   debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", ptr->ve[17].ve1.vfcomponent3);
+   debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", ptr->ve[17].ve1.vfcomponent2);
+   debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", ptr->ve[17].ve1.vfcomponent1);
+   debug_printf("\t\t.ve[17].ve1.vfcomponent0 = 0x%x\n", ptr->ve[17].ve1.vfcomponent0);
+}
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr)
+{  /* Emit one debug_printf line, in hex, for each ve0/ve1 member of *ptr listed below. */
+   debug_printf("\t\t.ve0.src_offset = 0x%x\n", ptr->ve0.src_offset);
+   debug_printf("\t\t.ve0.pad = 0x%x\n", ptr->ve0.pad);
+   debug_printf("\t\t.ve0.src_format = 0x%x\n", ptr->ve0.src_format);
+   debug_printf("\t\t.ve0.pad0 = 0x%x\n", ptr->ve0.pad0);
+   debug_printf("\t\t.ve0.valid = 0x%x\n", ptr->ve0.valid);
+   debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", ptr->ve0.vertex_buffer_index);
+   debug_printf("\t\t.ve1.dst_offset = 0x%x\n", ptr->ve1.dst_offset);
+   debug_printf("\t\t.ve1.pad = 0x%x\n", ptr->ve1.pad);
+   debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", ptr->ve1.vfcomponent3);
+   debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", ptr->ve1.vfcomponent2);
+   debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", ptr->ve1.vfcomponent1);
+   debug_printf("\t\t.ve1.vfcomponent0 = 0x%x\n", ptr->ve1.vfcomponent0);
+}
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr)
+{  /* Emit statistics_enable, pad and opcode of *ptr, one hex debug_printf line each. */
+   debug_printf("\t\t.statistics_enable = 0x%x\n", ptr->statistics_enable);
+   debug_printf("\t\t.pad = 0x%x\n", ptr->pad);
+   debug_printf("\t\t.opcode = 0x%x\n", ptr->opcode);
+}
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr)
+{  /* Emit the thread0..thread4, vs5 and vs6 members of *ptr listed below, one hex line each. */
+   debug_printf("\t\t.thread0.pad0 = 0x%x\n", ptr->thread0.pad0);
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", ptr->thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.pad1 = 0x%x\n", ptr->thread0.pad1);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", ptr->thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", ptr->thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", ptr->thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", ptr->thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", ptr->thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", ptr->thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.pad0 = 0x%x\n", ptr->thread1.pad0);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", ptr->thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.pad1 = 0x%x\n", ptr->thread1.pad1);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", ptr->thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", ptr->thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", ptr->thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.pad3 = 0x%x\n", ptr->thread1.pad3);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", ptr->thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", ptr->thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.pad0 = 0x%x\n", ptr->thread2.pad0);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", ptr->thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", ptr->thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", ptr->thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad0 = 0x%x\n", ptr->thread3.pad0);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", ptr->thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad1 = 0x%x\n", ptr->thread3.pad1);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", ptr->thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad2 = 0x%x\n", ptr->thread3.pad2);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", ptr->thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad3 = 0x%x\n", ptr->thread3.pad3);
+   debug_printf("\t\t.thread4.pad0 = 0x%x\n", ptr->thread4.pad0);
+   debug_printf("\t\t.thread4.stats_enable = 0x%x\n", ptr->thread4.stats_enable);
+   debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", ptr->thread4.nr_urb_entries);
+   debug_printf("\t\t.thread4.pad1 = 0x%x\n", ptr->thread4.pad1);
+   debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", ptr->thread4.urb_entry_allocation_size);
+   debug_printf("\t\t.thread4.pad2 = 0x%x\n", ptr->thread4.pad2);
+   debug_printf("\t\t.thread4.max_threads = 0x%x\n", ptr->thread4.max_threads);
+   debug_printf("\t\t.thread4.pad3 = 0x%x\n", ptr->thread4.pad3);
+   debug_printf("\t\t.vs5.sampler_count = 0x%x\n", ptr->vs5.sampler_count);
+   debug_printf("\t\t.vs5.pad0 = 0x%x\n", ptr->vs5.pad0);
+   debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", ptr->vs5.sampler_state_pointer);
+   debug_printf("\t\t.vs6.vs_enable = 0x%x\n", ptr->vs6.vs_enable);
+   debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", ptr->vs6.vert_cache_disable);
+   debug_printf("\t\t.vs6.pad0 = 0x%x\n", ptr->vs6.pad0);
+}
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
+{
+   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
+   debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
+   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
+   debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
+   debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
+   debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
+   debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
+   debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
+   debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
+   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
+   debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
+   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
+   debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
+   debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
+   debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
+   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
+   debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
+   debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
+   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
+   debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
+   debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
+   debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
+   debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
+   debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
+   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
+   debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
+   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
+   debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable);
+   debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear);
+   debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count);
+   debug_printf("\t\t.wm4.sampler_state_pointer = 0x%x\n", (*ptr).wm4.sampler_state_pointer);
+   debug_printf("\t\t.wm5.enable_8_pix = 0x%x\n", (*ptr).wm5.enable_8_pix);
+   debug_printf("\t\t.wm5.enable_16_pix = 0x%x\n", (*ptr).wm5.enable_16_pix);
+   debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix);
+   debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", (*ptr).wm5.enable_con_32_pix);
+   debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix);
+   debug_printf("\t\t.wm5.pad0 = 0x%x\n", (*ptr).wm5.pad0);
+   debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias);
+   debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple);
+   debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset);
+   debug_printf("\t\t.wm5.polygon_stipple = 0x%x\n", (*ptr).wm5.polygon_stipple);
+   debug_printf("\t\t.wm5.line_aa_region_width = 0x%x\n", (*ptr).wm5.line_aa_region_width);
+   debug_printf("\t\t.wm5.line_endcap_aa_region_width = 0x%x\n", (*ptr).wm5.line_endcap_aa_region_width);
+   debug_printf("\t\t.wm5.early_depth_test = 0x%x\n", (*ptr).wm5.early_depth_test);
+   debug_printf("\t\t.wm5.thread_dispatch_enable = 0x%x\n", (*ptr).wm5.thread_dispatch_enable);
+   debug_printf("\t\t.wm5.program_uses_depth = 0x%x\n", (*ptr).wm5.program_uses_depth);
+   debug_printf("\t\t.wm5.program_computes_depth = 0x%x\n", (*ptr).wm5.program_computes_depth);
+   debug_printf("\t\t.wm5.program_uses_killpixel = 0x%x\n", (*ptr).wm5.program_uses_killpixel);
+   debug_printf("\t\t.wm5.legacy_line_rast = 0x%x\n", (*ptr).wm5.legacy_line_rast);
+   debug_printf("\t\t.wm5.transposed_urb_read_enable = 0x%x\n", (*ptr).wm5.transposed_urb_read_enable);
+   debug_printf("\t\t.wm5.max_threads = 0x%x\n", (*ptr).wm5.max_threads);
+   debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant);
+   debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale);
+   debug_printf("\t\t.wm8.pad0 = 0x%x\n", (*ptr).wm8.pad0);
+   debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1);
+   debug_printf("\t\t.wm8.pad1 = 0x%x\n", (*ptr).wm8.pad1);
+   debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1);
+   debug_printf("\t\t.wm9.pad0 = 0x%x\n", (*ptr).wm9.pad0);
+   debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", (*ptr).wm9.grf_reg_count_2);
+   debug_printf("\t\t.wm9.pad1 = 0x%x\n", (*ptr).wm9.pad1);
+   debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2);
+   debug_printf("\t\t.wm10.pad0 = 0x%x\n", (*ptr).wm10.pad0);
+   debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3);
+   debug_printf("\t\t.wm10.pad1 = 0x%x\n", (*ptr).wm10.pad1);
+   debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3);
+}
+
diff --git a/src/gallium/drivers/i965/brw_structs_dump.h b/src/gallium/drivers/i965/brw_structs_dump.h
new file mode 100644
index 0000000000..7c02dbfe33
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.h
@@ -0,0 +1,276 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Dump i965 data structures.
+ *
+ * Generated automatically from brw_structs.h by brw_structs_dump.py.
+ */
+
+#ifndef BRW_STRUCTS_DUMP_H
+#define BRW_STRUCTS_DUMP_H
+
+struct brw_3d_control;
+struct brw_3d_primitive;
+struct brw_aa_line_parameters;
+struct brw_binding_table_pointers;
+struct brw_blend_constant_color;
+struct brw_cc0;
+struct brw_cc1;
+struct brw_cc2;
+struct brw_cc3;
+struct brw_cc4;
+struct brw_cc5;
+struct brw_cc6;
+struct brw_cc7;
+struct brw_cc_unit_state;
+struct brw_cc_viewport;
+struct brw_clip_unit_state;
+struct brw_clipper_viewport;
+struct brw_constant_buffer;
+struct brw_cs_urb_state;
+struct brw_depthbuffer;
+struct brw_depthbuffer_g4x;
+struct brw_drawrect;
+struct brw_global_depth_offset_clamp;
+struct brw_gs_unit_state;
+struct brw_indexbuffer;
+struct brw_line_stipple;
+struct brw_mi_flush;
+struct brw_pipe_control;
+struct brw_pipeline_select;
+struct brw_pipelined_state_pointers;
+struct brw_polygon_stipple;
+struct brw_polygon_stipple_offset;
+struct brw_sampler_default_color;
+struct brw_sampler_state;
+struct brw_sf_unit_state;
+struct brw_sf_viewport;
+struct brw_ss0;
+struct brw_ss1;
+struct brw_ss2;
+struct brw_ss3;
+struct brw_state_base_address;
+struct brw_state_prefetch;
+struct brw_surf_ss0;
+struct brw_surf_ss1;
+struct brw_surf_ss2;
+struct brw_surf_ss3;
+struct brw_surf_ss4;
+struct brw_surf_ss5;
+struct brw_surface_state;
+struct brw_system_instruction_pointer;
+struct brw_urb_fence;
+struct brw_urb_immediate;
+struct brw_vb_array_state;
+struct brw_vertex_buffer_state;
+struct brw_vertex_element_packet;
+struct brw_vertex_element_state;
+struct brw_vf_statistics;
+struct brw_vs_unit_state;
+struct brw_wm_unit_state;
+
+void
+brw_dump_3d_control(const struct brw_3d_control *ptr);
+
+void
+brw_dump_3d_primitive(const struct brw_3d_primitive *ptr);
+
+void
+brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr);
+
+void
+brw_dump_binding_table_pointers(const struct brw_binding_table_pointers *ptr);
+
+void
+brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr);
+
+void
+brw_dump_cc0(const struct brw_cc0 *ptr);
+
+void
+brw_dump_cc1(const struct brw_cc1 *ptr);
+
+void
+brw_dump_cc2(const struct brw_cc2 *ptr);
+
+void
+brw_dump_cc3(const struct brw_cc3 *ptr);
+
+void
+brw_dump_cc4(const struct brw_cc4 *ptr);
+
+void
+brw_dump_cc5(const struct brw_cc5 *ptr);
+
+void
+brw_dump_cc6(const struct brw_cc6 *ptr);
+
+void
+brw_dump_cc7(const struct brw_cc7 *ptr);
+
+void
+brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr);
+
+void
+brw_dump_cc_viewport(const struct brw_cc_viewport *ptr);
+
+void
+brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr);
+
+void
+brw_dump_clipper_viewport(const struct brw_clipper_viewport *ptr);
+
+void
+brw_dump_constant_buffer(const struct brw_constant_buffer *ptr);
+
+void
+brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr);
+
+void
+brw_dump_depthbuffer(const struct brw_depthbuffer *ptr);
+
+void
+brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr);
+
+void
+brw_dump_drawrect(const struct brw_drawrect *ptr);
+
+void
+brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *ptr);
+
+void
+brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr);
+
+void
+brw_dump_indexbuffer(const struct brw_indexbuffer *ptr);
+
+void
+brw_dump_line_stipple(const struct brw_line_stipple *ptr);
+
+void
+brw_dump_mi_flush(const struct brw_mi_flush *ptr);
+
+void
+brw_dump_pipe_control(const struct brw_pipe_control *ptr);
+
+void
+brw_dump_pipeline_select(const struct brw_pipeline_select *ptr);
+
+void
+brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr);
+
+void
+brw_dump_polygon_stipple(const struct brw_polygon_stipple *ptr);
+
+void
+brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr);
+
+void
+brw_dump_sampler_default_color(const struct brw_sampler_default_color *ptr);
+
+void
+brw_dump_sampler_state(const struct brw_sampler_state *ptr);
+
+void
+brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr);
+
+void
+brw_dump_sf_viewport(const struct brw_sf_viewport *ptr);
+
+void
+brw_dump_ss0(const struct brw_ss0 *ptr);
+
+void
+brw_dump_ss1(const struct brw_ss1 *ptr);
+
+void
+brw_dump_ss2(const struct brw_ss2 *ptr);
+
+void
+brw_dump_ss3(const struct brw_ss3 *ptr);
+
+void
+brw_dump_state_base_address(const struct brw_state_base_address *ptr);
+
+void
+brw_dump_state_prefetch(const struct brw_state_prefetch *ptr);
+
+void
+brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr);
+
+void
+brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr);
+
+void
+brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr);
+
+void
+brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr);
+
+void
+brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr);
+
+void
+brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr);
+
+void
+brw_dump_surface_state(const struct brw_surface_state *ptr);
+
+void
+brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer *ptr);
+
+void
+brw_dump_urb_fence(const struct brw_urb_fence *ptr);
+
+void
+brw_dump_urb_immediate(const struct brw_urb_immediate *ptr);
+
+void
+brw_dump_vb_array_state(const struct brw_vb_array_state *ptr);
+
+void
+brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr);
+
+void
+brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr);
+
+void
+brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr);
+
+void
+brw_dump_vf_statistics(const struct brw_vf_statistics *ptr);
+
+void
+brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr);
+
+void
+brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr);
+
+
+#endif /* BRW_STRUCTS_DUMP_H */
diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py
new file mode 100755
index 0000000000..581515878e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_structs_dump.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+'''
+Generates dumpers for the i965 state structures using pygccxml.
+
+Run as 
+
+  PYTHONPATH=/path/to/pygccxml-1.0.0 python brw_structs_dump.py
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+
+copyright = '''
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+ '''
+
+import os
+import sys
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+
+    def __init__(self, stream, instance = '', decl = None):
+        decl_visitor.decl_visitor_t.__init__(self)
+        self.stream = stream
+        self._instance = instance
+        self.decl = decl
+
+    def clone(self):
+        return decl_dumper_t(self.stream, self._instance, self.decl)
+
+    def visit_class(self):
+        class_ = self.decl
+        assert self.decl.class_type in ('struct', 'union')
+
+        for variable in class_.variables(recursive = False):
+            dump_type(self.stream, self._instance + '.' + variable.name, variable.type)
+
+    def visit_enumeration(self):
+        if enums:
+            self.stream.write('   switch(%s) {\n' % ("(*ptr)" + self._instance,))
+            for name, value in self.decl.values:
+                self.stream.write('   case %s:\n' % (name,))
+                self.stream.write('      debug_printf("\\t\\t%s = %s\\n");\n' % (self._instance, name))
+                self.stream.write('      break;\n')
+            self.stream.write('   default:\n')
+            self.stream.write('      debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance))
+            self.stream.write('      break;\n')
+            self.stream.write('   }\n')
+        else:
+            self.stream.write('   debug_printf("\\t\\t%s = %%i\\n", %s);\n' % (self._instance, "(*ptr)" + self._instance))
+
+
+def dump_decl(stream, instance, decl):
+    dumper = decl_dumper_t(stream, instance, decl)
+    algorithm.apply_visitor(dumper, decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+
+    def __init__(self, stream, instance, type_):
+        type_visitor.type_visitor_t.__init__(self)
+        self.stream = stream
+        self.instance = instance
+        self.type = type_
+
+    def clone(self):
+        return type_dumper_t(self.instance, self.type)
+
+    def visit_bool(self):
+        self.print_instance('%i')
+        
+    def visit_char(self):
+        #self.print_instance('%i')
+        self.print_instance('0x%x')
+        
+    def visit_unsigned_char(self):
+        #self.print_instance('%u')
+        self.print_instance('0x%x')
+
+    def visit_signed_char(self):
+        #self.print_instance('%i')
+        self.print_instance('0x%x')
+    
+    def visit_wchar(self):
+        self.print_instance('0x%x')
+        
+    def visit_short_int(self):
+        #self.print_instance('%i')
+        self.print_instance('0x%x')
+        
+    def visit_short_unsigned_int(self):
+        #self.print_instance('%u')
+        self.print_instance('0x%x')
+        
+    def visit_int(self):
+        #self.print_instance('%i')
+        self.print_instance('0x%x')
+        
+    def visit_unsigned_int(self):
+        #self.print_instance('%u')
+        self.print_instance('0x%x')
+        
+    def visit_long_int(self):
+        #self.print_instance('%li')
+        self.print_instance('0x%lx')
+        
+    def visit_long_unsigned_int(self):
+        #self.print_instance('%lu')
+        self.print_instance('%0xlx')
+        
+    def visit_long_long_int(self):
+        #self.print_instance('%lli')
+        self.print_instance('%0xllx')
+        
+    def visit_long_long_unsigned_int(self):
+        #self.print_instance('%llu')
+        self.print_instance('0x%llx')
+        
+    def visit_float(self):
+        self.print_instance('%f')
+        
+    def visit_double(self):
+        self.print_instance('%f')
+        
+    def visit_array(self):
+        for i in range(type_traits.array_size(self.type)):
+            dump_type(self.stream, self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+    def visit_pointer(self):
+        self.print_instance('%p')
+
+    def visit_declarated(self):
+        #stream.write('decl = %r\n' % self.type.decl_string)
+        decl = type_traits.remove_declarated(self.type)
+        dump_decl(self.stream, self.instance, decl)
+
+    def print_instance(self, format):
+        self.stream.write('   debug_printf("\\t\\t%s = %s\\n", %s);\n' % (self.instance, format, "(*ptr)" + self.instance))
+
+
+
+def dump_type(stream, instance, type_):
+    type_ = type_traits.remove_alias(type_)
+    visitor = type_dumper_t(stream, instance, type_)
+    algorithm.apply_visitor(visitor, type_)
+
+
+def dump_struct_interface(stream, class_, suffix = ';'):
+    name = class_.name
+    assert name.startswith('brw_');
+    name = name[:4] + 'dump_' + name[4:]
+    stream.write('void\n')
+    stream.write('%s(const struct %s *ptr)%s\n' % (name, class_.name, suffix))
+
+
+def dump_struct_implementation(stream, decls, class_):
+    dump_struct_interface(stream, class_, suffix = '')
+    stream.write('{\n')
+    dump_decl(stream, '', class_)
+    stream.write('}\n')
+    stream.write('\n')
+
+
+def dump_header(stream):
+    stream.write(copyright.strip() + '\n')
+    stream.write('\n')
+    stream.write('/**\n')
+    stream.write(' * @file\n')
+    stream.write(' * Dump i965 data structures.\n')
+    stream.write(' *\n')
+    stream.write(' * Generated automatically from brw_structs.h by brw_structs_dump.py.\n')
+    stream.write(' */\n')
+    stream.write('\n')
+
+
+def dump_interfaces(decls, global_ns, names):
+    stream = open('brw_structs_dump.h', 'wt')
+    
+    dump_header(stream)
+    
+    stream.write('#ifndef BRW_STRUCTS_DUMP_H\n')
+    stream.write('#define BRW_STRUCTS_DUMP_H\n')
+    stream.write('\n')
+    
+    for name in names:
+        stream.write('struct %s;\n' % (name,))
+    stream.write('\n')
+
+    for name in names:
+        (class_,) = global_ns.classes(name = name)
+        dump_struct_interface(stream, class_)
+        stream.write('\n')
+    stream.write('\n')
+
+    stream.write('#endif /* BRW_STRUCTS_DUMP_H */\n')
+
+
+def dump_implementations(decls, global_ns, names):
+    stream = open('brw_structs_dump.c', 'wt')
+    
+    dump_header(stream)
+
+    stream.write('#include "util/u_debug.h"\n')
+    stream.write('\n')
+    stream.write('#include "brw_types.h"\n')
+    stream.write('#include "brw_structs.h"\n')
+    stream.write('#include "brw_structs_dump.h"\n')
+    stream.write('\n')
+
+    for name in names:
+        (class_,) = global_ns.classes(name = name)
+        dump_struct_implementation(stream, decls, class_)
+
+
+def decl_filter(decl):
+    '''Filter the declarations we're interested in'''
+    name = decl.name
+    return name.startswith('brw_') and name not in ('brw_instruction',) 
+
+
+def main():
+
+    config = parser.config_t(
+        include_paths = [
+            '../../include',
+        ],
+        compiler = 'gcc',
+    )
+
+    headers = [
+        'brw_types.h', 
+        'brw_structs.h', 
+    ]
+
+    decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+    global_ns = declarations.get_global_namespace(decls)
+
+    names = []
+    for class_ in global_ns.classes(decl_filter):
+        names.append(class_.name)
+    names.sort()
+
+    dump_interfaces(decls, global_ns, names)
+    dump_implementations(decls, global_ns, names)
+
+
+if __name__ == '__main__':
+    main()
-- 
cgit v1.2.3


From 31b8b1dd36d9f07a7893a89ee985d83c4d0bb95b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 5 Nov 2009 12:44:36 +0000
Subject: i965g: Don't dump pads or dwords aliases.

---
 src/gallium/drivers/i965/brw_structs_dump.c  | 264 ---------------------------
 src/gallium/drivers/i965/brw_structs_dump.py |   9 +-
 2 files changed, 8 insertions(+), 265 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_structs_dump.c b/src/gallium/drivers/i965/brw_structs_dump.c
index a8b96c6418..cd40fc6d61 100644
--- a/src/gallium/drivers/i965/brw_structs_dump.c
+++ b/src/gallium/drivers/i965/brw_structs_dump.c
@@ -43,12 +43,10 @@ brw_dump_3d_control(const struct brw_3d_control *ptr)
 {
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.notify_enable = 0x%x\n", (*ptr).header.notify_enable);
-   debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad);
    debug_printf("\t\t.header.wc_flush_enable = 0x%x\n", (*ptr).header.wc_flush_enable);
    debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable);
    debug_printf("\t\t.header.operation = 0x%x\n", (*ptr).header.operation);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
-   debug_printf("\t\t.dest.pad = 0x%x\n", (*ptr).dest.pad);
    debug_printf("\t\t.dest.dest_addr_type = 0x%x\n", (*ptr).dest.dest_addr_type);
    debug_printf("\t\t.dest.dest_addr = 0x%x\n", (*ptr).dest.dest_addr);
    debug_printf("\t\t.dword2 = 0x%x\n", (*ptr).dword2);
@@ -59,7 +57,6 @@ void
 brw_dump_3d_primitive(const struct brw_3d_primitive *ptr)
 {
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
-   debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad);
    debug_printf("\t\t.header.topology = 0x%x\n", (*ptr).header.topology);
    debug_printf("\t\t.header.indexed = 0x%x\n", (*ptr).header.indexed);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
@@ -76,13 +73,9 @@ brw_dump_aa_line_parameters(const struct brw_aa_line_parameters *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.aa_coverage_scope = 0x%x\n", (*ptr).bits0.aa_coverage_scope);
-   debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0);
    debug_printf("\t\t.bits0.aa_coverage_bias = 0x%x\n", (*ptr).bits0.aa_coverage_bias);
-   debug_printf("\t\t.bits0.pad1 = 0x%x\n", (*ptr).bits0.pad1);
    debug_printf("\t\t.bits1.aa_coverage_endcap_slope = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_slope);
-   debug_printf("\t\t.bits1.pad0 = 0x%x\n", (*ptr).bits1.pad0);
    debug_printf("\t\t.bits1.aa_coverage_endcap_bias = 0x%x\n", (*ptr).bits1.aa_coverage_endcap_bias);
-   debug_printf("\t\t.bits1.pad1 = 0x%x\n", (*ptr).bits1.pad1);
 }
 
 void
@@ -111,13 +104,11 @@ brw_dump_blend_constant_color(const struct brw_blend_constant_color *ptr)
 void
 brw_dump_cc0(const struct brw_cc0 *ptr)
 {
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_pass_op);
    debug_printf("\t\t.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).bf_stencil_pass_depth_fail_op);
    debug_printf("\t\t.bf_stencil_fail_op = 0x%x\n", (*ptr).bf_stencil_fail_op);
    debug_printf("\t\t.bf_stencil_func = 0x%x\n", (*ptr).bf_stencil_func);
    debug_printf("\t\t.bf_stencil_enable = 0x%x\n", (*ptr).bf_stencil_enable);
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.stencil_write_enable = 0x%x\n", (*ptr).stencil_write_enable);
    debug_printf("\t\t.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).stencil_pass_depth_pass_op);
    debug_printf("\t\t.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).stencil_pass_depth_fail_op);
@@ -139,7 +130,6 @@ void
 brw_dump_cc2(const struct brw_cc2 *ptr)
 {
    debug_printf("\t\t.logicop_enable = 0x%x\n", (*ptr).logicop_enable);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.depth_write_enable = 0x%x\n", (*ptr).depth_write_enable);
    debug_printf("\t\t.depth_test_function = 0x%x\n", (*ptr).depth_test_function);
    debug_printf("\t\t.depth_test = 0x%x\n", (*ptr).depth_test);
@@ -150,33 +140,27 @@ brw_dump_cc2(const struct brw_cc2 *ptr)
 void
 brw_dump_cc3(const struct brw_cc3 *ptr)
 {
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.alpha_test_func = 0x%x\n", (*ptr).alpha_test_func);
    debug_printf("\t\t.alpha_test = 0x%x\n", (*ptr).alpha_test);
    debug_printf("\t\t.blend_enable = 0x%x\n", (*ptr).blend_enable);
    debug_printf("\t\t.ia_blend_enable = 0x%x\n", (*ptr).ia_blend_enable);
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.alpha_test_format = 0x%x\n", (*ptr).alpha_test_format);
-   debug_printf("\t\t.pad2 = 0x%x\n", (*ptr).pad2);
 }
 
 void
 brw_dump_cc4(const struct brw_cc4 *ptr)
 {
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.cc_viewport_state_offset = 0x%x\n", (*ptr).cc_viewport_state_offset);
 }
 
 void
 brw_dump_cc5(const struct brw_cc5 *ptr)
 {
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.ia_dest_blend_factor = 0x%x\n", (*ptr).ia_dest_blend_factor);
    debug_printf("\t\t.ia_src_blend_factor = 0x%x\n", (*ptr).ia_src_blend_factor);
    debug_printf("\t\t.ia_blend_function = 0x%x\n", (*ptr).ia_blend_function);
    debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable);
    debug_printf("\t\t.logicop_func = 0x%x\n", (*ptr).logicop_func);
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.dither_enable = 0x%x\n", (*ptr).dither_enable);
 }
 
@@ -186,7 +170,6 @@ brw_dump_cc6(const struct brw_cc6 *ptr)
    debug_printf("\t\t.clamp_post_alpha_blend = 0x%x\n", (*ptr).clamp_post_alpha_blend);
    debug_printf("\t\t.clamp_pre_alpha_blend = 0x%x\n", (*ptr).clamp_pre_alpha_blend);
    debug_printf("\t\t.clamp_range = 0x%x\n", (*ptr).clamp_range);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.y_dither_offset = 0x%x\n", (*ptr).y_dither_offset);
    debug_printf("\t\t.x_dither_offset = 0x%x\n", (*ptr).x_dither_offset);
    debug_printf("\t\t.dest_blend_factor = 0x%x\n", (*ptr).dest_blend_factor);
@@ -207,13 +190,11 @@ brw_dump_cc7(const struct brw_cc7 *ptr)
 void
 brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr)
 {
-   debug_printf("\t\t.cc0.pad0 = 0x%x\n", (*ptr).cc0.pad0);
    debug_printf("\t\t.cc0.bf_stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_pass_op);
    debug_printf("\t\t.cc0.bf_stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_pass_depth_fail_op);
    debug_printf("\t\t.cc0.bf_stencil_fail_op = 0x%x\n", (*ptr).cc0.bf_stencil_fail_op);
    debug_printf("\t\t.cc0.bf_stencil_func = 0x%x\n", (*ptr).cc0.bf_stencil_func);
    debug_printf("\t\t.cc0.bf_stencil_enable = 0x%x\n", (*ptr).cc0.bf_stencil_enable);
-   debug_printf("\t\t.cc0.pad1 = 0x%x\n", (*ptr).cc0.pad1);
    debug_printf("\t\t.cc0.stencil_write_enable = 0x%x\n", (*ptr).cc0.stencil_write_enable);
    debug_printf("\t\t.cc0.stencil_pass_depth_pass_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_pass_op);
    debug_printf("\t\t.cc0.stencil_pass_depth_fail_op = 0x%x\n", (*ptr).cc0.stencil_pass_depth_fail_op);
@@ -225,34 +206,26 @@ brw_dump_cc_unit_state(const struct brw_cc_unit_state *ptr)
    debug_printf("\t\t.cc1.stencil_test_mask = 0x%x\n", (*ptr).cc1.stencil_test_mask);
    debug_printf("\t\t.cc1.stencil_ref = 0x%x\n", (*ptr).cc1.stencil_ref);
    debug_printf("\t\t.cc2.logicop_enable = 0x%x\n", (*ptr).cc2.logicop_enable);
-   debug_printf("\t\t.cc2.pad0 = 0x%x\n", (*ptr).cc2.pad0);
    debug_printf("\t\t.cc2.depth_write_enable = 0x%x\n", (*ptr).cc2.depth_write_enable);
    debug_printf("\t\t.cc2.depth_test_function = 0x%x\n", (*ptr).cc2.depth_test_function);
    debug_printf("\t\t.cc2.depth_test = 0x%x\n", (*ptr).cc2.depth_test);
    debug_printf("\t\t.cc2.bf_stencil_write_mask = 0x%x\n", (*ptr).cc2.bf_stencil_write_mask);
    debug_printf("\t\t.cc2.bf_stencil_test_mask = 0x%x\n", (*ptr).cc2.bf_stencil_test_mask);
-   debug_printf("\t\t.cc3.pad0 = 0x%x\n", (*ptr).cc3.pad0);
    debug_printf("\t\t.cc3.alpha_test_func = 0x%x\n", (*ptr).cc3.alpha_test_func);
    debug_printf("\t\t.cc3.alpha_test = 0x%x\n", (*ptr).cc3.alpha_test);
    debug_printf("\t\t.cc3.blend_enable = 0x%x\n", (*ptr).cc3.blend_enable);
    debug_printf("\t\t.cc3.ia_blend_enable = 0x%x\n", (*ptr).cc3.ia_blend_enable);
-   debug_printf("\t\t.cc3.pad1 = 0x%x\n", (*ptr).cc3.pad1);
    debug_printf("\t\t.cc3.alpha_test_format = 0x%x\n", (*ptr).cc3.alpha_test_format);
-   debug_printf("\t\t.cc3.pad2 = 0x%x\n", (*ptr).cc3.pad2);
-   debug_printf("\t\t.cc4.pad0 = 0x%x\n", (*ptr).cc4.pad0);
    debug_printf("\t\t.cc4.cc_viewport_state_offset = 0x%x\n", (*ptr).cc4.cc_viewport_state_offset);
-   debug_printf("\t\t.cc5.pad0 = 0x%x\n", (*ptr).cc5.pad0);
    debug_printf("\t\t.cc5.ia_dest_blend_factor = 0x%x\n", (*ptr).cc5.ia_dest_blend_factor);
    debug_printf("\t\t.cc5.ia_src_blend_factor = 0x%x\n", (*ptr).cc5.ia_src_blend_factor);
    debug_printf("\t\t.cc5.ia_blend_function = 0x%x\n", (*ptr).cc5.ia_blend_function);
    debug_printf("\t\t.cc5.statistics_enable = 0x%x\n", (*ptr).cc5.statistics_enable);
    debug_printf("\t\t.cc5.logicop_func = 0x%x\n", (*ptr).cc5.logicop_func);
-   debug_printf("\t\t.cc5.pad1 = 0x%x\n", (*ptr).cc5.pad1);
    debug_printf("\t\t.cc5.dither_enable = 0x%x\n", (*ptr).cc5.dither_enable);
    debug_printf("\t\t.cc6.clamp_post_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_post_alpha_blend);
    debug_printf("\t\t.cc6.clamp_pre_alpha_blend = 0x%x\n", (*ptr).cc6.clamp_pre_alpha_blend);
    debug_printf("\t\t.cc6.clamp_range = 0x%x\n", (*ptr).cc6.clamp_range);
-   debug_printf("\t\t.cc6.pad0 = 0x%x\n", (*ptr).cc6.pad0);
    debug_printf("\t\t.cc6.y_dither_offset = 0x%x\n", (*ptr).cc6.y_dither_offset);
    debug_printf("\t\t.cc6.x_dither_offset = 0x%x\n", (*ptr).cc6.x_dither_offset);
    debug_printf("\t\t.cc6.dest_blend_factor = 0x%x\n", (*ptr).cc6.dest_blend_factor);
@@ -275,44 +248,27 @@ brw_dump_cc_viewport(const struct brw_cc_viewport *ptr)
 void
 brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr)
 {
-   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
    debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
-   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
    debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
-   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
    debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
-   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
    debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
-   debug_printf("\t\t.thread1.pad2 = 0x%x\n", (*ptr).thread1.pad2);
    debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
-   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
    debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
    debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
    debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
-   debug_printf("\t\t.thread1.pad4 = 0x%x\n", (*ptr).thread1.pad4);
    debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
    debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
-   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
    debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
    debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
    debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
    debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
    debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
    debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
-   debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0);
    debug_printf("\t\t.thread4.gs_output_stats = 0x%x\n", (*ptr).thread4.gs_output_stats);
    debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
    debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
-   debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1);
    debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
-   debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2);
    debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
-   debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3);
-   debug_printf("\t\t.clip5.pad0 = 0x%x\n", (*ptr).clip5.pad0);
    debug_printf("\t\t.clip5.clip_mode = 0x%x\n", (*ptr).clip5.clip_mode);
    debug_printf("\t\t.clip5.userclip_enable_flags = 0x%x\n", (*ptr).clip5.userclip_enable_flags);
    debug_printf("\t\t.clip5.userclip_must_clip = 0x%x\n", (*ptr).clip5.userclip_must_clip);
@@ -322,8 +278,6 @@ brw_dump_clip_unit_state(const struct brw_clip_unit_state *ptr)
    debug_printf("\t\t.clip5.viewport_xy_clip_enable = 0x%x\n", (*ptr).clip5.viewport_xy_clip_enable);
    debug_printf("\t\t.clip5.vertex_position_space = 0x%x\n", (*ptr).clip5.vertex_position_space);
    debug_printf("\t\t.clip5.api_mode = 0x%x\n", (*ptr).clip5.api_mode);
-   debug_printf("\t\t.clip5.pad2 = 0x%x\n", (*ptr).clip5.pad2);
-   debug_printf("\t\t.clip6.pad0 = 0x%x\n", (*ptr).clip6.pad0);
    debug_printf("\t\t.clip6.clipper_viewport_state_ptr = 0x%x\n", (*ptr).clip6.clipper_viewport_state_ptr);
    debug_printf("\t\t.viewport_xmin = %f\n", (*ptr).viewport_xmin);
    debug_printf("\t\t.viewport_xmax = %f\n", (*ptr).viewport_xmax);
@@ -345,7 +299,6 @@ brw_dump_constant_buffer(const struct brw_constant_buffer *ptr)
 {
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.valid = 0x%x\n", (*ptr).header.valid);
-   debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.buffer_length = 0x%x\n", (*ptr).bits0.buffer_length);
    debug_printf("\t\t.bits0.buffer_address = 0x%x\n", (*ptr).bits0.buffer_address);
@@ -357,9 +310,7 @@ brw_dump_cs_urb_state(const struct brw_cs_urb_state *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.nr_urb_entries = 0x%x\n", (*ptr).bits0.nr_urb_entries);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits0.urb_entry_size = 0x%x\n", (*ptr).bits0.urb_entry_size);
-   debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0);
 }
 
 void
@@ -367,28 +318,20 @@ brw_dump_depthbuffer(const struct brw_depthbuffer *ptr)
 {
    debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
    debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
-   debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword);
    debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch);
    debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format);
-   debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad);
    debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode);
    debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable);
    debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk);
    debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface);
-   debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", (*ptr).dword1.bits.pad2);
    debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type);
-   debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword);
    debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr);
-   debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad);
    debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout);
    debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod);
    debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width);
    debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height);
-   debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword);
-   debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad);
    debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element);
    debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth);
-   debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword);
 }
 
 void
@@ -396,31 +339,22 @@ brw_dump_depthbuffer_g4x(const struct brw_depthbuffer_g4x *ptr)
 {
    debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
    debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
-   debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword);
    debug_printf("\t\t.dword1.bits.pitch = 0x%x\n", (*ptr).dword1.bits.pitch);
    debug_printf("\t\t.dword1.bits.format = 0x%x\n", (*ptr).dword1.bits.format);
-   debug_printf("\t\t.dword1.bits.pad = 0x%x\n", (*ptr).dword1.bits.pad);
    debug_printf("\t\t.dword1.bits.software_tiled_rendering_mode = 0x%x\n", (*ptr).dword1.bits.software_tiled_rendering_mode);
    debug_printf("\t\t.dword1.bits.depth_offset_disable = 0x%x\n", (*ptr).dword1.bits.depth_offset_disable);
    debug_printf("\t\t.dword1.bits.tile_walk = 0x%x\n", (*ptr).dword1.bits.tile_walk);
    debug_printf("\t\t.dword1.bits.tiled_surface = 0x%x\n", (*ptr).dword1.bits.tiled_surface);
-   debug_printf("\t\t.dword1.bits.pad2 = 0x%x\n", (*ptr).dword1.bits.pad2);
    debug_printf("\t\t.dword1.bits.surface_type = 0x%x\n", (*ptr).dword1.bits.surface_type);
-   debug_printf("\t\t.dword1.dword = 0x%x\n", (*ptr).dword1.dword);
    debug_printf("\t\t.dword2_base_addr = 0x%x\n", (*ptr).dword2_base_addr);
-   debug_printf("\t\t.dword3.bits.pad = 0x%x\n", (*ptr).dword3.bits.pad);
    debug_printf("\t\t.dword3.bits.mipmap_layout = 0x%x\n", (*ptr).dword3.bits.mipmap_layout);
    debug_printf("\t\t.dword3.bits.lod = 0x%x\n", (*ptr).dword3.bits.lod);
    debug_printf("\t\t.dword3.bits.width = 0x%x\n", (*ptr).dword3.bits.width);
    debug_printf("\t\t.dword3.bits.height = 0x%x\n", (*ptr).dword3.bits.height);
-   debug_printf("\t\t.dword3.dword = 0x%x\n", (*ptr).dword3.dword);
-   debug_printf("\t\t.dword4.bits.pad = 0x%x\n", (*ptr).dword4.bits.pad);
    debug_printf("\t\t.dword4.bits.min_array_element = 0x%x\n", (*ptr).dword4.bits.min_array_element);
    debug_printf("\t\t.dword4.bits.depth = 0x%x\n", (*ptr).dword4.bits.depth);
-   debug_printf("\t\t.dword4.dword = 0x%x\n", (*ptr).dword4.dword);
    debug_printf("\t\t.dword5.bits.xoffset = 0x%x\n", (*ptr).dword5.bits.xoffset);
    debug_printf("\t\t.dword5.bits.yoffset = 0x%x\n", (*ptr).dword5.bits.yoffset);
-   debug_printf("\t\t.dword5.dword = 0x%x\n", (*ptr).dword5.dword);
 }
 
 void
@@ -447,57 +381,38 @@ brw_dump_global_depth_offset_clamp(const struct brw_global_depth_offset_clamp *p
 void
 brw_dump_gs_unit_state(const struct brw_gs_unit_state *ptr)
 {
-   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
    debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
-   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
    debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
    debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
    debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
    debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
    debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
    debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
-   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
    debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
-   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
    debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
    debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
    debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
-   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
    debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
    debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
-   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
    debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
    debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
    debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
    debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
    debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
    debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
-   debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0);
    debug_printf("\t\t.thread4.rendering_enable = 0x%x\n", (*ptr).thread4.rendering_enable);
-   debug_printf("\t\t.thread4.pad4 = 0x%x\n", (*ptr).thread4.pad4);
    debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
    debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
-   debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1);
    debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
-   debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2);
    debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
-   debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3);
    debug_printf("\t\t.gs5.sampler_count = 0x%x\n", (*ptr).gs5.sampler_count);
-   debug_printf("\t\t.gs5.pad0 = 0x%x\n", (*ptr).gs5.pad0);
    debug_printf("\t\t.gs5.sampler_state_pointer = 0x%x\n", (*ptr).gs5.sampler_state_pointer);
    debug_printf("\t\t.gs6.max_vp_index = 0x%x\n", (*ptr).gs6.max_vp_index);
-   debug_printf("\t\t.gs6.pad0 = 0x%x\n", (*ptr).gs6.pad0);
    debug_printf("\t\t.gs6.svbi_post_inc_value = 0x%x\n", (*ptr).gs6.svbi_post_inc_value);
-   debug_printf("\t\t.gs6.pad1 = 0x%x\n", (*ptr).gs6.pad1);
    debug_printf("\t\t.gs6.svbi_post_inc_enable = 0x%x\n", (*ptr).gs6.svbi_post_inc_enable);
    debug_printf("\t\t.gs6.svbi_payload = 0x%x\n", (*ptr).gs6.svbi_payload);
    debug_printf("\t\t.gs6.discard_adjaceny = 0x%x\n", (*ptr).gs6.discard_adjaceny);
    debug_printf("\t\t.gs6.reorder_enable = 0x%x\n", (*ptr).gs6.reorder_enable);
-   debug_printf("\t\t.gs6.pad2 = 0x%x\n", (*ptr).gs6.pad2);
 }
 
 void
@@ -506,9 +421,7 @@ brw_dump_indexbuffer(const struct brw_indexbuffer *ptr)
    debug_printf("\t\t.header.bits.length = 0x%x\n", (*ptr).header.bits.length);
    debug_printf("\t\t.header.bits.index_format = 0x%x\n", (*ptr).header.bits.index_format);
    debug_printf("\t\t.header.bits.cut_index_enable = 0x%x\n", (*ptr).header.bits.cut_index_enable);
-   debug_printf("\t\t.header.bits.pad = 0x%x\n", (*ptr).header.bits.pad);
    debug_printf("\t\t.header.bits.opcode = 0x%x\n", (*ptr).header.bits.opcode);
-   debug_printf("\t\t.header.dword = 0x%x\n", (*ptr).header.dword);
    debug_printf("\t\t.buffer_start = 0x%x\n", (*ptr).buffer_start);
    debug_printf("\t\t.buffer_end = 0x%x\n", (*ptr).buffer_end);
 }
@@ -519,9 +432,7 @@ brw_dump_line_stipple(const struct brw_line_stipple *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.pattern = 0x%x\n", (*ptr).bits0.pattern);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits1.repeat_count = 0x%x\n", (*ptr).bits1.repeat_count);
-   debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad);
    debug_printf("\t\t.bits1.inverse_repeat_count = 0x%x\n", (*ptr).bits1.inverse_repeat_count);
 }
 
@@ -529,7 +440,6 @@ void
 brw_dump_mi_flush(const struct brw_mi_flush *ptr)
 {
    debug_printf("\t\t.flags = 0x%x\n", (*ptr).flags);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
 }
 
@@ -545,7 +455,6 @@ brw_dump_pipe_control(const struct brw_pipe_control *ptr)
    debug_printf("\t\t.header.depth_stall_enable = 0x%x\n", (*ptr).header.depth_stall_enable);
    debug_printf("\t\t.header.post_sync_operation = 0x%x\n", (*ptr).header.post_sync_operation);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
-   debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad);
    debug_printf("\t\t.bits1.dest_addr_type = 0x%x\n", (*ptr).bits1.dest_addr_type);
    debug_printf("\t\t.bits1.dest_addr = 0x%x\n", (*ptr).bits1.dest_addr);
    debug_printf("\t\t.data0 = 0x%x\n", (*ptr).data0);
@@ -556,7 +465,6 @@ void
 brw_dump_pipeline_select(const struct brw_pipeline_select *ptr)
 {
    debug_printf("\t\t.header.pipeline_select = 0x%x\n", (*ptr).header.pipeline_select);
-   debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
 }
 
@@ -565,19 +473,13 @@ brw_dump_pipelined_state_pointers(const struct brw_pipelined_state_pointers *ptr
 {
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
-   debug_printf("\t\t.vs.pad = 0x%x\n", (*ptr).vs.pad);
    debug_printf("\t\t.vs.offset = 0x%x\n", (*ptr).vs.offset);
    debug_printf("\t\t.gs.enable = 0x%x\n", (*ptr).gs.enable);
-   debug_printf("\t\t.gs.pad = 0x%x\n", (*ptr).gs.pad);
    debug_printf("\t\t.gs.offset = 0x%x\n", (*ptr).gs.offset);
    debug_printf("\t\t.clp.enable = 0x%x\n", (*ptr).clp.enable);
-   debug_printf("\t\t.clp.pad = 0x%x\n", (*ptr).clp.pad);
    debug_printf("\t\t.clp.offset = 0x%x\n", (*ptr).clp.offset);
-   debug_printf("\t\t.sf.pad = 0x%x\n", (*ptr).sf.pad);
    debug_printf("\t\t.sf.offset = 0x%x\n", (*ptr).sf.offset);
-   debug_printf("\t\t.wm.pad = 0x%x\n", (*ptr).wm.pad);
    debug_printf("\t\t.wm.offset = 0x%x\n", (*ptr).wm.offset);
-   debug_printf("\t\t.cc.pad = 0x%x\n", (*ptr).cc.pad);
    debug_printf("\t\t.cc.offset = 0x%x\n", (*ptr).cc.offset);
 }
 
@@ -626,9 +528,7 @@ brw_dump_polygon_stipple_offset(const struct brw_polygon_stipple_offset *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.y_offset = 0x%x\n", (*ptr).bits0.y_offset);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits0.x_offset = 0x%x\n", (*ptr).bits0.x_offset);
-   debug_printf("\t\t.bits0.pad0 = 0x%x\n", (*ptr).bits0.pad0);
 }
 
 void
@@ -649,20 +549,15 @@ brw_dump_sampler_state(const struct brw_sampler_state *ptr)
    debug_printf("\t\t.ss0.mag_filter = 0x%x\n", (*ptr).ss0.mag_filter);
    debug_printf("\t\t.ss0.mip_filter = 0x%x\n", (*ptr).ss0.mip_filter);
    debug_printf("\t\t.ss0.base_level = 0x%x\n", (*ptr).ss0.base_level);
-   debug_printf("\t\t.ss0.pad = 0x%x\n", (*ptr).ss0.pad);
    debug_printf("\t\t.ss0.lod_preclamp = 0x%x\n", (*ptr).ss0.lod_preclamp);
    debug_printf("\t\t.ss0.default_color_mode = 0x%x\n", (*ptr).ss0.default_color_mode);
-   debug_printf("\t\t.ss0.pad0 = 0x%x\n", (*ptr).ss0.pad0);
    debug_printf("\t\t.ss0.disable = 0x%x\n", (*ptr).ss0.disable);
    debug_printf("\t\t.ss1.r_wrap_mode = 0x%x\n", (*ptr).ss1.r_wrap_mode);
    debug_printf("\t\t.ss1.t_wrap_mode = 0x%x\n", (*ptr).ss1.t_wrap_mode);
    debug_printf("\t\t.ss1.s_wrap_mode = 0x%x\n", (*ptr).ss1.s_wrap_mode);
-   debug_printf("\t\t.ss1.pad = 0x%x\n", (*ptr).ss1.pad);
    debug_printf("\t\t.ss1.max_lod = 0x%x\n", (*ptr).ss1.max_lod);
    debug_printf("\t\t.ss1.min_lod = 0x%x\n", (*ptr).ss1.min_lod);
-   debug_printf("\t\t.ss2.pad = 0x%x\n", (*ptr).ss2.pad);
    debug_printf("\t\t.ss2.default_color_pointer = 0x%x\n", (*ptr).ss2.default_color_pointer);
-   debug_printf("\t\t.ss3.pad = 0x%x\n", (*ptr).ss3.pad);
    debug_printf("\t\t.ss3.max_aniso = 0x%x\n", (*ptr).ss3.max_aniso);
    debug_printf("\t\t.ss3.chroma_key_mode = 0x%x\n", (*ptr).ss3.chroma_key_mode);
    debug_printf("\t\t.ss3.chroma_key_index = 0x%x\n", (*ptr).ss3.chroma_key_index);
@@ -674,48 +569,32 @@ brw_dump_sampler_state(const struct brw_sampler_state *ptr)
 void
 brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr)
 {
-   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
    debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
-   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
    debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
    debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
    debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
    debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
    debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
    debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
-   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
    debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
-   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
    debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
    debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
    debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
-   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
    debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
    debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
-   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
    debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
    debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
    debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
    debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
    debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
    debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
-   debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0);
    debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
    debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
-   debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1);
    debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
-   debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2);
    debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
-   debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3);
    debug_printf("\t\t.sf5.front_winding = 0x%x\n", (*ptr).sf5.front_winding);
    debug_printf("\t\t.sf5.viewport_transform = 0x%x\n", (*ptr).sf5.viewport_transform);
-   debug_printf("\t\t.sf5.pad0 = 0x%x\n", (*ptr).sf5.pad0);
    debug_printf("\t\t.sf5.sf_viewport_state_offset = 0x%x\n", (*ptr).sf5.sf_viewport_state_offset);
-   debug_printf("\t\t.sf6.pad0 = 0x%x\n", (*ptr).sf6.pad0);
    debug_printf("\t\t.sf6.dest_org_vbias = 0x%x\n", (*ptr).sf6.dest_org_vbias);
    debug_printf("\t\t.sf6.dest_org_hbias = 0x%x\n", (*ptr).sf6.dest_org_hbias);
    debug_printf("\t\t.sf6.scissor = 0x%x\n", (*ptr).sf6.scissor);
@@ -731,7 +610,6 @@ brw_dump_sf_unit_state(const struct brw_sf_unit_state *ptr)
    debug_printf("\t\t.sf7.use_point_size_state = 0x%x\n", (*ptr).sf7.use_point_size_state);
    debug_printf("\t\t.sf7.subpixel_precision = 0x%x\n", (*ptr).sf7.subpixel_precision);
    debug_printf("\t\t.sf7.sprite_point = 0x%x\n", (*ptr).sf7.sprite_point);
-   debug_printf("\t\t.sf7.pad0 = 0x%x\n", (*ptr).sf7.pad0);
    debug_printf("\t\t.sf7.aa_line_distance_mode = 0x%x\n", (*ptr).sf7.aa_line_distance_mode);
    debug_printf("\t\t.sf7.trifan_pv = 0x%x\n", (*ptr).sf7.trifan_pv);
    debug_printf("\t\t.sf7.linestrip_pv = 0x%x\n", (*ptr).sf7.linestrip_pv);
@@ -763,10 +641,8 @@ brw_dump_ss0(const struct brw_ss0 *ptr)
    debug_printf("\t\t.mag_filter = 0x%x\n", (*ptr).mag_filter);
    debug_printf("\t\t.mip_filter = 0x%x\n", (*ptr).mip_filter);
    debug_printf("\t\t.base_level = 0x%x\n", (*ptr).base_level);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.lod_preclamp = 0x%x\n", (*ptr).lod_preclamp);
    debug_printf("\t\t.default_color_mode = 0x%x\n", (*ptr).default_color_mode);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.disable = 0x%x\n", (*ptr).disable);
 }
 
@@ -776,7 +652,6 @@ brw_dump_ss1(const struct brw_ss1 *ptr)
    debug_printf("\t\t.r_wrap_mode = 0x%x\n", (*ptr).r_wrap_mode);
    debug_printf("\t\t.t_wrap_mode = 0x%x\n", (*ptr).t_wrap_mode);
    debug_printf("\t\t.s_wrap_mode = 0x%x\n", (*ptr).s_wrap_mode);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.max_lod = 0x%x\n", (*ptr).max_lod);
    debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod);
 }
@@ -784,14 +659,12 @@ brw_dump_ss1(const struct brw_ss1 *ptr)
 void
 brw_dump_ss2(const struct brw_ss2 *ptr)
 {
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.default_color_pointer = 0x%x\n", (*ptr).default_color_pointer);
 }
 
 void
 brw_dump_ss3(const struct brw_ss3 *ptr)
 {
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.max_aniso = 0x%x\n", (*ptr).max_aniso);
    debug_printf("\t\t.chroma_key_mode = 0x%x\n", (*ptr).chroma_key_mode);
    debug_printf("\t\t.chroma_key_index = 0x%x\n", (*ptr).chroma_key_index);
@@ -806,19 +679,14 @@ brw_dump_state_base_address(const struct brw_state_base_address *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.modify_enable = 0x%x\n", (*ptr).bits0.modify_enable);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits0.general_state_address = 0x%x\n", (*ptr).bits0.general_state_address);
    debug_printf("\t\t.bits1.modify_enable = 0x%x\n", (*ptr).bits1.modify_enable);
-   debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad);
    debug_printf("\t\t.bits1.surface_state_address = 0x%x\n", (*ptr).bits1.surface_state_address);
    debug_printf("\t\t.bits2.modify_enable = 0x%x\n", (*ptr).bits2.modify_enable);
-   debug_printf("\t\t.bits2.pad = 0x%x\n", (*ptr).bits2.pad);
    debug_printf("\t\t.bits2.indirect_object_state_address = 0x%x\n", (*ptr).bits2.indirect_object_state_address);
    debug_printf("\t\t.bits3.modify_enable = 0x%x\n", (*ptr).bits3.modify_enable);
-   debug_printf("\t\t.bits3.pad = 0x%x\n", (*ptr).bits3.pad);
    debug_printf("\t\t.bits3.general_state_upper_bound = 0x%x\n", (*ptr).bits3.general_state_upper_bound);
    debug_printf("\t\t.bits4.modify_enable = 0x%x\n", (*ptr).bits4.modify_enable);
-   debug_printf("\t\t.bits4.pad = 0x%x\n", (*ptr).bits4.pad);
    debug_printf("\t\t.bits4.indirect_object_state_upper_bound = 0x%x\n", (*ptr).bits4.indirect_object_state_upper_bound);
 }
 
@@ -828,7 +696,6 @@ brw_dump_state_prefetch(const struct brw_state_prefetch *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.prefetch_count = 0x%x\n", (*ptr).bits0.prefetch_count);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits0.prefetch_pointer = 0x%x\n", (*ptr).bits0.prefetch_pointer);
 }
 
@@ -841,7 +708,6 @@ brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
    debug_printf("\t\t.cube_neg_y = 0x%x\n", (*ptr).cube_neg_y);
    debug_printf("\t\t.cube_pos_x = 0x%x\n", (*ptr).cube_pos_x);
    debug_printf("\t\t.cube_neg_x = 0x%x\n", (*ptr).cube_neg_x);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.mipmap_layout_mode = 0x%x\n", (*ptr).mipmap_layout_mode);
    debug_printf("\t\t.vert_line_stride_ofs = 0x%x\n", (*ptr).vert_line_stride_ofs);
    debug_printf("\t\t.vert_line_stride = 0x%x\n", (*ptr).vert_line_stride);
@@ -852,7 +718,6 @@ brw_dump_surf_ss0(const struct brw_surf_ss0 *ptr)
    debug_printf("\t\t.writedisable_alpha = 0x%x\n", (*ptr).writedisable_alpha);
    debug_printf("\t\t.surface_format = 0x%x\n", (*ptr).surface_format);
    debug_printf("\t\t.data_return_format = 0x%x\n", (*ptr).data_return_format);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.surface_type = 0x%x\n", (*ptr).surface_type);
 }
 
@@ -865,7 +730,6 @@ brw_dump_surf_ss1(const struct brw_surf_ss1 *ptr)
 void
 brw_dump_surf_ss2(const struct brw_surf_ss2 *ptr)
 {
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.mip_count = 0x%x\n", (*ptr).mip_count);
    debug_printf("\t\t.width = 0x%x\n", (*ptr).width);
    debug_printf("\t\t.height = 0x%x\n", (*ptr).height);
@@ -876,7 +740,6 @@ brw_dump_surf_ss3(const struct brw_surf_ss3 *ptr)
 {
    debug_printf("\t\t.tile_walk = 0x%x\n", (*ptr).tile_walk);
    debug_printf("\t\t.tiled_surface = 0x%x\n", (*ptr).tiled_surface);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.pitch = 0x%x\n", (*ptr).pitch);
    debug_printf("\t\t.depth = 0x%x\n", (*ptr).depth);
 }
@@ -885,9 +748,7 @@ void
 brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
 {
    debug_printf("\t\t.multisample_position_palette_index = 0x%x\n", (*ptr).multisample_position_palette_index);
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.num_multisamples = 0x%x\n", (*ptr).num_multisamples);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.render_target_view_extent = 0x%x\n", (*ptr).render_target_view_extent);
    debug_printf("\t\t.min_array_elt = 0x%x\n", (*ptr).min_array_elt);
    debug_printf("\t\t.min_lod = 0x%x\n", (*ptr).min_lod);
@@ -896,13 +757,11 @@ brw_dump_surf_ss4(const struct brw_surf_ss4 *ptr)
 void
 brw_dump_surf_ss5(const struct brw_surf_ss5 *ptr)
 {
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.llc_mapping = 0x%x\n", (*ptr).llc_mapping);
    debug_printf("\t\t.mlc_mapping = 0x%x\n", (*ptr).mlc_mapping);
    debug_printf("\t\t.gfdt = 0x%x\n", (*ptr).gfdt);
    debug_printf("\t\t.gfdt_src = 0x%x\n", (*ptr).gfdt_src);
    debug_printf("\t\t.y_offset = 0x%x\n", (*ptr).y_offset);
-   debug_printf("\t\t.pad0 = 0x%x\n", (*ptr).pad0);
    debug_printf("\t\t.x_offset = 0x%x\n", (*ptr).x_offset);
 }
 
@@ -915,7 +774,6 @@ brw_dump_surface_state(const struct brw_surface_state *ptr)
    debug_printf("\t\t.ss0.cube_neg_y = 0x%x\n", (*ptr).ss0.cube_neg_y);
    debug_printf("\t\t.ss0.cube_pos_x = 0x%x\n", (*ptr).ss0.cube_pos_x);
    debug_printf("\t\t.ss0.cube_neg_x = 0x%x\n", (*ptr).ss0.cube_neg_x);
-   debug_printf("\t\t.ss0.pad = 0x%x\n", (*ptr).ss0.pad);
    debug_printf("\t\t.ss0.mipmap_layout_mode = 0x%x\n", (*ptr).ss0.mipmap_layout_mode);
    debug_printf("\t\t.ss0.vert_line_stride_ofs = 0x%x\n", (*ptr).ss0.vert_line_stride_ofs);
    debug_printf("\t\t.ss0.vert_line_stride = 0x%x\n", (*ptr).ss0.vert_line_stride);
@@ -926,32 +784,25 @@ brw_dump_surface_state(const struct brw_surface_state *ptr)
    debug_printf("\t\t.ss0.writedisable_alpha = 0x%x\n", (*ptr).ss0.writedisable_alpha);
    debug_printf("\t\t.ss0.surface_format = 0x%x\n", (*ptr).ss0.surface_format);
    debug_printf("\t\t.ss0.data_return_format = 0x%x\n", (*ptr).ss0.data_return_format);
-   debug_printf("\t\t.ss0.pad0 = 0x%x\n", (*ptr).ss0.pad0);
    debug_printf("\t\t.ss0.surface_type = 0x%x\n", (*ptr).ss0.surface_type);
    debug_printf("\t\t.ss1.base_addr = 0x%x\n", (*ptr).ss1.base_addr);
-   debug_printf("\t\t.ss2.pad = 0x%x\n", (*ptr).ss2.pad);
    debug_printf("\t\t.ss2.mip_count = 0x%x\n", (*ptr).ss2.mip_count);
    debug_printf("\t\t.ss2.width = 0x%x\n", (*ptr).ss2.width);
    debug_printf("\t\t.ss2.height = 0x%x\n", (*ptr).ss2.height);
    debug_printf("\t\t.ss3.tile_walk = 0x%x\n", (*ptr).ss3.tile_walk);
    debug_printf("\t\t.ss3.tiled_surface = 0x%x\n", (*ptr).ss3.tiled_surface);
-   debug_printf("\t\t.ss3.pad = 0x%x\n", (*ptr).ss3.pad);
    debug_printf("\t\t.ss3.pitch = 0x%x\n", (*ptr).ss3.pitch);
    debug_printf("\t\t.ss3.depth = 0x%x\n", (*ptr).ss3.depth);
    debug_printf("\t\t.ss4.multisample_position_palette_index = 0x%x\n", (*ptr).ss4.multisample_position_palette_index);
-   debug_printf("\t\t.ss4.pad1 = 0x%x\n", (*ptr).ss4.pad1);
    debug_printf("\t\t.ss4.num_multisamples = 0x%x\n", (*ptr).ss4.num_multisamples);
-   debug_printf("\t\t.ss4.pad0 = 0x%x\n", (*ptr).ss4.pad0);
    debug_printf("\t\t.ss4.render_target_view_extent = 0x%x\n", (*ptr).ss4.render_target_view_extent);
    debug_printf("\t\t.ss4.min_array_elt = 0x%x\n", (*ptr).ss4.min_array_elt);
    debug_printf("\t\t.ss4.min_lod = 0x%x\n", (*ptr).ss4.min_lod);
-   debug_printf("\t\t.ss5.pad1 = 0x%x\n", (*ptr).ss5.pad1);
    debug_printf("\t\t.ss5.llc_mapping = 0x%x\n", (*ptr).ss5.llc_mapping);
    debug_printf("\t\t.ss5.mlc_mapping = 0x%x\n", (*ptr).ss5.mlc_mapping);
    debug_printf("\t\t.ss5.gfdt = 0x%x\n", (*ptr).ss5.gfdt);
    debug_printf("\t\t.ss5.gfdt_src = 0x%x\n", (*ptr).ss5.gfdt_src);
    debug_printf("\t\t.ss5.y_offset = 0x%x\n", (*ptr).ss5.y_offset);
-   debug_printf("\t\t.ss5.pad0 = 0x%x\n", (*ptr).ss5.pad0);
    debug_printf("\t\t.ss5.x_offset = 0x%x\n", (*ptr).ss5.x_offset);
 }
 
@@ -960,7 +811,6 @@ brw_dump_system_instruction_pointer(const struct brw_system_instruction_pointer
 {
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits0.system_instruction_pointer = 0x%x\n", (*ptr).bits0.system_instruction_pointer);
 }
 
@@ -974,16 +824,13 @@ brw_dump_urb_fence(const struct brw_urb_fence *ptr)
    debug_printf("\t\t.header.sf_realloc = 0x%x\n", (*ptr).header.sf_realloc);
    debug_printf("\t\t.header.vfe_realloc = 0x%x\n", (*ptr).header.vfe_realloc);
    debug_printf("\t\t.header.cs_realloc = 0x%x\n", (*ptr).header.cs_realloc);
-   debug_printf("\t\t.header.pad = 0x%x\n", (*ptr).header.pad);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.bits0.vs_fence = 0x%x\n", (*ptr).bits0.vs_fence);
    debug_printf("\t\t.bits0.gs_fence = 0x%x\n", (*ptr).bits0.gs_fence);
    debug_printf("\t\t.bits0.clp_fence = 0x%x\n", (*ptr).bits0.clp_fence);
-   debug_printf("\t\t.bits0.pad = 0x%x\n", (*ptr).bits0.pad);
    debug_printf("\t\t.bits1.sf_fence = 0x%x\n", (*ptr).bits1.sf_fence);
    debug_printf("\t\t.bits1.vf_fence = 0x%x\n", (*ptr).bits1.vf_fence);
    debug_printf("\t\t.bits1.cs_fence = 0x%x\n", (*ptr).bits1.cs_fence);
-   debug_printf("\t\t.bits1.pad = 0x%x\n", (*ptr).bits1.pad);
 }
 
 void
@@ -992,14 +839,12 @@ brw_dump_urb_immediate(const struct brw_urb_immediate *ptr)
    debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
    debug_printf("\t\t.offset = 0x%x\n", (*ptr).offset);
    debug_printf("\t\t.swizzle_control = 0x%x\n", (*ptr).swizzle_control);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.allocate = 0x%x\n", (*ptr).allocate);
    debug_printf("\t\t.used = 0x%x\n", (*ptr).used);
    debug_printf("\t\t.complete = 0x%x\n", (*ptr).complete);
    debug_printf("\t\t.response_length = 0x%x\n", (*ptr).response_length);
    debug_printf("\t\t.msg_length = 0x%x\n", (*ptr).msg_length);
    debug_printf("\t\t.msg_target = 0x%x\n", (*ptr).msg_target);
-   debug_printf("\t\t.pad1 = 0x%x\n", (*ptr).pad1);
    debug_printf("\t\t.end_of_thread = 0x%x\n", (*ptr).end_of_thread);
 }
 
@@ -1009,119 +854,102 @@ brw_dump_vb_array_state(const struct brw_vb_array_state *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.vb[0].vb0.pitch = 0x%x\n", (*ptr).vb[0].vb0.pitch);
-   debug_printf("\t\t.vb[0].vb0.pad = 0x%x\n", (*ptr).vb[0].vb0.pad);
    debug_printf("\t\t.vb[0].vb0.access_type = 0x%x\n", (*ptr).vb[0].vb0.access_type);
    debug_printf("\t\t.vb[0].vb0.vb_index = 0x%x\n", (*ptr).vb[0].vb0.vb_index);
    debug_printf("\t\t.vb[0].start_addr = 0x%x\n", (*ptr).vb[0].start_addr);
    debug_printf("\t\t.vb[0].max_index = 0x%x\n", (*ptr).vb[0].max_index);
    debug_printf("\t\t.vb[0].instance_data_step_rate = 0x%x\n", (*ptr).vb[0].instance_data_step_rate);
    debug_printf("\t\t.vb[1].vb0.pitch = 0x%x\n", (*ptr).vb[1].vb0.pitch);
-   debug_printf("\t\t.vb[1].vb0.pad = 0x%x\n", (*ptr).vb[1].vb0.pad);
    debug_printf("\t\t.vb[1].vb0.access_type = 0x%x\n", (*ptr).vb[1].vb0.access_type);
    debug_printf("\t\t.vb[1].vb0.vb_index = 0x%x\n", (*ptr).vb[1].vb0.vb_index);
    debug_printf("\t\t.vb[1].start_addr = 0x%x\n", (*ptr).vb[1].start_addr);
    debug_printf("\t\t.vb[1].max_index = 0x%x\n", (*ptr).vb[1].max_index);
    debug_printf("\t\t.vb[1].instance_data_step_rate = 0x%x\n", (*ptr).vb[1].instance_data_step_rate);
    debug_printf("\t\t.vb[2].vb0.pitch = 0x%x\n", (*ptr).vb[2].vb0.pitch);
-   debug_printf("\t\t.vb[2].vb0.pad = 0x%x\n", (*ptr).vb[2].vb0.pad);
    debug_printf("\t\t.vb[2].vb0.access_type = 0x%x\n", (*ptr).vb[2].vb0.access_type);
    debug_printf("\t\t.vb[2].vb0.vb_index = 0x%x\n", (*ptr).vb[2].vb0.vb_index);
    debug_printf("\t\t.vb[2].start_addr = 0x%x\n", (*ptr).vb[2].start_addr);
    debug_printf("\t\t.vb[2].max_index = 0x%x\n", (*ptr).vb[2].max_index);
    debug_printf("\t\t.vb[2].instance_data_step_rate = 0x%x\n", (*ptr).vb[2].instance_data_step_rate);
    debug_printf("\t\t.vb[3].vb0.pitch = 0x%x\n", (*ptr).vb[3].vb0.pitch);
-   debug_printf("\t\t.vb[3].vb0.pad = 0x%x\n", (*ptr).vb[3].vb0.pad);
    debug_printf("\t\t.vb[3].vb0.access_type = 0x%x\n", (*ptr).vb[3].vb0.access_type);
    debug_printf("\t\t.vb[3].vb0.vb_index = 0x%x\n", (*ptr).vb[3].vb0.vb_index);
    debug_printf("\t\t.vb[3].start_addr = 0x%x\n", (*ptr).vb[3].start_addr);
    debug_printf("\t\t.vb[3].max_index = 0x%x\n", (*ptr).vb[3].max_index);
    debug_printf("\t\t.vb[3].instance_data_step_rate = 0x%x\n", (*ptr).vb[3].instance_data_step_rate);
    debug_printf("\t\t.vb[4].vb0.pitch = 0x%x\n", (*ptr).vb[4].vb0.pitch);
-   debug_printf("\t\t.vb[4].vb0.pad = 0x%x\n", (*ptr).vb[4].vb0.pad);
    debug_printf("\t\t.vb[4].vb0.access_type = 0x%x\n", (*ptr).vb[4].vb0.access_type);
    debug_printf("\t\t.vb[4].vb0.vb_index = 0x%x\n", (*ptr).vb[4].vb0.vb_index);
    debug_printf("\t\t.vb[4].start_addr = 0x%x\n", (*ptr).vb[4].start_addr);
    debug_printf("\t\t.vb[4].max_index = 0x%x\n", (*ptr).vb[4].max_index);
    debug_printf("\t\t.vb[4].instance_data_step_rate = 0x%x\n", (*ptr).vb[4].instance_data_step_rate);
    debug_printf("\t\t.vb[5].vb0.pitch = 0x%x\n", (*ptr).vb[5].vb0.pitch);
-   debug_printf("\t\t.vb[5].vb0.pad = 0x%x\n", (*ptr).vb[5].vb0.pad);
    debug_printf("\t\t.vb[5].vb0.access_type = 0x%x\n", (*ptr).vb[5].vb0.access_type);
    debug_printf("\t\t.vb[5].vb0.vb_index = 0x%x\n", (*ptr).vb[5].vb0.vb_index);
    debug_printf("\t\t.vb[5].start_addr = 0x%x\n", (*ptr).vb[5].start_addr);
    debug_printf("\t\t.vb[5].max_index = 0x%x\n", (*ptr).vb[5].max_index);
    debug_printf("\t\t.vb[5].instance_data_step_rate = 0x%x\n", (*ptr).vb[5].instance_data_step_rate);
    debug_printf("\t\t.vb[6].vb0.pitch = 0x%x\n", (*ptr).vb[6].vb0.pitch);
-   debug_printf("\t\t.vb[6].vb0.pad = 0x%x\n", (*ptr).vb[6].vb0.pad);
    debug_printf("\t\t.vb[6].vb0.access_type = 0x%x\n", (*ptr).vb[6].vb0.access_type);
    debug_printf("\t\t.vb[6].vb0.vb_index = 0x%x\n", (*ptr).vb[6].vb0.vb_index);
    debug_printf("\t\t.vb[6].start_addr = 0x%x\n", (*ptr).vb[6].start_addr);
    debug_printf("\t\t.vb[6].max_index = 0x%x\n", (*ptr).vb[6].max_index);
    debug_printf("\t\t.vb[6].instance_data_step_rate = 0x%x\n", (*ptr).vb[6].instance_data_step_rate);
    debug_printf("\t\t.vb[7].vb0.pitch = 0x%x\n", (*ptr).vb[7].vb0.pitch);
-   debug_printf("\t\t.vb[7].vb0.pad = 0x%x\n", (*ptr).vb[7].vb0.pad);
    debug_printf("\t\t.vb[7].vb0.access_type = 0x%x\n", (*ptr).vb[7].vb0.access_type);
    debug_printf("\t\t.vb[7].vb0.vb_index = 0x%x\n", (*ptr).vb[7].vb0.vb_index);
    debug_printf("\t\t.vb[7].start_addr = 0x%x\n", (*ptr).vb[7].start_addr);
    debug_printf("\t\t.vb[7].max_index = 0x%x\n", (*ptr).vb[7].max_index);
    debug_printf("\t\t.vb[7].instance_data_step_rate = 0x%x\n", (*ptr).vb[7].instance_data_step_rate);
    debug_printf("\t\t.vb[8].vb0.pitch = 0x%x\n", (*ptr).vb[8].vb0.pitch);
-   debug_printf("\t\t.vb[8].vb0.pad = 0x%x\n", (*ptr).vb[8].vb0.pad);
    debug_printf("\t\t.vb[8].vb0.access_type = 0x%x\n", (*ptr).vb[8].vb0.access_type);
    debug_printf("\t\t.vb[8].vb0.vb_index = 0x%x\n", (*ptr).vb[8].vb0.vb_index);
    debug_printf("\t\t.vb[8].start_addr = 0x%x\n", (*ptr).vb[8].start_addr);
    debug_printf("\t\t.vb[8].max_index = 0x%x\n", (*ptr).vb[8].max_index);
    debug_printf("\t\t.vb[8].instance_data_step_rate = 0x%x\n", (*ptr).vb[8].instance_data_step_rate);
    debug_printf("\t\t.vb[9].vb0.pitch = 0x%x\n", (*ptr).vb[9].vb0.pitch);
-   debug_printf("\t\t.vb[9].vb0.pad = 0x%x\n", (*ptr).vb[9].vb0.pad);
    debug_printf("\t\t.vb[9].vb0.access_type = 0x%x\n", (*ptr).vb[9].vb0.access_type);
    debug_printf("\t\t.vb[9].vb0.vb_index = 0x%x\n", (*ptr).vb[9].vb0.vb_index);
    debug_printf("\t\t.vb[9].start_addr = 0x%x\n", (*ptr).vb[9].start_addr);
    debug_printf("\t\t.vb[9].max_index = 0x%x\n", (*ptr).vb[9].max_index);
    debug_printf("\t\t.vb[9].instance_data_step_rate = 0x%x\n", (*ptr).vb[9].instance_data_step_rate);
    debug_printf("\t\t.vb[10].vb0.pitch = 0x%x\n", (*ptr).vb[10].vb0.pitch);
-   debug_printf("\t\t.vb[10].vb0.pad = 0x%x\n", (*ptr).vb[10].vb0.pad);
    debug_printf("\t\t.vb[10].vb0.access_type = 0x%x\n", (*ptr).vb[10].vb0.access_type);
    debug_printf("\t\t.vb[10].vb0.vb_index = 0x%x\n", (*ptr).vb[10].vb0.vb_index);
    debug_printf("\t\t.vb[10].start_addr = 0x%x\n", (*ptr).vb[10].start_addr);
    debug_printf("\t\t.vb[10].max_index = 0x%x\n", (*ptr).vb[10].max_index);
    debug_printf("\t\t.vb[10].instance_data_step_rate = 0x%x\n", (*ptr).vb[10].instance_data_step_rate);
    debug_printf("\t\t.vb[11].vb0.pitch = 0x%x\n", (*ptr).vb[11].vb0.pitch);
-   debug_printf("\t\t.vb[11].vb0.pad = 0x%x\n", (*ptr).vb[11].vb0.pad);
    debug_printf("\t\t.vb[11].vb0.access_type = 0x%x\n", (*ptr).vb[11].vb0.access_type);
    debug_printf("\t\t.vb[11].vb0.vb_index = 0x%x\n", (*ptr).vb[11].vb0.vb_index);
    debug_printf("\t\t.vb[11].start_addr = 0x%x\n", (*ptr).vb[11].start_addr);
    debug_printf("\t\t.vb[11].max_index = 0x%x\n", (*ptr).vb[11].max_index);
    debug_printf("\t\t.vb[11].instance_data_step_rate = 0x%x\n", (*ptr).vb[11].instance_data_step_rate);
    debug_printf("\t\t.vb[12].vb0.pitch = 0x%x\n", (*ptr).vb[12].vb0.pitch);
-   debug_printf("\t\t.vb[12].vb0.pad = 0x%x\n", (*ptr).vb[12].vb0.pad);
    debug_printf("\t\t.vb[12].vb0.access_type = 0x%x\n", (*ptr).vb[12].vb0.access_type);
    debug_printf("\t\t.vb[12].vb0.vb_index = 0x%x\n", (*ptr).vb[12].vb0.vb_index);
    debug_printf("\t\t.vb[12].start_addr = 0x%x\n", (*ptr).vb[12].start_addr);
    debug_printf("\t\t.vb[12].max_index = 0x%x\n", (*ptr).vb[12].max_index);
    debug_printf("\t\t.vb[12].instance_data_step_rate = 0x%x\n", (*ptr).vb[12].instance_data_step_rate);
    debug_printf("\t\t.vb[13].vb0.pitch = 0x%x\n", (*ptr).vb[13].vb0.pitch);
-   debug_printf("\t\t.vb[13].vb0.pad = 0x%x\n", (*ptr).vb[13].vb0.pad);
    debug_printf("\t\t.vb[13].vb0.access_type = 0x%x\n", (*ptr).vb[13].vb0.access_type);
    debug_printf("\t\t.vb[13].vb0.vb_index = 0x%x\n", (*ptr).vb[13].vb0.vb_index);
    debug_printf("\t\t.vb[13].start_addr = 0x%x\n", (*ptr).vb[13].start_addr);
    debug_printf("\t\t.vb[13].max_index = 0x%x\n", (*ptr).vb[13].max_index);
    debug_printf("\t\t.vb[13].instance_data_step_rate = 0x%x\n", (*ptr).vb[13].instance_data_step_rate);
    debug_printf("\t\t.vb[14].vb0.pitch = 0x%x\n", (*ptr).vb[14].vb0.pitch);
-   debug_printf("\t\t.vb[14].vb0.pad = 0x%x\n", (*ptr).vb[14].vb0.pad);
    debug_printf("\t\t.vb[14].vb0.access_type = 0x%x\n", (*ptr).vb[14].vb0.access_type);
    debug_printf("\t\t.vb[14].vb0.vb_index = 0x%x\n", (*ptr).vb[14].vb0.vb_index);
    debug_printf("\t\t.vb[14].start_addr = 0x%x\n", (*ptr).vb[14].start_addr);
    debug_printf("\t\t.vb[14].max_index = 0x%x\n", (*ptr).vb[14].max_index);
    debug_printf("\t\t.vb[14].instance_data_step_rate = 0x%x\n", (*ptr).vb[14].instance_data_step_rate);
    debug_printf("\t\t.vb[15].vb0.pitch = 0x%x\n", (*ptr).vb[15].vb0.pitch);
-   debug_printf("\t\t.vb[15].vb0.pad = 0x%x\n", (*ptr).vb[15].vb0.pad);
    debug_printf("\t\t.vb[15].vb0.access_type = 0x%x\n", (*ptr).vb[15].vb0.access_type);
    debug_printf("\t\t.vb[15].vb0.vb_index = 0x%x\n", (*ptr).vb[15].vb0.vb_index);
    debug_printf("\t\t.vb[15].start_addr = 0x%x\n", (*ptr).vb[15].start_addr);
    debug_printf("\t\t.vb[15].max_index = 0x%x\n", (*ptr).vb[15].max_index);
    debug_printf("\t\t.vb[15].instance_data_step_rate = 0x%x\n", (*ptr).vb[15].instance_data_step_rate);
    debug_printf("\t\t.vb[16].vb0.pitch = 0x%x\n", (*ptr).vb[16].vb0.pitch);
-   debug_printf("\t\t.vb[16].vb0.pad = 0x%x\n", (*ptr).vb[16].vb0.pad);
    debug_printf("\t\t.vb[16].vb0.access_type = 0x%x\n", (*ptr).vb[16].vb0.access_type);
    debug_printf("\t\t.vb[16].vb0.vb_index = 0x%x\n", (*ptr).vb[16].vb0.vb_index);
    debug_printf("\t\t.vb[16].start_addr = 0x%x\n", (*ptr).vb[16].start_addr);
@@ -1133,7 +961,6 @@ void
 brw_dump_vertex_buffer_state(const struct brw_vertex_buffer_state *ptr)
 {
    debug_printf("\t\t.vb0.pitch = 0x%x\n", (*ptr).vb0.pitch);
-   debug_printf("\t\t.vb0.pad = 0x%x\n", (*ptr).vb0.pad);
    debug_printf("\t\t.vb0.access_type = 0x%x\n", (*ptr).vb0.access_type);
    debug_printf("\t\t.vb0.vb_index = 0x%x\n", (*ptr).vb0.vb_index);
    debug_printf("\t\t.start_addr = 0x%x\n", (*ptr).start_addr);
@@ -1147,217 +974,163 @@ brw_dump_vertex_element_packet(const struct brw_vertex_element_packet *ptr)
    debug_printf("\t\t.header.length = 0x%x\n", (*ptr).header.length);
    debug_printf("\t\t.header.opcode = 0x%x\n", (*ptr).header.opcode);
    debug_printf("\t\t.ve[0].ve0.src_offset = 0x%x\n", (*ptr).ve[0].ve0.src_offset);
-   debug_printf("\t\t.ve[0].ve0.pad = 0x%x\n", (*ptr).ve[0].ve0.pad);
    debug_printf("\t\t.ve[0].ve0.src_format = 0x%x\n", (*ptr).ve[0].ve0.src_format);
-   debug_printf("\t\t.ve[0].ve0.pad0 = 0x%x\n", (*ptr).ve[0].ve0.pad0);
    debug_printf("\t\t.ve[0].ve0.valid = 0x%x\n", (*ptr).ve[0].ve0.valid);
    debug_printf("\t\t.ve[0].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[0].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[0].ve1.dst_offset = 0x%x\n", (*ptr).ve[0].ve1.dst_offset);
-   debug_printf("\t\t.ve[0].ve1.pad = 0x%x\n", (*ptr).ve[0].ve1.pad);
    debug_printf("\t\t.ve[0].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent3);
    debug_printf("\t\t.ve[0].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent2);
    debug_printf("\t\t.ve[0].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent1);
    debug_printf("\t\t.ve[0].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[0].ve1.vfcomponent0);
    debug_printf("\t\t.ve[1].ve0.src_offset = 0x%x\n", (*ptr).ve[1].ve0.src_offset);
-   debug_printf("\t\t.ve[1].ve0.pad = 0x%x\n", (*ptr).ve[1].ve0.pad);
    debug_printf("\t\t.ve[1].ve0.src_format = 0x%x\n", (*ptr).ve[1].ve0.src_format);
-   debug_printf("\t\t.ve[1].ve0.pad0 = 0x%x\n", (*ptr).ve[1].ve0.pad0);
    debug_printf("\t\t.ve[1].ve0.valid = 0x%x\n", (*ptr).ve[1].ve0.valid);
    debug_printf("\t\t.ve[1].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[1].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[1].ve1.dst_offset = 0x%x\n", (*ptr).ve[1].ve1.dst_offset);
-   debug_printf("\t\t.ve[1].ve1.pad = 0x%x\n", (*ptr).ve[1].ve1.pad);
    debug_printf("\t\t.ve[1].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent3);
    debug_printf("\t\t.ve[1].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent2);
    debug_printf("\t\t.ve[1].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent1);
    debug_printf("\t\t.ve[1].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[1].ve1.vfcomponent0);
    debug_printf("\t\t.ve[2].ve0.src_offset = 0x%x\n", (*ptr).ve[2].ve0.src_offset);
-   debug_printf("\t\t.ve[2].ve0.pad = 0x%x\n", (*ptr).ve[2].ve0.pad);
    debug_printf("\t\t.ve[2].ve0.src_format = 0x%x\n", (*ptr).ve[2].ve0.src_format);
-   debug_printf("\t\t.ve[2].ve0.pad0 = 0x%x\n", (*ptr).ve[2].ve0.pad0);
    debug_printf("\t\t.ve[2].ve0.valid = 0x%x\n", (*ptr).ve[2].ve0.valid);
    debug_printf("\t\t.ve[2].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[2].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[2].ve1.dst_offset = 0x%x\n", (*ptr).ve[2].ve1.dst_offset);
-   debug_printf("\t\t.ve[2].ve1.pad = 0x%x\n", (*ptr).ve[2].ve1.pad);
    debug_printf("\t\t.ve[2].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent3);
    debug_printf("\t\t.ve[2].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent2);
    debug_printf("\t\t.ve[2].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent1);
    debug_printf("\t\t.ve[2].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[2].ve1.vfcomponent0);
    debug_printf("\t\t.ve[3].ve0.src_offset = 0x%x\n", (*ptr).ve[3].ve0.src_offset);
-   debug_printf("\t\t.ve[3].ve0.pad = 0x%x\n", (*ptr).ve[3].ve0.pad);
    debug_printf("\t\t.ve[3].ve0.src_format = 0x%x\n", (*ptr).ve[3].ve0.src_format);
-   debug_printf("\t\t.ve[3].ve0.pad0 = 0x%x\n", (*ptr).ve[3].ve0.pad0);
    debug_printf("\t\t.ve[3].ve0.valid = 0x%x\n", (*ptr).ve[3].ve0.valid);
    debug_printf("\t\t.ve[3].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[3].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[3].ve1.dst_offset = 0x%x\n", (*ptr).ve[3].ve1.dst_offset);
-   debug_printf("\t\t.ve[3].ve1.pad = 0x%x\n", (*ptr).ve[3].ve1.pad);
    debug_printf("\t\t.ve[3].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent3);
    debug_printf("\t\t.ve[3].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent2);
    debug_printf("\t\t.ve[3].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent1);
    debug_printf("\t\t.ve[3].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[3].ve1.vfcomponent0);
    debug_printf("\t\t.ve[4].ve0.src_offset = 0x%x\n", (*ptr).ve[4].ve0.src_offset);
-   debug_printf("\t\t.ve[4].ve0.pad = 0x%x\n", (*ptr).ve[4].ve0.pad);
    debug_printf("\t\t.ve[4].ve0.src_format = 0x%x\n", (*ptr).ve[4].ve0.src_format);
-   debug_printf("\t\t.ve[4].ve0.pad0 = 0x%x\n", (*ptr).ve[4].ve0.pad0);
    debug_printf("\t\t.ve[4].ve0.valid = 0x%x\n", (*ptr).ve[4].ve0.valid);
    debug_printf("\t\t.ve[4].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[4].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[4].ve1.dst_offset = 0x%x\n", (*ptr).ve[4].ve1.dst_offset);
-   debug_printf("\t\t.ve[4].ve1.pad = 0x%x\n", (*ptr).ve[4].ve1.pad);
    debug_printf("\t\t.ve[4].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent3);
    debug_printf("\t\t.ve[4].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent2);
    debug_printf("\t\t.ve[4].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent1);
    debug_printf("\t\t.ve[4].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[4].ve1.vfcomponent0);
    debug_printf("\t\t.ve[5].ve0.src_offset = 0x%x\n", (*ptr).ve[5].ve0.src_offset);
-   debug_printf("\t\t.ve[5].ve0.pad = 0x%x\n", (*ptr).ve[5].ve0.pad);
    debug_printf("\t\t.ve[5].ve0.src_format = 0x%x\n", (*ptr).ve[5].ve0.src_format);
-   debug_printf("\t\t.ve[5].ve0.pad0 = 0x%x\n", (*ptr).ve[5].ve0.pad0);
    debug_printf("\t\t.ve[5].ve0.valid = 0x%x\n", (*ptr).ve[5].ve0.valid);
    debug_printf("\t\t.ve[5].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[5].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[5].ve1.dst_offset = 0x%x\n", (*ptr).ve[5].ve1.dst_offset);
-   debug_printf("\t\t.ve[5].ve1.pad = 0x%x\n", (*ptr).ve[5].ve1.pad);
    debug_printf("\t\t.ve[5].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent3);
    debug_printf("\t\t.ve[5].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent2);
    debug_printf("\t\t.ve[5].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent1);
    debug_printf("\t\t.ve[5].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[5].ve1.vfcomponent0);
    debug_printf("\t\t.ve[6].ve0.src_offset = 0x%x\n", (*ptr).ve[6].ve0.src_offset);
-   debug_printf("\t\t.ve[6].ve0.pad = 0x%x\n", (*ptr).ve[6].ve0.pad);
    debug_printf("\t\t.ve[6].ve0.src_format = 0x%x\n", (*ptr).ve[6].ve0.src_format);
-   debug_printf("\t\t.ve[6].ve0.pad0 = 0x%x\n", (*ptr).ve[6].ve0.pad0);
    debug_printf("\t\t.ve[6].ve0.valid = 0x%x\n", (*ptr).ve[6].ve0.valid);
    debug_printf("\t\t.ve[6].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[6].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[6].ve1.dst_offset = 0x%x\n", (*ptr).ve[6].ve1.dst_offset);
-   debug_printf("\t\t.ve[6].ve1.pad = 0x%x\n", (*ptr).ve[6].ve1.pad);
    debug_printf("\t\t.ve[6].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent3);
    debug_printf("\t\t.ve[6].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent2);
    debug_printf("\t\t.ve[6].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent1);
    debug_printf("\t\t.ve[6].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[6].ve1.vfcomponent0);
    debug_printf("\t\t.ve[7].ve0.src_offset = 0x%x\n", (*ptr).ve[7].ve0.src_offset);
-   debug_printf("\t\t.ve[7].ve0.pad = 0x%x\n", (*ptr).ve[7].ve0.pad);
    debug_printf("\t\t.ve[7].ve0.src_format = 0x%x\n", (*ptr).ve[7].ve0.src_format);
-   debug_printf("\t\t.ve[7].ve0.pad0 = 0x%x\n", (*ptr).ve[7].ve0.pad0);
    debug_printf("\t\t.ve[7].ve0.valid = 0x%x\n", (*ptr).ve[7].ve0.valid);
    debug_printf("\t\t.ve[7].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[7].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[7].ve1.dst_offset = 0x%x\n", (*ptr).ve[7].ve1.dst_offset);
-   debug_printf("\t\t.ve[7].ve1.pad = 0x%x\n", (*ptr).ve[7].ve1.pad);
    debug_printf("\t\t.ve[7].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent3);
    debug_printf("\t\t.ve[7].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent2);
    debug_printf("\t\t.ve[7].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent1);
    debug_printf("\t\t.ve[7].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[7].ve1.vfcomponent0);
    debug_printf("\t\t.ve[8].ve0.src_offset = 0x%x\n", (*ptr).ve[8].ve0.src_offset);
-   debug_printf("\t\t.ve[8].ve0.pad = 0x%x\n", (*ptr).ve[8].ve0.pad);
    debug_printf("\t\t.ve[8].ve0.src_format = 0x%x\n", (*ptr).ve[8].ve0.src_format);
-   debug_printf("\t\t.ve[8].ve0.pad0 = 0x%x\n", (*ptr).ve[8].ve0.pad0);
    debug_printf("\t\t.ve[8].ve0.valid = 0x%x\n", (*ptr).ve[8].ve0.valid);
    debug_printf("\t\t.ve[8].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[8].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[8].ve1.dst_offset = 0x%x\n", (*ptr).ve[8].ve1.dst_offset);
-   debug_printf("\t\t.ve[8].ve1.pad = 0x%x\n", (*ptr).ve[8].ve1.pad);
    debug_printf("\t\t.ve[8].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent3);
    debug_printf("\t\t.ve[8].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent2);
    debug_printf("\t\t.ve[8].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent1);
    debug_printf("\t\t.ve[8].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[8].ve1.vfcomponent0);
    debug_printf("\t\t.ve[9].ve0.src_offset = 0x%x\n", (*ptr).ve[9].ve0.src_offset);
-   debug_printf("\t\t.ve[9].ve0.pad = 0x%x\n", (*ptr).ve[9].ve0.pad);
    debug_printf("\t\t.ve[9].ve0.src_format = 0x%x\n", (*ptr).ve[9].ve0.src_format);
-   debug_printf("\t\t.ve[9].ve0.pad0 = 0x%x\n", (*ptr).ve[9].ve0.pad0);
    debug_printf("\t\t.ve[9].ve0.valid = 0x%x\n", (*ptr).ve[9].ve0.valid);
    debug_printf("\t\t.ve[9].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[9].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[9].ve1.dst_offset = 0x%x\n", (*ptr).ve[9].ve1.dst_offset);
-   debug_printf("\t\t.ve[9].ve1.pad = 0x%x\n", (*ptr).ve[9].ve1.pad);
    debug_printf("\t\t.ve[9].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent3);
    debug_printf("\t\t.ve[9].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent2);
    debug_printf("\t\t.ve[9].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent1);
    debug_printf("\t\t.ve[9].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[9].ve1.vfcomponent0);
    debug_printf("\t\t.ve[10].ve0.src_offset = 0x%x\n", (*ptr).ve[10].ve0.src_offset);
-   debug_printf("\t\t.ve[10].ve0.pad = 0x%x\n", (*ptr).ve[10].ve0.pad);
    debug_printf("\t\t.ve[10].ve0.src_format = 0x%x\n", (*ptr).ve[10].ve0.src_format);
-   debug_printf("\t\t.ve[10].ve0.pad0 = 0x%x\n", (*ptr).ve[10].ve0.pad0);
    debug_printf("\t\t.ve[10].ve0.valid = 0x%x\n", (*ptr).ve[10].ve0.valid);
    debug_printf("\t\t.ve[10].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[10].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[10].ve1.dst_offset = 0x%x\n", (*ptr).ve[10].ve1.dst_offset);
-   debug_printf("\t\t.ve[10].ve1.pad = 0x%x\n", (*ptr).ve[10].ve1.pad);
    debug_printf("\t\t.ve[10].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent3);
    debug_printf("\t\t.ve[10].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent2);
    debug_printf("\t\t.ve[10].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent1);
    debug_printf("\t\t.ve[10].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[10].ve1.vfcomponent0);
    debug_printf("\t\t.ve[11].ve0.src_offset = 0x%x\n", (*ptr).ve[11].ve0.src_offset);
-   debug_printf("\t\t.ve[11].ve0.pad = 0x%x\n", (*ptr).ve[11].ve0.pad);
    debug_printf("\t\t.ve[11].ve0.src_format = 0x%x\n", (*ptr).ve[11].ve0.src_format);
-   debug_printf("\t\t.ve[11].ve0.pad0 = 0x%x\n", (*ptr).ve[11].ve0.pad0);
    debug_printf("\t\t.ve[11].ve0.valid = 0x%x\n", (*ptr).ve[11].ve0.valid);
    debug_printf("\t\t.ve[11].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[11].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[11].ve1.dst_offset = 0x%x\n", (*ptr).ve[11].ve1.dst_offset);
-   debug_printf("\t\t.ve[11].ve1.pad = 0x%x\n", (*ptr).ve[11].ve1.pad);
    debug_printf("\t\t.ve[11].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent3);
    debug_printf("\t\t.ve[11].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent2);
    debug_printf("\t\t.ve[11].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent1);
    debug_printf("\t\t.ve[11].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[11].ve1.vfcomponent0);
    debug_printf("\t\t.ve[12].ve0.src_offset = 0x%x\n", (*ptr).ve[12].ve0.src_offset);
-   debug_printf("\t\t.ve[12].ve0.pad = 0x%x\n", (*ptr).ve[12].ve0.pad);
    debug_printf("\t\t.ve[12].ve0.src_format = 0x%x\n", (*ptr).ve[12].ve0.src_format);
-   debug_printf("\t\t.ve[12].ve0.pad0 = 0x%x\n", (*ptr).ve[12].ve0.pad0);
    debug_printf("\t\t.ve[12].ve0.valid = 0x%x\n", (*ptr).ve[12].ve0.valid);
    debug_printf("\t\t.ve[12].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[12].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[12].ve1.dst_offset = 0x%x\n", (*ptr).ve[12].ve1.dst_offset);
-   debug_printf("\t\t.ve[12].ve1.pad = 0x%x\n", (*ptr).ve[12].ve1.pad);
    debug_printf("\t\t.ve[12].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent3);
    debug_printf("\t\t.ve[12].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent2);
    debug_printf("\t\t.ve[12].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent1);
    debug_printf("\t\t.ve[12].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[12].ve1.vfcomponent0);
    debug_printf("\t\t.ve[13].ve0.src_offset = 0x%x\n", (*ptr).ve[13].ve0.src_offset);
-   debug_printf("\t\t.ve[13].ve0.pad = 0x%x\n", (*ptr).ve[13].ve0.pad);
    debug_printf("\t\t.ve[13].ve0.src_format = 0x%x\n", (*ptr).ve[13].ve0.src_format);
-   debug_printf("\t\t.ve[13].ve0.pad0 = 0x%x\n", (*ptr).ve[13].ve0.pad0);
    debug_printf("\t\t.ve[13].ve0.valid = 0x%x\n", (*ptr).ve[13].ve0.valid);
    debug_printf("\t\t.ve[13].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[13].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[13].ve1.dst_offset = 0x%x\n", (*ptr).ve[13].ve1.dst_offset);
-   debug_printf("\t\t.ve[13].ve1.pad = 0x%x\n", (*ptr).ve[13].ve1.pad);
    debug_printf("\t\t.ve[13].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent3);
    debug_printf("\t\t.ve[13].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent2);
    debug_printf("\t\t.ve[13].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent1);
    debug_printf("\t\t.ve[13].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[13].ve1.vfcomponent0);
    debug_printf("\t\t.ve[14].ve0.src_offset = 0x%x\n", (*ptr).ve[14].ve0.src_offset);
-   debug_printf("\t\t.ve[14].ve0.pad = 0x%x\n", (*ptr).ve[14].ve0.pad);
    debug_printf("\t\t.ve[14].ve0.src_format = 0x%x\n", (*ptr).ve[14].ve0.src_format);
-   debug_printf("\t\t.ve[14].ve0.pad0 = 0x%x\n", (*ptr).ve[14].ve0.pad0);
    debug_printf("\t\t.ve[14].ve0.valid = 0x%x\n", (*ptr).ve[14].ve0.valid);
    debug_printf("\t\t.ve[14].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[14].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[14].ve1.dst_offset = 0x%x\n", (*ptr).ve[14].ve1.dst_offset);
-   debug_printf("\t\t.ve[14].ve1.pad = 0x%x\n", (*ptr).ve[14].ve1.pad);
    debug_printf("\t\t.ve[14].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent3);
    debug_printf("\t\t.ve[14].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent2);
    debug_printf("\t\t.ve[14].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent1);
    debug_printf("\t\t.ve[14].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[14].ve1.vfcomponent0);
    debug_printf("\t\t.ve[15].ve0.src_offset = 0x%x\n", (*ptr).ve[15].ve0.src_offset);
-   debug_printf("\t\t.ve[15].ve0.pad = 0x%x\n", (*ptr).ve[15].ve0.pad);
    debug_printf("\t\t.ve[15].ve0.src_format = 0x%x\n", (*ptr).ve[15].ve0.src_format);
-   debug_printf("\t\t.ve[15].ve0.pad0 = 0x%x\n", (*ptr).ve[15].ve0.pad0);
    debug_printf("\t\t.ve[15].ve0.valid = 0x%x\n", (*ptr).ve[15].ve0.valid);
    debug_printf("\t\t.ve[15].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[15].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[15].ve1.dst_offset = 0x%x\n", (*ptr).ve[15].ve1.dst_offset);
-   debug_printf("\t\t.ve[15].ve1.pad = 0x%x\n", (*ptr).ve[15].ve1.pad);
    debug_printf("\t\t.ve[15].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent3);
    debug_printf("\t\t.ve[15].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent2);
    debug_printf("\t\t.ve[15].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent1);
    debug_printf("\t\t.ve[15].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[15].ve1.vfcomponent0);
    debug_printf("\t\t.ve[16].ve0.src_offset = 0x%x\n", (*ptr).ve[16].ve0.src_offset);
-   debug_printf("\t\t.ve[16].ve0.pad = 0x%x\n", (*ptr).ve[16].ve0.pad);
    debug_printf("\t\t.ve[16].ve0.src_format = 0x%x\n", (*ptr).ve[16].ve0.src_format);
-   debug_printf("\t\t.ve[16].ve0.pad0 = 0x%x\n", (*ptr).ve[16].ve0.pad0);
    debug_printf("\t\t.ve[16].ve0.valid = 0x%x\n", (*ptr).ve[16].ve0.valid);
    debug_printf("\t\t.ve[16].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[16].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[16].ve1.dst_offset = 0x%x\n", (*ptr).ve[16].ve1.dst_offset);
-   debug_printf("\t\t.ve[16].ve1.pad = 0x%x\n", (*ptr).ve[16].ve1.pad);
    debug_printf("\t\t.ve[16].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent3);
    debug_printf("\t\t.ve[16].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent2);
    debug_printf("\t\t.ve[16].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent1);
    debug_printf("\t\t.ve[16].ve1.vfcomponent0 = 0x%x\n", (*ptr).ve[16].ve1.vfcomponent0);
    debug_printf("\t\t.ve[17].ve0.src_offset = 0x%x\n", (*ptr).ve[17].ve0.src_offset);
-   debug_printf("\t\t.ve[17].ve0.pad = 0x%x\n", (*ptr).ve[17].ve0.pad);
    debug_printf("\t\t.ve[17].ve0.src_format = 0x%x\n", (*ptr).ve[17].ve0.src_format);
-   debug_printf("\t\t.ve[17].ve0.pad0 = 0x%x\n", (*ptr).ve[17].ve0.pad0);
    debug_printf("\t\t.ve[17].ve0.valid = 0x%x\n", (*ptr).ve[17].ve0.valid);
    debug_printf("\t\t.ve[17].ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve[17].ve0.vertex_buffer_index);
    debug_printf("\t\t.ve[17].ve1.dst_offset = 0x%x\n", (*ptr).ve[17].ve1.dst_offset);
-   debug_printf("\t\t.ve[17].ve1.pad = 0x%x\n", (*ptr).ve[17].ve1.pad);
    debug_printf("\t\t.ve[17].ve1.vfcomponent3 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent3);
    debug_printf("\t\t.ve[17].ve1.vfcomponent2 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent2);
    debug_printf("\t\t.ve[17].ve1.vfcomponent1 = 0x%x\n", (*ptr).ve[17].ve1.vfcomponent1);
@@ -1368,13 +1141,10 @@ void
 brw_dump_vertex_element_state(const struct brw_vertex_element_state *ptr)
 {
    debug_printf("\t\t.ve0.src_offset = 0x%x\n", (*ptr).ve0.src_offset);
-   debug_printf("\t\t.ve0.pad = 0x%x\n", (*ptr).ve0.pad);
    debug_printf("\t\t.ve0.src_format = 0x%x\n", (*ptr).ve0.src_format);
-   debug_printf("\t\t.ve0.pad0 = 0x%x\n", (*ptr).ve0.pad0);
    debug_printf("\t\t.ve0.valid = 0x%x\n", (*ptr).ve0.valid);
    debug_printf("\t\t.ve0.vertex_buffer_index = 0x%x\n", (*ptr).ve0.vertex_buffer_index);
    debug_printf("\t\t.ve1.dst_offset = 0x%x\n", (*ptr).ve1.dst_offset);
-   debug_printf("\t\t.ve1.pad = 0x%x\n", (*ptr).ve1.pad);
    debug_printf("\t\t.ve1.vfcomponent3 = 0x%x\n", (*ptr).ve1.vfcomponent3);
    debug_printf("\t\t.ve1.vfcomponent2 = 0x%x\n", (*ptr).ve1.vfcomponent2);
    debug_printf("\t\t.ve1.vfcomponent1 = 0x%x\n", (*ptr).ve1.vfcomponent1);
@@ -1385,90 +1155,63 @@ void
 brw_dump_vf_statistics(const struct brw_vf_statistics *ptr)
 {
    debug_printf("\t\t.statistics_enable = 0x%x\n", (*ptr).statistics_enable);
-   debug_printf("\t\t.pad = 0x%x\n", (*ptr).pad);
    debug_printf("\t\t.opcode = 0x%x\n", (*ptr).opcode);
 }
 
 void
 brw_dump_vs_unit_state(const struct brw_vs_unit_state *ptr)
 {
-   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
    debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
-   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
    debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
    debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
    debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
    debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
    debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
    debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
-   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
    debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
-   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
    debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
    debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
    debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
-   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
    debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
    debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
-   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
    debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
    debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
    debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
    debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
    debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
    debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
-   debug_printf("\t\t.thread4.pad0 = 0x%x\n", (*ptr).thread4.pad0);
    debug_printf("\t\t.thread4.stats_enable = 0x%x\n", (*ptr).thread4.stats_enable);
    debug_printf("\t\t.thread4.nr_urb_entries = 0x%x\n", (*ptr).thread4.nr_urb_entries);
-   debug_printf("\t\t.thread4.pad1 = 0x%x\n", (*ptr).thread4.pad1);
    debug_printf("\t\t.thread4.urb_entry_allocation_size = 0x%x\n", (*ptr).thread4.urb_entry_allocation_size);
-   debug_printf("\t\t.thread4.pad2 = 0x%x\n", (*ptr).thread4.pad2);
    debug_printf("\t\t.thread4.max_threads = 0x%x\n", (*ptr).thread4.max_threads);
-   debug_printf("\t\t.thread4.pad3 = 0x%x\n", (*ptr).thread4.pad3);
    debug_printf("\t\t.vs5.sampler_count = 0x%x\n", (*ptr).vs5.sampler_count);
-   debug_printf("\t\t.vs5.pad0 = 0x%x\n", (*ptr).vs5.pad0);
    debug_printf("\t\t.vs5.sampler_state_pointer = 0x%x\n", (*ptr).vs5.sampler_state_pointer);
    debug_printf("\t\t.vs6.vs_enable = 0x%x\n", (*ptr).vs6.vs_enable);
    debug_printf("\t\t.vs6.vert_cache_disable = 0x%x\n", (*ptr).vs6.vert_cache_disable);
-   debug_printf("\t\t.vs6.pad0 = 0x%x\n", (*ptr).vs6.pad0);
 }
 
 void
 brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
 {
-   debug_printf("\t\t.thread0.pad0 = 0x%x\n", (*ptr).thread0.pad0);
    debug_printf("\t\t.thread0.grf_reg_count = 0x%x\n", (*ptr).thread0.grf_reg_count);
-   debug_printf("\t\t.thread0.pad1 = 0x%x\n", (*ptr).thread0.pad1);
    debug_printf("\t\t.thread0.kernel_start_pointer = 0x%x\n", (*ptr).thread0.kernel_start_pointer);
    debug_printf("\t\t.thread1.ext_halt_exception_enable = 0x%x\n", (*ptr).thread1.ext_halt_exception_enable);
    debug_printf("\t\t.thread1.sw_exception_enable = 0x%x\n", (*ptr).thread1.sw_exception_enable);
    debug_printf("\t\t.thread1.mask_stack_exception_enable = 0x%x\n", (*ptr).thread1.mask_stack_exception_enable);
    debug_printf("\t\t.thread1.timeout_exception_enable = 0x%x\n", (*ptr).thread1.timeout_exception_enable);
    debug_printf("\t\t.thread1.illegal_op_exception_enable = 0x%x\n", (*ptr).thread1.illegal_op_exception_enable);
-   debug_printf("\t\t.thread1.pad0 = 0x%x\n", (*ptr).thread1.pad0);
    debug_printf("\t\t.thread1.depth_coef_urb_read_offset = 0x%x\n", (*ptr).thread1.depth_coef_urb_read_offset);
-   debug_printf("\t\t.thread1.pad1 = 0x%x\n", (*ptr).thread1.pad1);
    debug_printf("\t\t.thread1.floating_point_mode = 0x%x\n", (*ptr).thread1.floating_point_mode);
    debug_printf("\t\t.thread1.thread_priority = 0x%x\n", (*ptr).thread1.thread_priority);
    debug_printf("\t\t.thread1.binding_table_entry_count = 0x%x\n", (*ptr).thread1.binding_table_entry_count);
-   debug_printf("\t\t.thread1.pad3 = 0x%x\n", (*ptr).thread1.pad3);
    debug_printf("\t\t.thread1.single_program_flow = 0x%x\n", (*ptr).thread1.single_program_flow);
    debug_printf("\t\t.thread2.per_thread_scratch_space = 0x%x\n", (*ptr).thread2.per_thread_scratch_space);
-   debug_printf("\t\t.thread2.pad0 = 0x%x\n", (*ptr).thread2.pad0);
    debug_printf("\t\t.thread2.scratch_space_base_pointer = 0x%x\n", (*ptr).thread2.scratch_space_base_pointer);
    debug_printf("\t\t.thread3.dispatch_grf_start_reg = 0x%x\n", (*ptr).thread3.dispatch_grf_start_reg);
    debug_printf("\t\t.thread3.urb_entry_read_offset = 0x%x\n", (*ptr).thread3.urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad0 = 0x%x\n", (*ptr).thread3.pad0);
    debug_printf("\t\t.thread3.urb_entry_read_length = 0x%x\n", (*ptr).thread3.urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad1 = 0x%x\n", (*ptr).thread3.pad1);
    debug_printf("\t\t.thread3.const_urb_entry_read_offset = 0x%x\n", (*ptr).thread3.const_urb_entry_read_offset);
-   debug_printf("\t\t.thread3.pad2 = 0x%x\n", (*ptr).thread3.pad2);
    debug_printf("\t\t.thread3.const_urb_entry_read_length = 0x%x\n", (*ptr).thread3.const_urb_entry_read_length);
-   debug_printf("\t\t.thread3.pad3 = 0x%x\n", (*ptr).thread3.pad3);
    debug_printf("\t\t.wm4.stats_enable = 0x%x\n", (*ptr).wm4.stats_enable);
    debug_printf("\t\t.wm4.depth_buffer_clear = 0x%x\n", (*ptr).wm4.depth_buffer_clear);
    debug_printf("\t\t.wm4.sampler_count = 0x%x\n", (*ptr).wm4.sampler_count);
@@ -1478,7 +1221,6 @@ brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
    debug_printf("\t\t.wm5.enable_32_pix = 0x%x\n", (*ptr).wm5.enable_32_pix);
    debug_printf("\t\t.wm5.enable_con_32_pix = 0x%x\n", (*ptr).wm5.enable_con_32_pix);
    debug_printf("\t\t.wm5.enable_con_64_pix = 0x%x\n", (*ptr).wm5.enable_con_64_pix);
-   debug_printf("\t\t.wm5.pad0 = 0x%x\n", (*ptr).wm5.pad0);
    debug_printf("\t\t.wm5.legacy_global_depth_bias = 0x%x\n", (*ptr).wm5.legacy_global_depth_bias);
    debug_printf("\t\t.wm5.line_stipple = 0x%x\n", (*ptr).wm5.line_stipple);
    debug_printf("\t\t.wm5.depth_offset = 0x%x\n", (*ptr).wm5.depth_offset);
@@ -1495,17 +1237,11 @@ brw_dump_wm_unit_state(const struct brw_wm_unit_state *ptr)
    debug_printf("\t\t.wm5.max_threads = 0x%x\n", (*ptr).wm5.max_threads);
    debug_printf("\t\t.global_depth_offset_constant = %f\n", (*ptr).global_depth_offset_constant);
    debug_printf("\t\t.global_depth_offset_scale = %f\n", (*ptr).global_depth_offset_scale);
-   debug_printf("\t\t.wm8.pad0 = 0x%x\n", (*ptr).wm8.pad0);
    debug_printf("\t\t.wm8.grf_reg_count_1 = 0x%x\n", (*ptr).wm8.grf_reg_count_1);
-   debug_printf("\t\t.wm8.pad1 = 0x%x\n", (*ptr).wm8.pad1);
    debug_printf("\t\t.wm8.kernel_start_pointer_1 = 0x%x\n", (*ptr).wm8.kernel_start_pointer_1);
-   debug_printf("\t\t.wm9.pad0 = 0x%x\n", (*ptr).wm9.pad0);
    debug_printf("\t\t.wm9.grf_reg_count_2 = 0x%x\n", (*ptr).wm9.grf_reg_count_2);
-   debug_printf("\t\t.wm9.pad1 = 0x%x\n", (*ptr).wm9.pad1);
    debug_printf("\t\t.wm9.kernel_start_pointer_2 = 0x%x\n", (*ptr).wm9.kernel_start_pointer_2);
-   debug_printf("\t\t.wm10.pad0 = 0x%x\n", (*ptr).wm10.pad0);
    debug_printf("\t\t.wm10.grf_reg_count_3 = 0x%x\n", (*ptr).wm10.grf_reg_count_3);
-   debug_printf("\t\t.wm10.pad1 = 0x%x\n", (*ptr).wm10.pad1);
    debug_printf("\t\t.wm10.kernel_start_pointer_3 = 0x%x\n", (*ptr).wm10.kernel_start_pointer_3);
 }
 
diff --git a/src/gallium/drivers/i965/brw_structs_dump.py b/src/gallium/drivers/i965/brw_structs_dump.py
index 581515878e..6dba49ad91 100755
--- a/src/gallium/drivers/i965/brw_structs_dump.py
+++ b/src/gallium/drivers/i965/brw_structs_dump.py
@@ -40,6 +40,7 @@ copyright = '''
 
 import os
 import sys
+import re
 
 from pygccxml import parser
 from pygccxml import declarations
@@ -53,6 +54,11 @@ from pygccxml.declarations import type_visitor
 enums = True
 
 
+def vars_filter(variable):
+    name = variable.name
+    return not re.match('^pad\d*', name) and name != 'dword' 
+
+
 class decl_dumper_t(decl_visitor.decl_visitor_t):
 
     def __init__(self, stream, instance = '', decl = None):
@@ -69,7 +75,8 @@ class decl_dumper_t(decl_visitor.decl_visitor_t):
         assert self.decl.class_type in ('struct', 'union')
 
         for variable in class_.variables(recursive = False):
-            dump_type(self.stream, self._instance + '.' + variable.name, variable.type)
+            if vars_filter(variable):
+                dump_type(self.stream, self._instance + '.' + variable.name, variable.type)
 
     def visit_enumeration(self):
         if enums:
-- 
cgit v1.2.3


From c796aed5ddad011d66e631c4cafdbf779e73f213 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 13:57:05 +0000
Subject: i965g: add lots of error checks and early returns

Any allocation that may fail should be checked, and the error
propagated upwards.  At the highest level we will flush the batch and
retry.

This is an alternative to the original DRI driver's strategy of
attempting to flush the batch from the lowest levels (e.g. inside
BEGIN_BATCH).  The trouble with that strategy was that flushes could
occur at unexpected times, and additionally there was a need for a
weird notification mechanism to propagate the 'lost context' state
back up to higher levels.

Propagating the errors directly gives us a lot of flexibility in how
to deal with these states, at the expense of a lot more checking in
the code.

Will add some sanity checks later to make sure that out-of-memory
conditions are properly escalated and not lost halfway up the stack.
---
 src/gallium/drivers/i965/brw_batchbuffer.c        |  19 +-
 src/gallium/drivers/i965/brw_batchbuffer.h        |   3 +-
 src/gallium/drivers/i965/brw_cc.c                 |  73 ++++---
 src/gallium/drivers/i965/brw_clip.c               |  60 ++++--
 src/gallium/drivers/i965/brw_clip_state.c         |  60 +++---
 src/gallium/drivers/i965/brw_context.c            |  46 +++--
 src/gallium/drivers/i965/brw_context.h            |   2 +-
 src/gallium/drivers/i965/brw_curbe.c              |  18 +-
 src/gallium/drivers/i965/brw_draw.c               |   3 +-
 src/gallium/drivers/i965/brw_draw_upload.c        |  18 +-
 src/gallium/drivers/i965/brw_eu.c                 |  13 +-
 src/gallium/drivers/i965/brw_eu.h                 |   8 +-
 src/gallium/drivers/i965/brw_gs.c                 |  69 ++++---
 src/gallium/drivers/i965/brw_gs_state.c           |  48 +++--
 src/gallium/drivers/i965/brw_pipe_query.c         |  31 +--
 src/gallium/drivers/i965/brw_pipe_shader.c        |   3 +-
 src/gallium/drivers/i965/brw_pipe_vertex.c        |   2 +-
 src/gallium/drivers/i965/brw_screen_buffers.c     |  16 +-
 src/gallium/drivers/i965/brw_screen_surface.c     |   7 +-
 src/gallium/drivers/i965/brw_screen_texture.c     |  17 +-
 src/gallium/drivers/i965/brw_sf.c                 |  52 +++--
 src/gallium/drivers/i965/brw_sf_state.c           |  86 ++++----
 src/gallium/drivers/i965/brw_state.h              |  71 +++----
 src/gallium/drivers/i965/brw_state_cache.c        | 115 +++++------
 src/gallium/drivers/i965/brw_state_upload.c       |   3 +-
 src/gallium/drivers/i965/brw_vs.c                 |  56 +++---
 src/gallium/drivers/i965/brw_vs_state.c           |  58 +++---
 src/gallium/drivers/i965/brw_vs_surface_state.c   |  97 ++++++----
 src/gallium/drivers/i965/brw_winsys.h             |  56 ++++--
 src/gallium/drivers/i965/brw_wm.c                 |  78 ++++----
 src/gallium/drivers/i965/brw_wm_constant_buffer.c |  87 +++++----
 src/gallium/drivers/i965/brw_wm_sampler_state.c   |  98 ++++++----
 src/gallium/drivers/i965/brw_wm_state.c           | 103 ++++++----
 src/gallium/drivers/i965/brw_wm_surface_state.c   | 226 ++++++++++++----------
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c      |  46 ++---
 35 files changed, 1003 insertions(+), 745 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index ca612e5ed0..e5f73bd6a3 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -38,17 +38,17 @@
 #define USE_MALLOC_BUFFER 1
 #define ALWAYS_EMIT_MI_FLUSH 1
 
-void
+enum pipe_error
 brw_batchbuffer_reset(struct brw_batchbuffer *batch)
 {
-   if (batch->buf) {
-      batch->sws->bo_unreference(batch->buf);
-      batch->buf = NULL;
-   }
+   enum pipe_error ret;
 
-   batch->buf = batch->sws->bo_alloc(batch->sws,
-				     BRW_BUFFER_TYPE_BATCH,
-				     BRW_BATCH_SIZE, 4096);
+   ret = batch->sws->bo_alloc( batch->sws,
+                               BRW_BUFFER_TYPE_BATCH,
+                               BRW_BATCH_SIZE, 4096,
+                               &batch->buf );
+   if (ret)
+      return ret;
 
    if (batch->malloc_buffer)
       batch->map = batch->malloc_buffer;
@@ -59,6 +59,7 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
 
    batch->size = BRW_BATCH_SIZE;
    batch->ptr = batch->map;
+   return PIPE_OK;
 }
 
 struct brw_batchbuffer *
@@ -91,7 +92,7 @@ brw_batchbuffer_free(struct brw_batchbuffer *batch)
       batch->map = NULL;
    }
 
-   batch->sws->bo_unreference(batch->buf);
+   bo_reference(&batch->buf, NULL);
    FREE(batch);
 }
 
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 1f04826aea..288a9d2755 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -65,7 +65,8 @@ void _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
 			      const char *file, int line);
 
 
-void brw_batchbuffer_reset(struct brw_batchbuffer *batch);
+enum pipe_error
+brw_batchbuffer_reset(struct brw_batchbuffer *batch);
 
 
 /* Unlike bmBufferData, this currently requires the buffer be mapped.
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 20967f0191..8e25fe8585 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -57,10 +57,11 @@ static void calc_sane_viewport( const struct pipe_viewport_state *vp,
    svp->far = 1;
 }
 
-static int prepare_cc_vp( struct brw_context *brw )
+static enum pipe_error prepare_cc_vp( struct brw_context *brw )
 {
    struct brw_cc_viewport ccv;
    struct sane_viewport svp;
+   enum pipe_error ret;
 
    memset(&ccv, 0, sizeof(ccv));
 
@@ -70,10 +71,12 @@ static int prepare_cc_vp( struct brw_context *brw )
    ccv.min_depth = svp.near;
    ccv.max_depth = svp.far;
 
-   brw->sws->bo_unreference(brw->cc.vp_bo);
-   brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 );
-
-   return 0;
+   ret = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0,
+                         &brw->cc.vp_bo );
+   if (ret)
+      return ret;
+                
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -123,11 +126,13 @@ cc_unit_populate_key(const struct brw_context *brw,
 /**
  * Creates the state cache entry for the given CC unit key.
  */
-static struct brw_winsys_buffer *
-cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
+static enum pipe_error
+cc_unit_create_from_key(struct brw_context *brw, 
+                        struct brw_cc_unit_key *key,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_cc_unit_state cc;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
 
    memset(&cc, 0, sizeof(cc));
 
@@ -143,38 +148,48 @@ cc_unit_create_from_key(struct brw_context *brw, struct brw_cc_unit_key *key)
    cc.cc6 = key->cc6;
    cc.cc7 = key->cc7;
 
-   bo = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
-			 key, sizeof(*key),
-			 &brw->cc.vp_bo, 1,
-			 &cc, sizeof(cc),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
+                          key, sizeof(*key),
+                          &brw->cc.vp_bo, 1,
+                          &cc, sizeof(cc),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
-   /* Emit CC viewport relocation */
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   0,
-			   offsetof(struct brw_cc_unit_state, cc4),
-			   brw->cc.vp_bo);
 
-   return bo;
+   /* Emit CC viewport relocation */
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 0,
+                                 offsetof(struct brw_cc_unit_state, cc4),
+                                 brw->cc.vp_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 static int prepare_cc_unit( struct brw_context *brw )
 {
    struct brw_cc_unit_key key;
+   enum pipe_error ret;
 
    cc_unit_populate_key(brw, &key);
 
-   brw->sws->bo_unreference(brw->cc.state_bo);
-   brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_CC_UNIT,
-				       &key, sizeof(key),
-				       &brw->cc.vp_bo, 1,
-				       NULL);
-
-   if (brw->cc.state_bo == NULL)
-      brw->cc.state_bo = cc_unit_create_from_key(brw, &key);
+   if (brw_search_cache(&brw->cache, BRW_CC_UNIT,
+                        &key, sizeof(key),
+                        &brw->cc.vp_bo, 1,
+                        NULL,
+                        &brw->cc.state_bo))
+      return PIPE_OK;
+
+   ret = cc_unit_create_from_key(brw, &key, 
+                                 &brw->cc.state_bo);
+   if (ret)
+      return ret;
    
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_cc_unit = {
diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 1a52fa771b..35e1d2fdbd 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -48,9 +48,12 @@
 #define BACK_UNFILLED_BIT   0x2
 
 
-static void compile_clip_prog( struct brw_context *brw,
-			     struct brw_clip_prog_key *key )
+static enum pipe_error
+compile_clip_prog( struct brw_context *brw,
+                   struct brw_clip_prog_key *key,
+                   struct brw_winsys_buffer **bo_out )
 {
+   enum pipe_error ret;
    struct brw_clip_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -123,31 +126,39 @@ static void compile_clip_prog( struct brw_context *brw,
       break;
    default:
       assert(0);
-      return;
+      return PIPE_ERROR_BAD_INPUT;
    }
 
 	 
    /* get the program
     */
-   program = brw_get_program(&c.func, &program_size);
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
 
    /* Upload
     */
-   brw->sws->bo_unreference(brw->clip.prog_bo);
-   brw->clip.prog_bo = brw_upload_cache( &brw->cache,
-					 BRW_CLIP_PROG,
-					 &c.key, sizeof(c.key),
-					 NULL, 0,
-					 program, program_size,
-					 &c.prog_data,
-					 &brw->clip.prog_data );
+   ret = brw_upload_cache( &brw->cache,
+                           BRW_CLIP_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->clip.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static int upload_clip_prog(struct brw_context *brw)
+static enum pipe_error
+upload_clip_prog(struct brw_context *brw)
 {
+   enum pipe_error ret;
    struct brw_clip_prog_key key;
 
    /* Populate the key, starting from the almost-complete version from
@@ -166,15 +177,22 @@ static int upload_clip_prog(struct brw_context *brw)
    /* PIPE_NEW_CLIP */
    key.nr_userclip = brw->curr.ucp.nr;
 
-   brw->sws->bo_unreference(brw->clip.prog_bo);
-   brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG,
-					&key, sizeof(key),
-					NULL, 0,
-					&brw->clip.prog_data);
-   if (brw->clip.prog_bo == NULL)
-      compile_clip_prog( brw, &key );
+   /* Already cached?
+    */
+   if (brw_search_cache(&brw->cache, BRW_CLIP_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->clip.prog_data,
+                        &brw->clip.prog_bo))
+      return PIPE_OK;
+
+   /* Compile new program:
+    */
+   ret = compile_clip_prog( brw, &key, &brw->clip.prog_bo );
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 6f8309fea9..d4e3c43c61 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -72,12 +72,13 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
    key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;
 }
 
-static struct brw_winsys_buffer *
+static enum pipe_error
 clip_unit_create_from_key(struct brw_context *brw,
-			  struct brw_clip_unit_key *key)
+                          struct brw_clip_unit_key *key,
+                          struct brw_winsys_buffer **bo_out)
 {
    struct brw_clip_unit_state clip;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
 
    memset(&clip, 0, sizeof(clip));
 
@@ -141,39 +142,50 @@ clip_unit_create_from_key(struct brw_context *brw,
    clip.viewport_ymin = -1;
    clip.viewport_ymax = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
-			 key, sizeof(*key),
-			 &brw->clip.prog_bo, 1,
-			 &clip, sizeof(clip),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
+                          key, sizeof(*key),
+                          &brw->clip.prog_bo, 1,
+                          &clip, sizeof(clip),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    /* Emit clip program relocation */
    assert(brw->clip.prog_bo);
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   clip.thread0.grf_reg_count << 1,
-			   offsetof(struct brw_clip_unit_state, thread0),
-			   brw->clip.prog_bo);
-
-   return bo;
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 clip.thread0.grf_reg_count << 1,
+                                 offsetof(struct brw_clip_unit_state, thread0),
+                                 brw->clip.prog_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 static int upload_clip_unit( struct brw_context *brw )
 {
    struct brw_clip_unit_key key;
+   enum pipe_error ret;
 
    clip_unit_populate_key(brw, &key);
 
-   brw->sws->bo_unreference(brw->clip.state_bo);
-   brw->clip.state_bo = brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
-					 &key, sizeof(key),
-					 &brw->clip.prog_bo, 1,
-					 NULL);
-   if (brw->clip.state_bo == NULL) {
-      brw->clip.state_bo = clip_unit_create_from_key(brw, &key);
-   }
+   if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
+                        &key, sizeof(key),
+                        &brw->clip.prog_bo, 1,
+                        NULL,
+                        &brw->clip.state_bo))
+      return PIPE_OK;
+      
+   /* Create new:
+    */
+   ret = clip_unit_create_from_key(brw, &key, 
+                                   &brw->clip.state_bo);
+   if (ret)
+      return ret;
    
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_clip_unit = {
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index aaf7d1834e..2cee7a7a3c 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -72,29 +72,33 @@ static void brw_destroy_context( struct pipe_context *pipe )
    brw->curr.fb.nr_cbufs = 0;
    pipe_surface_reference(&brw->curr.fb.zsbuf, NULL);
 
-   brw->sws->bo_unreference(brw->curbe.curbe_bo);
-   brw->sws->bo_unreference(brw->vs.prog_bo);
-   brw->sws->bo_unreference(brw->vs.state_bo);
-   brw->sws->bo_unreference(brw->vs.bind_bo);
-   brw->sws->bo_unreference(brw->gs.prog_bo);
-   brw->sws->bo_unreference(brw->gs.state_bo);
-   brw->sws->bo_unreference(brw->clip.prog_bo);
-   brw->sws->bo_unreference(brw->clip.state_bo);
-   brw->sws->bo_unreference(brw->clip.vp_bo);
-   brw->sws->bo_unreference(brw->sf.prog_bo);
-   brw->sws->bo_unreference(brw->sf.state_bo);
-   brw->sws->bo_unreference(brw->sf.vp_bo);
+   bo_reference(&brw->curbe.curbe_bo, NULL);
+   bo_reference(&brw->vs.prog_bo, NULL);
+   bo_reference(&brw->vs.state_bo, NULL);
+   bo_reference(&brw->vs.bind_bo, NULL);
+   bo_reference(&brw->gs.prog_bo, NULL);
+   bo_reference(&brw->gs.state_bo, NULL);
+   bo_reference(&brw->clip.prog_bo, NULL);
+   bo_reference(&brw->clip.state_bo, NULL);
+   bo_reference(&brw->clip.vp_bo, NULL);
+   bo_reference(&brw->sf.prog_bo, NULL);
+   bo_reference(&brw->sf.state_bo, NULL);
+   bo_reference(&brw->sf.vp_bo, NULL);
+
    for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
-      brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
-   brw->sws->bo_unreference(brw->wm.bind_bo);
+      bo_reference(&brw->wm.sdc_bo[i], NULL);
+
+   bo_reference(&brw->wm.bind_bo, NULL);
+
    for (i = 0; i < BRW_WM_MAX_SURF; i++)
-      brw->sws->bo_unreference(brw->wm.surf_bo[i]);
-   brw->sws->bo_unreference(brw->wm.sampler_bo);
-   brw->sws->bo_unreference(brw->wm.prog_bo);
-   brw->sws->bo_unreference(brw->wm.state_bo);
-   brw->sws->bo_unreference(brw->cc.prog_bo);
-   brw->sws->bo_unreference(brw->cc.state_bo);
-   brw->sws->bo_unreference(brw->cc.vp_bo);
+      bo_reference(&brw->wm.surf_bo[i], NULL);
+
+   bo_reference(&brw->wm.sampler_bo, NULL);
+   bo_reference(&brw->wm.prog_bo, NULL);
+   bo_reference(&brw->wm.state_bo, NULL);
+   bo_reference(&brw->cc.prog_bo, NULL);
+   bo_reference(&brw->cc.state_bo, NULL);
+   bo_reference(&brw->cc.vp_bo, NULL);
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 09d34615c7..580251d2f1 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -744,7 +744,7 @@ struct brw_context
  * brw_queryobj.c
  */
 void brw_init_query(struct brw_context *brw);
-void brw_prepare_query_begin(struct brw_context *brw);
+enum pipe_error brw_prepare_query_begin(struct brw_context *brw);
 void brw_emit_query_begin(struct brw_context *brw);
 void brw_emit_query_end(struct brw_context *brw);
 
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 1e2e232204..ca7774a7cc 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -160,10 +160,11 @@ static GLfloat fixed_plane[6][4] = {
  * cache mechanism, but maybe would benefit from a comparison against
  * the current uploaded set of constants.
  */
-static int prepare_curbe_buffer(struct brw_context *brw)
+static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
 {
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
+   enum pipe_error ret;
    GLfloat *buf;
    GLuint i;
 
@@ -267,17 +268,20 @@ static int prepare_curbe_buffer(struct brw_context *brw)
 	  (brw->curbe.need_new_bo ||
 	   brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
       {
-	 brw->sws->bo_unreference(brw->curbe.curbe_bo);
-	 brw->curbe.curbe_bo = NULL;
+	 bo_reference(&brw->curbe.curbe_bo, NULL);
       }
 
       if (brw->curbe.curbe_bo == NULL) {
 	 /* Allocate a single page for CURBE entries for this batchbuffer.
 	  * They're generally around 64b.
 	  */
-	 brw->curbe.curbe_bo = brw->sws->bo_alloc(brw->sws, 
-						  BRW_BUFFER_TYPE_CURBE,
-						  4096, 1 << 6);
+	 ret = brw->sws->bo_alloc(brw->sws, 
+                                  BRW_BUFFER_TYPE_CURBE,
+                                  4096, 1 << 6,
+                                  &brw->curbe.curbe_bo);
+         if (ret)
+            return ret;
+
 	 brw->curbe.curbe_next_offset = 0;
       }
 
@@ -313,7 +317,7 @@ static int prepare_curbe_buffer(struct brw_context *brw)
    return 0;
 }
 
-static int emit_curbe_buffer(struct brw_context *brw)
+static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
 {
    GLuint sz = brw->curbe.total_size;
 
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 6d6b1c7c5c..88cb31ad54 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -280,6 +280,5 @@ void brw_draw_cleanup( struct brw_context *brw )
    u_upload_destroy( brw->vb.upload_vertex );
    u_upload_destroy( brw->vb.upload_index );
 
-   brw->sws->bo_unreference(brw->ib.bo);
-   brw->ib.bo = NULL;
+   bo_reference(&brw->ib.bo, NULL);
 }
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 4fa7d549eb..188605a0c1 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -251,9 +251,8 @@ static int brw_prepare_vertices(struct brw_context *brw)
       brw->vb.vb[i].vertex_count = (vb->stride == 0 ?
 				    1 :
 				    (bo->size - offset) / vb->stride);
-      brw->sws->bo_unreference(brw->vb.vb[i].bo);
-      brw->vb.vb[i].bo = bo;
-      brw->sws->bo_reference(brw->vb.vb[i].bo);
+
+      bo_reference( &brw->vb.vb[i].bo,  bo );
 
       /* Don't need to retain this reference.  We have a reference on
        * the underlying winsys buffer:
@@ -417,6 +416,7 @@ const struct brw_tracked_state brw_vertices = {
 static int brw_prepare_indices(struct brw_context *brw)
 {
    struct pipe_buffer *index_buffer = brw->curr.index_buffer;
+   struct pipe_buffer *upload_buf = NULL;
    struct brw_winsys_buffer *bo = NULL;
    GLuint offset;
    GLuint index_size;
@@ -438,7 +438,6 @@ static int brw_prepare_indices(struct brw_context *brw)
    /* Turn userbuffer into a proper hardware buffer?
     */
    if (brw_buffer_is_user_buffer(index_buffer)) {
-      struct pipe_buffer *upload_buf;
 
       ret = u_upload_buffer( brw->vb.upload_index,
 			     0,
@@ -450,8 +449,6 @@ static int brw_prepare_indices(struct brw_context *brw)
 	 return ret;
 
       bo = brw_buffer(upload_buf)->bo;
-      brw->sws->bo_reference(bo);
-      pipe_buffer_reference( &upload_buf, NULL );
 
       /* XXX: annotate the userbuffer with the upload information so
        * that successive calls don't get re-uploaded.
@@ -459,8 +456,6 @@ static int brw_prepare_indices(struct brw_context *brw)
    }
    else {
       bo = brw_buffer(index_buffer)->bo;
-      brw->sws->bo_reference(bo);
-      
       ib_size = bo->size;
       offset = 0;
    }
@@ -486,15 +481,12 @@ static int brw_prepare_indices(struct brw_context *brw)
    if (brw->ib.bo != bo ||
        brw->ib.size != ib_size)
    {
-      brw->sws->bo_unreference(brw->ib.bo);
-      brw->ib.bo = bo;
+      bo_reference(&brw->ib.bo, bo);
       brw->ib.size = ib_size;
       brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER;
    }
-   else {
-      brw->sws->bo_unreference(bo);
-   }
 
+   pipe_buffer_reference( &upload_buf, NULL );
    brw_add_validated_bo(brw, brw->ib.bo);
    return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_eu.c b/src/gallium/drivers/i965/brw_eu.c
index de43b14512..a8fcb5f97e 100644
--- a/src/gallium/drivers/i965/brw_eu.c
+++ b/src/gallium/drivers/i965/brw_eu.c
@@ -118,16 +118,23 @@ void brw_init_compile( struct brw_context *brw, struct brw_compile *p )
 }
 
 
-const GLuint *brw_get_program( struct brw_compile *p,
-			       GLuint *sz )
+enum pipe_error brw_get_program( struct brw_compile *p,
+                                 const GLuint **data,
+                                 GLuint *sz )
 {
    GLuint i;
 
    for (i = 0; i < 8; i++)
       brw_NOP(p);
 
+   /* Is the generated program malformed for some reason?
+    */
+   if (p->error)
+      return PIPE_ERROR_BAD_INPUT;
+
    *sz = p->nr_insn * sizeof(struct brw_instruction);
-   return (const GLuint *)p->store;
+   *data = (const GLuint *)p->store;
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index 7bddc3859c..565f4ef1c5 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -34,6 +34,7 @@
 #define BRW_EU_H
 
 #include "util/u_debug.h"
+#include "pipe/p_error.h"
 
 #include "brw_structs.h"
 #include "brw_defines.h"
@@ -132,6 +133,8 @@ struct brw_compile {
 
    struct brw_eu_label *first_label;  /**< linked list of labels */
    struct brw_eu_call *first_call;    /**< linked list of CALs */
+
+   boolean error;
 };
 
 
@@ -772,7 +775,10 @@ void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
 void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
 
 void brw_init_compile( struct brw_context *, struct brw_compile *p );
-const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+enum pipe_error brw_get_program( struct brw_compile *p, 
+                                 const GLuint **program,
+                                 GLuint *sz );
 
 
 /* Helpers for regular instructions:
diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index 693d8bfdf8..ce77be24f6 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -40,10 +40,12 @@
 
 
-static void compile_gs_prog( struct brw_context *brw,
-			     struct brw_gs_prog_key *key )
+static enum pipe_error compile_gs_prog( struct brw_context *brw,
+                                        struct brw_gs_prog_key *key,
+                                        struct brw_winsys_buffer **bo_out )
 {
    struct brw_gs_compile c;
+   enum pipe_error ret;
    const GLuint *program;
    GLuint program_size;
 
@@ -57,9 +59,9 @@ static void compile_gs_prog( struct brw_context *brw,
    c.nr_attrs = c.key.nr_attrs;
 
    if (BRW_IS_IGDNG(brw))
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
+      c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
    else
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+      c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
 
    c.nr_bytes = c.nr_regs * REG_SIZE;
 
@@ -93,40 +95,47 @@ static void compile_gs_prog( struct brw_context *brw,
       if (key->hint_gs_always)
 	 brw_gs_lines( &c );
       else {
-	 return;
+	 return PIPE_OK;
       }
       break;
    case PIPE_PRIM_TRIANGLES:
       if (key->hint_gs_always)
 	 brw_gs_tris( &c );
       else {
-	 return;
+	 return PIPE_OK;
       }
       break;
    case PIPE_PRIM_POINTS:
       if (key->hint_gs_always)
 	 brw_gs_points( &c );
       else {
-	 return;
+	 return PIPE_OK;
       }
-      break;      
+      break;
    default:
-      return;
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
    }
 
    /* get the program
     */
-   program = brw_get_program(&c.func, &program_size);
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
 
    /* Upload
     */
-   brw->sws->bo_unreference(brw->gs.prog_bo);
-   brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->gs.prog_data );
+   ret = brw_upload_cache( &brw->cache, BRW_GS_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->gs.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 static const unsigned gs_prim[PIPE_PRIM_MAX] = {  
@@ -166,6 +175,8 @@ static void populate_key( struct brw_context *brw,
 static int prepare_gs_prog(struct brw_context *brw)
 {
    struct brw_gs_prog_key key;
+   enum pipe_error ret;
+
    /* Populate the key:
     */
    populate_key(brw, &key);
@@ -175,17 +186,21 @@ static int prepare_gs_prog(struct brw_context *brw)
       brw->gs.prog_active = key.need_gs_prog;
    }
 
-   if (brw->gs.prog_active) {
-      brw->sws->bo_unreference(brw->gs.prog_bo);
-      brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
-					 &key, sizeof(key),
-					 NULL, 0,
-					 &brw->gs.prog_data);
-      if (brw->gs.prog_bo == NULL)
-	 compile_gs_prog( brw, &key );
-   }
+   if (!brw->gs.prog_active)
+      return PIPE_OK;
+
+   if (brw_search_cache(&brw->cache, BRW_GS_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->gs.prog_data,
+                        &brw->gs.prog_bo))
+      return PIPE_OK;
+
+   ret = compile_gs_prog( brw, &key, &brw->gs.prog_bo );
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index f27f886a65..18a66da538 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -69,11 +69,13 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    key->urb_size = brw->urb.vsize;
 }
 
-static struct brw_winsys_buffer *
-gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
+static enum pipe_error
+gs_unit_create_from_key(struct brw_context *brw, 
+                        struct brw_gs_unit_key *key,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_gs_unit_state gs;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
 
    memset(&gs, 0, sizeof(gs));
 
@@ -104,40 +106,49 @@ gs_unit_create_from_key(struct brw_context *brw, struct brw_gs_unit_key *key)
    if (BRW_DEBUG & DEBUG_STATS)
       gs.thread4.stats_enable = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
-			 key, sizeof(*key),
-			 &brw->gs.prog_bo, 1,
-			 &gs, sizeof(gs),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
+                          key, sizeof(*key),
+                          &brw->gs.prog_bo, 1,
+                          &gs, sizeof(gs),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    if (key->prog_active) {
       /* Emit GS program relocation */
-      brw->sws->bo_emit_reloc(bo,
+      ret = brw->sws->bo_emit_reloc(*bo_out,
 			      BRW_USAGE_STATE,
 			      gs.thread0.grf_reg_count << 1,
 			      offsetof(struct brw_gs_unit_state, thread0),
 			      brw->gs.prog_bo);
+      /* Propagate relocation failures to the caller. */
+      if (ret)
+         return ret;
    }
 
-   return bo;
+   return PIPE_OK;
 }
 
-static int prepare_gs_unit(struct brw_context *brw)
+static enum pipe_error prepare_gs_unit(struct brw_context *brw)
 {
    struct brw_gs_unit_key key;
+   enum pipe_error ret;
 
    gs_unit_populate_key(brw, &key);
 
-   brw->sws->bo_unreference(brw->gs.state_bo);
-   brw->gs.state_bo = brw_search_cache(&brw->cache, BRW_GS_UNIT,
-				       &key, sizeof(key),
-				       &brw->gs.prog_bo, 1,
-				       NULL);
-   if (brw->gs.state_bo == NULL) {
-      brw->gs.state_bo = gs_unit_create_from_key(brw, &key);
-   }
+   if (brw_search_cache(&brw->cache, BRW_GS_UNIT,
+                        &key, sizeof(key),
+                        &brw->gs.prog_bo, 1,
+                        NULL,
+                        &brw->gs.state_bo))
+      return PIPE_OK;
+
+   ret = gs_unit_create_from_key(brw, &key, &brw->gs.state_bo);
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_gs_unit = {
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 3370ebd262..6a01173787 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -72,8 +72,7 @@ brw_query_get_result(struct pipe_context *pipe,
       }
 
       brw->sws->bo_unmap(query->bo);
-      brw->sws->bo_unreference(query->bo);
-      query->bo = NULL;
+      bo_reference(&query->bo, NULL);
    }
 
    *result = query->result;
@@ -100,10 +99,9 @@ brw_query_create(struct pipe_context *pipe, unsigned type )
 static void
 brw_query_destroy(struct pipe_context *pipe, struct pipe_query *q)
 {
-   struct brw_context *brw = brw_context(pipe);
    struct brw_query_object *query = (struct brw_query_object *)q;
 
-   brw->sws->bo_unreference(query->bo);
+   bo_reference(&query->bo, NULL);
    FREE(query);
 }
 
@@ -114,9 +112,8 @@ brw_query_begin(struct pipe_context *pipe, struct pipe_query *q)
    struct brw_query_object *query = (struct brw_query_object *)q;
 
    /* Reset our driver's tracking of query state. */
-   brw->sws->bo_unreference(query->bo);
+   bo_reference(&query->bo, NULL);
    query->result = 0;
-   query->bo = NULL;
    query->first_index = -1;
    query->last_index = -1;
 
@@ -139,8 +136,7 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
       brw_emit_query_end(brw);
       brw_context_flush( brw );
 
-      brw->sws->bo_unreference(brw->query.bo);
-      brw->query.bo = NULL;
+      bo_reference(&brw->query.bo, NULL);
    }
 
    remove_from_list(query);
@@ -153,24 +149,30 @@ brw_query_end(struct pipe_context *pipe, struct pipe_query *q)
  */
 
 /** Called to set up the query BO and account for its aperture space */
-void
+enum pipe_error
 brw_prepare_query_begin(struct brw_context *brw)
 {
+   enum pipe_error ret;
+
    /* Skip if we're not doing any queries. */
    if (is_empty_list(&brw->query.active_head))
-      return;
+      return PIPE_OK;
 
    /* Get a new query BO if we're going to need it. */
    if (brw->query.bo == NULL ||
        brw->query.index * 2 + 1 >= 4096 / sizeof(uint64_t)) {
-      brw->sws->bo_unreference(brw->query.bo);
-      brw->query.bo = NULL;
 
-      brw->query.bo = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1);
+      ret = brw->sws->bo_alloc(brw->sws, BRW_BUFFER_TYPE_QUERY, 4096, 1,
+                               &brw->query.bo);
+      if (ret)
+         return ret;
+
       brw->query.index = 0;
    }
 
    brw_add_validated_bo(brw, brw->query.bo);
+
+   return PIPE_OK;
 }
 
 /** Called just before primitive drawing to get a beginning PS_DEPTH_COUNT. */
@@ -213,8 +215,7 @@ brw_emit_query_begin(struct brw_context *brw)
 				  FALSE,
 				  &tmp );
 
-	 brw->sws->bo_reference(brw->query.bo);
-	 query->bo = brw->query.bo;
+	 bo_reference( &query->bo, brw->query.bo );
 	 query->first_index = brw->query.index;
       }
       query->last_index = brw->query.index;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 2833f2bce0..662c43c3e5 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -146,10 +146,9 @@ fail:
 
 static void brw_delete_fs_state( struct pipe_context *pipe, void *prog )
 {
-   struct brw_context *brw = brw_context(pipe);
    struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
 
-   brw->sws->bo_unreference(fs->const_buffer);
+   bo_reference(&fs->const_buffer, NULL);
    FREE( (void *)fs->tokens );
    FREE( fs );
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 97e9a23688..73bba5b088 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -56,7 +56,7 @@ brw_pipe_vertex_cleanup( struct brw_context *brw )
     */
 #if 0
    for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
-      brw->sws->bo_unreference(brw->vb.inputs[i].bo);
+      bo_reference(&brw->vb.inputs[i].bo, NULL);
       brw->vb.inputs[i].bo = NULL;
    }
 #endif
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
index ba54740225..7ae386ffb3 100644
--- a/src/gallium/drivers/i965/brw_screen_buffers.c
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -43,15 +43,11 @@ brw_buffer_unmap( struct pipe_screen *screen,
 static void
 brw_buffer_destroy( struct pipe_buffer *buffer )
 {
-   struct brw_screen *bscreen = brw_screen( buffer->screen );
-   struct brw_winsys_screen *sws = bscreen->sws;
    struct brw_buffer *buf = brw_buffer( buffer );
 
    assert(!p_atomic_read(&buffer->reference.count));
 
-   if (buf->bo)
-      sws->bo_unreference(buf->bo);
-   
+   bo_reference(&buf->bo, NULL);
    FREE(buf);
 }
 
@@ -66,6 +62,7 @@ brw_buffer_create(struct pipe_screen *screen,
    struct brw_winsys_screen *sws = bscreen->sws;
    struct brw_buffer *buf;
    unsigned buffer_type;
+   enum pipe_error ret;
    
    buf = CALLOC_STRUCT(brw_buffer);
    if (!buf)
@@ -101,10 +98,16 @@ brw_buffer_create(struct pipe_screen *screen,
       break;
    }
    
-   buf->bo = sws->bo_alloc( sws,
-                            buffer_type,
-                            size,
-                            alignment );
+   ret = sws->bo_alloc( sws, buffer_type,
+                        size, alignment,
+                        &buf->bo );
+   if (ret != PIPE_OK) {
+      /* Don't leak the brw_buffer allocated above when the winsys
+       * allocation fails.
+       */
+      FREE(buf);
+      return NULL;
+   }
       
    return &buf->base; 
 }
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 1c408e9f2e..21a7382873 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -150,9 +150,7 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
    surface->pitch = tex->pitch;
    surface->tiling = tex->tiling;
 
-   surface->bo = tex->bo;
-   brw_screen->sws->bo_reference(surface->bo);
-
+   bo_reference( &surface->bo, tex->bo );
    pipe_texture_reference( &surface->base.texture, &tex->base );
 
    surface->ss.ss0.surface_format = tex->ss.ss0.surface_format;
@@ -244,11 +242,10 @@ static struct pipe_surface *brw_get_tex_surface(struct pipe_screen *screen,
 static void brw_tex_surface_destroy( struct pipe_surface *surf )
 {
    struct brw_surface *surface = brw_surface(surf);
-   struct brw_screen *screen = brw_screen(surf->texture->screen);
 
    /* Unreference texture, shared buffer:
     */
-   screen->sws->bo_unreference(surface->bo);
+   bo_reference(&surface->bo, NULL);
    pipe_texture_reference( &surface->base.texture, NULL );
 
 
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index ba6dc7dfde..355abf0b89 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -187,6 +187,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    struct brw_screen *bscreen = brw_screen(screen);
    struct brw_texture *tex;
    enum brw_buffer_type buffer_type;
+   enum pipe_error ret;
    
    tex = CALLOC_STRUCT(brw_texture);
    if (tex == NULL)
@@ -235,10 +236,13 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
       buffer_type = BRW_BUFFER_TYPE_TEXTURE;
    }
 
-   tex->bo = bscreen->sws->bo_alloc( bscreen->sws,
-                                     buffer_type,
-                                     tex->pitch * tex->total_height * tex->cpp,
-                                     64 );
+   ret = bscreen->sws->bo_alloc( bscreen->sws,
+                                 buffer_type,
+                                 tex->pitch * tex->total_height * tex->cpp,
+                                 64,
+                                 &tex->bo );
+   if (ret)
+      goto fail;
 
    tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
@@ -289,7 +293,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    return &tex->base;
 
 fail:
-   bscreen->sws->bo_unreference(tex->bo);
+   bo_reference(&tex->bo, NULL);
    FREE(tex);
    return NULL;
 }
@@ -306,7 +310,8 @@ static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
 
 static void brw_texture_destroy(struct pipe_texture *pt)
 {
-   //bscreen->sws->bo_unreference(tex->bo);
+   struct brw_texture *tex = brw_texture(pt);
+   bo_reference(&tex->bo, NULL);
    FREE(pt);
 }
 
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 013d839e37..24d1015bbd 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -40,9 +40,11 @@
 #include "brw_sf.h"
 #include "brw_state.h"
 
-static void compile_sf_prog( struct brw_context *brw,
-			     struct brw_sf_prog_key *key )
+static enum pipe_error compile_sf_prog( struct brw_context *brw,
+                                        struct brw_sf_prog_key *key,
+                                        struct brw_winsys_buffer **bo_out )
 {
+   enum pipe_error ret;
    struct brw_sf_compile c;
    const GLuint *program;
    GLuint program_size;
@@ -87,28 +89,35 @@ static void compile_sf_prog( struct brw_context *brw,
       break;
    default:
       assert(0);
-      return;
+      return PIPE_ERROR_BAD_INPUT;
    }
 
    /* get the program
     */
-   program = brw_get_program(&c.func, &program_size);
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
 
    /* Upload
     */
-   brw->sws->bo_unreference(brw->sf.prog_bo);
-   brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->sf.prog_data );
+   ret = brw_upload_cache( &brw->cache, BRW_SF_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->sf.prog_data,
+                           bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 /* Calculate interpolants for triangle and line rasterization.
  */
-static int upload_sf_prog(struct brw_context *brw)
+static enum pipe_error upload_sf_prog(struct brw_context *brw)
 {
+   enum pipe_error ret;
    struct brw_sf_prog_key key;
 
    memset(&key, 0, sizeof(key));
@@ -161,15 +170,18 @@ static int upload_sf_prog(struct brw_context *brw)
 			   PIPE_WINDING_CCW);
    }
 
-   brw->sws->bo_unreference(brw->sf.prog_bo);
-   brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG,
-				      &key, sizeof(key),
-				      NULL, 0,
-				      &brw->sf.prog_data);
-   if (brw->sf.prog_bo == NULL)
-      compile_sf_prog( brw, &key );
+   if (brw_search_cache(&brw->cache, BRW_SF_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->sf.prog_data,
+                        &brw->sf.prog_bo))
+      return PIPE_OK;
 
-   return 0;
+   ret = compile_sf_prog( brw, &key, &brw->sf.prog_bo );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 31343ff245..f030f26c19 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -39,11 +39,12 @@
 #include "brw_debug.h"
 #include "brw_pipe_rast.h"
 
-static int upload_sf_vp(struct brw_context *brw)
+static enum pipe_error upload_sf_vp(struct brw_context *brw)
 {
    const struct pipe_viewport_state *vp = &brw->curr.vp;
    const struct pipe_scissor_state *scissor = &brw->curr.scissor;
    struct brw_sf_viewport sfv;
+   enum pipe_error ret;
 
    memset(&sfv, 0, sizeof(sfv));
 
@@ -61,10 +62,12 @@ static int upload_sf_vp(struct brw_context *brw)
    sfv.scissor.ymin = scissor->miny;
    sfv.scissor.ymax = scissor->maxy; /* -1 ?? */
 
-   brw->sws->bo_unreference(brw->sf.vp_bo);
-   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+   ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0,
+                         &brw->sf.vp_bo );
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_sf_vp = {
@@ -128,12 +131,13 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
 			   rast->point_size_max);
 }
 
-static struct brw_winsys_buffer *
+static enum pipe_error
 sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
-			struct brw_winsys_buffer **reloc_bufs)
+			struct brw_winsys_buffer **reloc_bufs,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_sf_unit_state sf;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
@@ -273,51 +277,65 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
       sf.sf6.dest_org_hbias = 0x0;
    }
 
-   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
-			 key, sizeof(*key),
-			 reloc_bufs, 2,
-			 &sf, sizeof(sf),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+                          key, sizeof(*key),
+                          reloc_bufs, 2,
+                          &sf, sizeof(sf),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    /* STATE_PREFETCH command description describes this state as being
     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
     */
    /* Emit SF program relocation */
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   sf.thread0.grf_reg_count << 1,
-			   offsetof(struct brw_sf_unit_state, thread0),
-			   brw->sf.prog_bo);
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 sf.thread0.grf_reg_count << 1,
+                                 offsetof(struct brw_sf_unit_state, thread0),
+                                 brw->sf.prog_bo);
+   if (ret)
+      return ret;
 
-   /* Emit SF viewport relocation */
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
-			   offsetof(struct brw_sf_unit_state, sf5),
-			   brw->sf.vp_bo);
 
-   return bo;
+   /* Emit SF viewport relocation */
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
+                                 offsetof(struct brw_sf_unit_state, sf5),
+                                 brw->sf.vp_bo);
+   if (ret)
+      return ret;
+   
+   return PIPE_OK;
 }
 
-static int upload_sf_unit( struct brw_context *brw )
+static enum pipe_error upload_sf_unit( struct brw_context *brw )
 {
    struct brw_sf_unit_key key;
    struct brw_winsys_buffer *reloc_bufs[2];
+   enum pipe_error ret;
 
    sf_unit_populate_key(brw, &key);
 
    reloc_bufs[0] = brw->sf.prog_bo;
    reloc_bufs[1] = brw->sf.vp_bo;
 
-   brw->sws->bo_unreference(brw->sf.state_bo);
-   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
-				       &key, sizeof(key),
-				       reloc_bufs, 2,
-				       NULL);
-   if (brw->sf.state_bo == NULL) {
-      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
-   }
-   return 0;
+   if (brw_search_cache(&brw->cache, BRW_SF_UNIT,
+                        &key, sizeof(key),
+                        reloc_bufs, 2,
+                        NULL,
+                        &brw->sf.state_bo))
+      return PIPE_OK;
+
+
+   ret = sf_unit_create_from_key(brw, &key, reloc_bufs,
+                                 &brw->sf.state_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_sf_unit = {
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 94d2cb6f82..e219a1d870 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -44,8 +44,8 @@ brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
    assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
 
    if (bo != NULL) {
-      brw->sws->bo_reference(bo);
-      brw->state.validated_bos[brw->state.validated_bo_count++] = bo;
+      bo_reference( &brw->state.validated_bos[brw->state.validated_bo_count++],
+                    bo );
    }
 }
 
@@ -106,37 +106,42 @@ void brw_destroy_state(struct brw_context *brw);
 /***********************************************************************
  * brw_state_cache.c
  */
-struct brw_winsys_buffer *brw_cache_data(struct brw_cache *cache,
-		       enum brw_cache_id cache_id,
-		       const void *data,
-		       struct brw_winsys_buffer **reloc_bufs,
-		       GLuint nr_reloc_bufs);
-
-struct brw_winsys_buffer *brw_cache_data_sz(struct brw_cache *cache,
-			  enum brw_cache_id cache_id,
-			  const void *data,
-			  GLuint data_size,
-			  struct brw_winsys_buffer **reloc_bufs,
-			  GLuint nr_reloc_bufs);
-
-struct brw_winsys_buffer *brw_upload_cache( struct brw_cache *cache,
-			  enum brw_cache_id cache_id,
-			  const void *key,
-			  GLuint key_sz,
-			  struct brw_winsys_buffer **reloc_bufs,
-			  GLuint nr_reloc_bufs,
-			  const void *data,
-			  GLuint data_sz,
-			  const void *aux,
-			  void *aux_return );
-
-struct brw_winsys_buffer *brw_search_cache( struct brw_cache *cache,
-			  enum brw_cache_id cache_id,
-			  const void *key,
-			  GLuint key_size,
-			  struct brw_winsys_buffer **reloc_bufs,
-			  GLuint nr_reloc_bufs,
-			  void *aux_return);
+enum pipe_error brw_cache_data(struct brw_cache *cache,
+                               enum brw_cache_id cache_id,
+                               const void *data,
+                               struct brw_winsys_buffer **reloc_bufs,
+                               GLuint nr_reloc_bufs,
+                               struct brw_winsys_buffer **bo_out );
+
+enum pipe_error brw_cache_data_sz(struct brw_cache *cache,
+                                  enum brw_cache_id cache_id,
+                                  const void *data,
+                                  GLuint data_size,
+                                  struct brw_winsys_buffer **reloc_bufs,
+                                  GLuint nr_reloc_bufs,
+                                  struct brw_winsys_buffer **bo_out);
+
+enum pipe_error brw_upload_cache( struct brw_cache *cache,
+                                  enum brw_cache_id cache_id,
+                                  const void *key,
+                                  GLuint key_sz,
+                                  struct brw_winsys_buffer **reloc_bufs,
+                                  GLuint nr_reloc_bufs,
+                                  const void *data,
+                                  GLuint data_sz,
+                                  const void *aux,
+                                  void *aux_return ,
+                                  struct brw_winsys_buffer **bo_out);
+
+boolean brw_search_cache( struct brw_cache *cache,
+                          enum brw_cache_id cache_id,
+                          const void *key,
+                          GLuint key_size,
+                          struct brw_winsys_buffer **reloc_bufs,
+                          GLuint nr_reloc_bufs,
+                          void *aux_return,
+                          struct brw_winsys_buffer **bo_out);
+
 void brw_state_cache_check_size( struct brw_context *brw );
 
 void brw_init_caches( struct brw_context *brw );
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index cbd1f02d77..f8369d31ec 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -109,9 +109,8 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
    if (bo == cache->last_bo[cache_id])
       return; /* no change */
 
-   cache->sws->bo_unreference(cache->last_bo[cache_id]);
-   cache->last_bo[cache_id] = bo;
-   cache->sws->bo_reference(cache->last_bo[cache_id]);
+   bo_reference( &cache->last_bo[cache_id],  bo );
+
    cache->brw->state.dirty.cache |= 1 << cache_id;
 }
 
@@ -174,14 +173,15 @@ rehash(struct brw_cache *cache)
 /**
- * Returns the buffer object matching cache_id and key, or NULL.
+ * Returns TRUE and references the match in *bo_out, or FALSE on a miss.
  */
-struct brw_winsys_buffer *
+boolean
 brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key,
                  GLuint key_size,
                  struct brw_winsys_buffer **reloc_bufs, 
 		 GLuint nr_reloc_bufs,
-                 void *aux_return)
+                 void *aux_return,
+                 struct brw_winsys_buffer **bo_out)
 {
    struct brw_cache_item *item;
    GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
@@ -189,20 +189,20 @@ brw_search_cache(struct brw_cache *cache,
    item = search_cache(cache, cache_id, hash, key, key_size,
 		       reloc_bufs, nr_reloc_bufs);
 
-   if (item == NULL)
-      return NULL;
-
-   if (aux_return)
-      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
-
-   update_cache_last(cache, cache_id, item->bo);
-
-   cache->sws->bo_reference(item->bo);
-   return item->bo;
+   if (item) {
+      if (aux_return)
+         *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+      
+      update_cache_last(cache, cache_id, item->bo);
+      bo_reference(bo_out, item->bo);
+      return TRUE;
+   }
+   
+   return FALSE;      
 }
 
 
-struct brw_winsys_buffer *
+enum pipe_error
 brw_upload_cache( struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *key,
@@ -212,14 +212,15 @@ brw_upload_cache( struct brw_cache *cache,
 		  const void *data,
 		  GLuint data_size,
 		  const void *aux,
-		  void *aux_return )
+		  void *aux_return,
+                  struct brw_winsys_buffer **bo_out)
 {
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
    GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *);
    GLuint aux_size = cache->aux_size[cache_id];
+   enum pipe_error ret;
    void *tmp;
-   struct brw_winsys_buffer *bo;
    int i;
 
    /* Create the buffer object to contain the data.  For now, use a
@@ -227,9 +228,12 @@ brw_upload_cache( struct brw_cache *cache,
     * may want to take advantage of hardware distinctions between
     * these various entities.
     */
-   bo = cache->sws->bo_alloc(cache->sws,
-                             cache->buffer_type,
-			     data_size, 1 << 6);
+   ret = cache->sws->bo_alloc(cache->sws,
+                              cache->buffer_type,
+                              data_size, 1 << 6, 
+                              bo_out);
+   if (ret)
+      return ret;
 
 
    /* Set up the memory containing the key, aux_data, and reloc_bufs */
@@ -240,7 +244,7 @@ brw_upload_cache( struct brw_cache *cache,
    memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size);
    for (i = 0; i < nr_reloc_bufs; i++) {
       if (reloc_bufs[i] != NULL)
-	 cache->sws->bo_reference(reloc_bufs[i]);
+         p_atomic_inc(&reloc_bufs[i]->reference.count);
    }
 
    item->cache_id = cache_id;
@@ -249,9 +253,7 @@ brw_upload_cache( struct brw_cache *cache,
    item->key_size = key_size;
    item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size);
    item->nr_reloc_bufs = nr_reloc_bufs;
-
-   item->bo = bo;
-   cache->sws->bo_reference(bo);
+   bo_reference( &item->bo, *bo_out );
    item->data_size = data_size;
 
    if (cache->n_items > cache->size * 1.5)
@@ -273,28 +275,28 @@ brw_upload_cache( struct brw_cache *cache,
 		   data_size, cache_id);
 
    /* Copy data to the buffer */
-   cache->sws->bo_subdata(bo, 
+   cache->sws->bo_subdata(item->bo, 
                           cache_id,
                           0, data_size, data);
 
-   update_cache_last(cache, cache_id, bo);
+   update_cache_last(cache, cache_id, item->bo);
 
-   return bo;
+   return PIPE_OK;
 }
 
 
 /**
  * This doesn't really work with aux data.  Use search/upload instead
  */
-struct brw_winsys_buffer *
+enum pipe_error
 brw_cache_data_sz(struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *data,
 		  GLuint data_size,
 		  struct brw_winsys_buffer **reloc_bufs,
-		  GLuint nr_reloc_bufs)
+		  GLuint nr_reloc_bufs,
+                  struct brw_winsys_buffer **bo_out)
 {
-   struct brw_winsys_buffer *bo;
    struct brw_cache_item *item;
    GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
 
@@ -302,17 +304,17 @@ brw_cache_data_sz(struct brw_cache *cache,
 		       reloc_bufs, nr_reloc_bufs);
    if (item) {
       update_cache_last(cache, cache_id, item->bo);
-      cache->sws->bo_reference(item->bo);
-      return item->bo;
-   }
 
-   bo = brw_upload_cache(cache, cache_id,
-			 data, data_size,
-			 reloc_bufs, nr_reloc_bufs,
-			 data, data_size,
-			 NULL, NULL);
+      bo_reference(bo_out, item->bo);
+      return PIPE_OK;
+   }
 
-   return bo;
+   return brw_upload_cache(cache, cache_id,
+                           data, data_size,
+                           reloc_bufs, nr_reloc_bufs,
+                           data, data_size,
+                           NULL, NULL,
+                           bo_out);
 }
 
 
@@ -323,15 +325,16 @@ brw_cache_data_sz(struct brw_cache *cache,
  * better to use, as the potentially changing offsets in the data-used-as-key
  * will result in excessive cache misses.
  */
-struct brw_winsys_buffer *
+enum pipe_error
 brw_cache_data(struct brw_cache *cache,
 	       enum brw_cache_id cache_id,
 	       const void *data,
 	       struct brw_winsys_buffer **reloc_bufs,
-	       GLuint nr_reloc_bufs)
+	       GLuint nr_reloc_bufs,
+               struct brw_winsys_buffer **bo_out)
 {
    return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
-			    reloc_bufs, nr_reloc_bufs);
+			    reloc_bufs, nr_reloc_bufs, bo_out);
 }
 
 
@@ -506,11 +509,13 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 	 int j;
 
 	 next = c->next;
+
 	 for (j = 0; j < c->nr_reloc_bufs; j++)
-	    brw->sws->bo_unreference(c->reloc_bufs[j]);
-	 brw->sws->bo_unreference(c->bo);
-	 free((void *)c->key);
-	 free(c);
+	    bo_reference(&c->reloc_bufs[j], NULL);
+
+	 bo_reference(&c->bo, NULL);
+	 FREE((void *)c->key);
+	 FREE(c);
       }
       cache->items[i] = NULL;
    }
@@ -551,10 +556,12 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
 	    *prev = c->next;
 
 	    for (j = 0; j < c->nr_reloc_bufs; j++)
-	       cache->sws->bo_unreference(c->reloc_bufs[j]);
-	    cache->sws->bo_unreference(c->bo);
-	    free((void *)c->key);
-	    free(c);
+	       bo_reference(&c->reloc_bufs[j], NULL);
+
+	    bo_reference(&c->bo, NULL);
+
+	    FREE((void *)c->key);
+	    FREE(c);
 	    cache->n_items--;
 	 } else {
 	    prev = &c->next;
@@ -590,10 +597,10 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 
    brw_clear_cache(brw, cache);
    for (i = 0; i < BRW_MAX_CACHE; i++) {
-      brw->sws->bo_unreference(cache->last_bo[i]);
-      free(cache->name[i]);
+      bo_reference(&cache->last_bo[i], NULL);
+      FREE(cache->name[i]);
    }
-   free(cache->items);
+   FREE(cache->items);
    cache->items = NULL;
    cache->size = 0;
 }
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index a71af4d2b9..fdcdd59129 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -140,8 +140,7 @@ brw_clear_validated_bos(struct brw_context *brw)
 
    /* Clear the last round of validated bos */
    for (i = 0; i < brw->state.validated_bo_count; i++) {
-      brw->sws->bo_unreference(brw->state.validated_bos[i]);
-      brw->state.validated_bos[i] = NULL;
+      bo_reference(&brw->state.validated_bos[i], NULL);
    }
    brw->state.validated_bo_count = 0;
 }
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 26a28114d9..966940ceac 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -39,10 +39,12 @@
 
 
-static void do_vs_prog( struct brw_context *brw, 
-			struct brw_vertex_shader *vp,
-			struct brw_vs_prog_key *key )
+static enum pipe_error do_vs_prog( struct brw_context *brw, 
+                                   struct brw_vertex_shader *vp,
+                                   struct brw_vs_prog_key *key,
+                                   struct brw_winsys_buffer **bo_out)
 {
+   enum pipe_error ret;
    GLuint program_size;
    const GLuint *program;
    struct brw_vs_compile c;
@@ -66,22 +68,29 @@ static void do_vs_prog( struct brw_context *brw,
 
    /* get the program
     */
-   program = brw_get_program(&c.func, &program_size);
-
-   brw->sws->bo_unreference(brw->vs.prog_bo);
-   brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG,
-				       &c.key, sizeof(c.key),
-				       NULL, 0,
-				       program, program_size,
-				       &c.prog_data,
-				       &brw->vs.prog_data );
+   ret = brw_get_program(&c.func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_cache( &brw->cache, BRW_VS_PROG,
+                           &c.key, sizeof(c.key),
+                           NULL, 0,
+                           program, program_size,
+                           &c.prog_data,
+                           &brw->vs.prog_data,
+                           bo_out);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
-static int brw_upload_vs_prog(struct brw_context *brw)
+static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
 {
    struct brw_vs_prog_key key;
    struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+   enum pipe_error ret;
 
    memset(&key, 0, sizeof(key));
 
@@ -95,15 +104,18 @@ static int brw_upload_vs_prog(struct brw_context *brw)
 
    /* Make an early check for the key.
     */
-   brw->sws->bo_unreference(brw->vs.prog_bo);
-   brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG,
-				      &key, sizeof(key),
-				      NULL, 0,
-				      &brw->vs.prog_data);
-   if (brw->vs.prog_bo == NULL)
-      do_vs_prog(brw, vp, &key);
-
-   return 0;
+   if (brw_search_cache(&brw->cache, BRW_VS_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->vs.prog_data,
+                        &brw->vs.prog_bo))
+      return PIPE_OK;
+
+   ret = do_vs_prog(brw, vp, &key, &brw->vs.prog_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 26d5d005fa..22a4d7f01b 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -78,11 +78,13 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
    }
 }
 
-static struct brw_winsys_buffer *
-vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
+static enum pipe_error
+vs_unit_create_from_key(struct brw_context *brw, 
+                        struct brw_vs_unit_key *key,
+                        struct brw_winsys_buffer **bo_out)
 {
+   enum pipe_error ret;
    struct brw_vs_unit_state vs;
-   struct brw_winsys_buffer *bo;
    int chipset_max_threads;
 
    memset(&vs, 0, sizeof(vs));
@@ -141,38 +143,46 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key)
     */
    vs.vs6.vs_enable = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
-			 key, sizeof(*key),
-			 &brw->vs.prog_bo, 1,
-			 &vs, sizeof(vs),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
+                          key, sizeof(*key),
+                          &brw->vs.prog_bo, 1,
+                          &vs, sizeof(vs),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    /* Emit VS program relocation */
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   vs.thread0.grf_reg_count << 1,
-			   offsetof(struct brw_vs_unit_state, thread0),
-			   brw->vs.prog_bo);
-
-   return bo;
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 vs.thread0.grf_reg_count << 1,
+                                 offsetof(struct brw_vs_unit_state, thread0),
+                                 brw->vs.prog_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 static int prepare_vs_unit(struct brw_context *brw)
 {
    struct brw_vs_unit_key key;
+   enum pipe_error ret;
 
    vs_unit_populate_key(brw, &key);
 
-   brw->sws->bo_unreference(brw->vs.state_bo);
-   brw->vs.state_bo = brw_search_cache(&brw->cache, BRW_VS_UNIT,
-				       &key, sizeof(key),
-				       &brw->vs.prog_bo, 1,
-				       NULL);
-   if (brw->vs.state_bo == NULL) {
-      brw->vs.state_bo = vs_unit_create_from_key(brw, &key);
-   }
+   if (brw_search_cache(&brw->cache, BRW_VS_UNIT,
+                        &key, sizeof(key),
+                        &brw->vs.prog_bo, 1,
+                        NULL,
+                        &brw->vs.state_bo))
+      return PIPE_OK;
+
+   ret = vs_unit_create_from_key(brw, &key, &brw->vs.state_bo);
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_vs_unit = {
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index 32fb9b2a8b..b12df0ec03 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -83,22 +83,23 @@ brw_update_vs_constant_surface( struct brw_context *brw,
 {
    struct brw_surface_key key;
    struct pipe_buffer *cb = brw->curr.vs_constants;
+   enum pipe_error ret;
 
    assert(surf == 0);
 
    /* If we're in this state update atom, we need to update VS constants, so
     * free the old buffer and create a new one for the new contents.
     */
-   brw->sws->bo_unreference(vp->const_buffer);
-   vp->const_buffer = brw_vs_update_constant_buffer(brw);
+   ret = brw_vs_update_constant_buffer(brw, &vp->const_buffer);
+   if (ret)
+      return ret;
 
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
-   if (vp->const_buffer == 0) {
-      drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
-      brw->vs.surf_bo[surf] = NULL;
-      return;
+   if (vp->const_buffer == NULL) {
+      bo_reference(&brw->vs.surf_bo[surf], NULL);
+      return PIPE_OK;
    }
 
    memset(&key, 0, sizeof(key));
@@ -118,15 +119,20 @@ brw_update_vs_constant_surface( struct brw_context *brw,
           key.width, key.height, key.depth, key.cpp, key.pitch);
    */
 
-   drm_intel_bo_unreference(brw->vs.surf_bo[surf]);
-   brw->vs.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
-                                            BRW_SS_SURFACE,
-                                            &key, sizeof(key),
-                                            &key.bo, key.bo ? 1 : 0,
-                                            NULL);
-   if (brw->vs.surf_bo[surf] == NULL) {
-      brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key);
-   }
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &key, sizeof(key),
+                        &key.bo, key.bo ? 1 : 0,
+                        NULL,
+                        &brw->vs.surf_bo[surf]))
+      return PIPE_OK;
+
+   ret = brw_create_constant_surface(brw, &key,
+                                     &brw->vs.surf_bo[surf]);
+   if (ret)
+      return ret;
+   
+   return PIPE_OK;
 }
 #endif
 
@@ -134,18 +140,20 @@ brw_update_vs_constant_surface( struct brw_context *brw,
 /**
  * Constructs the binding table for the VS surface state.
  */
-static struct brw_winsys_buffer *
-brw_vs_get_binding_table(struct brw_context *brw)
+static enum pipe_error
+brw_vs_get_binding_table(struct brw_context *brw,
+                         struct brw_winsys_buffer **bo_out)
 {
 #if 0
-   struct brw_winsys_buffer *bind_bo;
-
-   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
-			      NULL, 0,
-			      brw->vs.surf_bo, BRW_VS_MAX_SURF,
-			      NULL);
-
-   if (bind_bo == NULL) {
+   if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+                        NULL, 0,
+                        brw->vs.surf_bo, BRW_VS_MAX_SURF,
+                        NULL,
+                        bo_out))
+   {
+      return PIPE_OK;
+   }
+   else {
       GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
       uint32_t *data = malloc(data_size);
       int i;
@@ -156,11 +164,14 @@ brw_vs_get_binding_table(struct brw_context *brw)
          else
             data[i] = 0;
 
-      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
-				  NULL, 0,
-				  brw->vs.surf_bo, BRW_VS_MAX_SURF,
-				  data, data_size,
-				  NULL, NULL);
+      ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+                              NULL, 0,
+                              brw->vs.surf_bo, BRW_VS_MAX_SURF,
+                              data, data_size,
+                              NULL, NULL,
+                              bo_out);
+      if (ret)
+         return ret;
 
       /* Emit binding table relocations to surface state */
       for (i = 0; i < BRW_VS_MAX_SURF; i++) {
@@ -168,18 +179,19 @@ brw_vs_get_binding_table(struct brw_context *brw)
 	    /* The presumed offsets were set in the data values for
 	     * brw_upload_cache.
 	     */
-	    drm_intel_bo_emit_reloc(bind_bo, i * 4,
-				    brw->vs.surf_bo[i], 0,
-				    BRW_USAGE_STATE);
+	    ret = brw->sws->bo_emit_reloc(*bo_out, BRW_USAGE_STATE,
+                                          0, i * 4,
+                                          brw->vs.surf_bo[i]);
+            if (ret)
+               return ret;
 	 }
       }
 
-      free(data);
+      FREE(data);
+      return PIPE_OK;
    }
-
-   return bind_bo;
 #else
-   return NULL;
+   return PIPE_OK;
 #endif
 }
 
@@ -190,8 +202,10 @@ brw_vs_get_binding_table(struct brw_context *brw)
  * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit and
  * CACHE_NEW_SURF_BIND for the binding table upload.
  */
-static int prepare_vs_surfaces(struct brw_context *brw )
+static enum pipe_error prepare_vs_surfaces(struct brw_context *brw )
 {
+   enum pipe_error ret;
+
 #if 0
    int i;
    int nr_surfaces = 0;
@@ -215,11 +229,12 @@ static int prepare_vs_surfaces(struct brw_context *brw )
     * just slightly increases our working set size.
     */
    if (brw->vs.nr_surfaces != 0) {
-      brw->sws->bo_unreference(brw->vs.bind_bo);
-      brw->vs.bind_bo = brw_vs_get_binding_table(brw);
+      ret = brw_vs_get_binding_table(brw, &brw->vs.bind_bo);
+      if (ret)
+         return ret;
    }
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index d941fbcebe..f61c541ad1 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -28,6 +28,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_error.h"
+#include "pipe/p_refcnt.h"
 
 struct brw_winsys;
 struct pipe_fence_handle;
@@ -36,10 +37,13 @@ struct pipe_fence_handle;
  */
 #define BRW_BATCH_SIZE (32*1024)
 
+struct brw_winsys_screen;
 
 /* Need a tiny bit of information inside the abstract buffer struct:
  */
 struct brw_winsys_buffer {
+   struct pipe_reference reference;
+   struct brw_winsys_screen *sws;
    unsigned *offset;
    unsigned size;
 };
@@ -105,6 +109,10 @@ enum brw_buffer_data_type {
    BRW_DATA_MAX
 };
 
+
+
+
+
 struct brw_winsys_screen {
 
 
@@ -116,33 +124,33 @@ struct brw_winsys_screen {
    /**
     * Create a buffer.
     */
-   struct brw_winsys_buffer *(*bo_alloc)( struct brw_winsys_screen *sws,
-					  enum brw_buffer_type type,
-					  unsigned size,
-					  unsigned alignment );
+   enum pipe_error (*bo_alloc)( struct brw_winsys_screen *sws,
+                                enum brw_buffer_type type,
+                                unsigned size,
+                                unsigned alignment,
+                                struct brw_winsys_buffer **bo_out );
 
-   /* Reference and unreference buffers:
+   /* Destroy a buffer when our refcount goes to zero:
     */
-   void (*bo_reference)( struct brw_winsys_buffer *buffer );
-   void (*bo_unreference)( struct brw_winsys_buffer *buffer );
+   void (*bo_destroy)( struct brw_winsys_buffer *buffer );
 
    /* delta -- added to b2->offset, and written into buffer
     * offset -- location above value is written to within buffer
     */
-   int (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
-			 enum brw_buffer_usage usage,
-			 unsigned delta,
-			 unsigned offset,
-			 struct brw_winsys_buffer *b2);
+   enum pipe_error (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
+                                     enum brw_buffer_usage usage,
+                                     unsigned delta,
+                                     unsigned offset,
+                                     struct brw_winsys_buffer *b2);
 
-   int (*bo_exec)( struct brw_winsys_buffer *buffer,
-		   unsigned bytes_used );
+   enum pipe_error (*bo_exec)( struct brw_winsys_buffer *buffer,
+                               unsigned bytes_used );
 
-   int (*bo_subdata)(struct brw_winsys_buffer *buffer,
-                     enum brw_buffer_data_type data_type,
-                     size_t offset,
-                     size_t size,
-                     const void *data);
+   enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer,
+                                 enum brw_buffer_data_type data_type,
+                                 size_t offset,
+                                 size_t size,
+                                 const void *data);
 
    boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
    boolean (*bo_references)(struct brw_winsys_buffer *a,
@@ -175,6 +183,16 @@ struct brw_winsys_screen {
 };
 
 
+static INLINE void
+bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
+{
+   struct brw_winsys_buffer *old_buf = *ptr;
+
+   if (pipe_reference((struct pipe_reference **)ptr, &buf->reference))
+      old_buf->sws->bo_destroy(old_buf);
+}
+
+
 /**
  * Create brw pipe_screen.
  */
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 815ae8c51a..93f90bf329 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -137,30 +137,26 @@ brw_wm_linear_shader_emit(struct brw_context *brw, struct brw_wm_compile *c)
  * Depending on the instructions used (i.e. flow control instructions)
  * we'll use one of two code generators.
  */
-static int do_wm_prog( struct brw_context *brw,
-			struct brw_fragment_shader *fp, 
-			struct brw_wm_prog_key *key)
+static enum pipe_error do_wm_prog( struct brw_context *brw,
+                                   struct brw_fragment_shader *fp, 
+                                   struct brw_wm_prog_key *key,
+                                   struct brw_winsys_buffer **bo_out)
 {
+   enum pipe_error ret;
    struct brw_wm_compile *c;
    const GLuint *program;
    GLuint program_size;
 
-   c = brw->wm.compile_data;
-   if (c == NULL) {
-      brw->wm.compile_data = calloc(1, sizeof(*brw->wm.compile_data));
-      c = brw->wm.compile_data;
-      if (c == NULL) {
-         /* Ouch - big out of memory problem.  Can't continue
-          * without triggering a segfault, no way to signal,
-          * so just return.
-          */
+   if (brw->wm.compile_data == NULL) {
+      brw->wm.compile_data = MALLOC(sizeof(*brw->wm.compile_data));
+      if (!brw->wm.compile_data) 
          return PIPE_ERROR_OUT_OF_MEMORY;
-      }
-   } else {
-      memset(c, 0, sizeof(*brw->wm.compile_data));
    }
-   memcpy(&c->key, key, sizeof(*key));
 
+   c = brw->wm.compile_data;
+   memset(c, 0, sizeof *c);
+
+   c->key = *key;
    c->fp = fp;
    c->env_param = NULL; /*brw->intel.ctx.FragmentProgram.Parameters;*/
 
@@ -190,17 +186,21 @@ static int do_wm_prog( struct brw_context *brw,
 
    /* get the program
     */
-   program = brw_get_program(&c->func, &program_size);
-
-   brw->sws->bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG,
-				       &c->key, sizeof(c->key),
-				       NULL, 0,
-				       program, program_size,
-				       &c->prog_data,
-				       &brw->wm.prog_data );
-
-   return 0;
+   ret = brw_get_program(&c->func, &program, &program_size);
+   if (ret)
+      return ret;
+
+   ret = brw_upload_cache( &brw->cache, BRW_WM_PROG,
+                           &c->key, sizeof(c->key),
+                           NULL, 0,
+                           program, program_size,
+                           &c->prog_data,
+                           &brw->wm.prog_data,
+                           bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
@@ -267,24 +267,28 @@ static void brw_wm_populate_key( struct brw_context *brw,
 }
 
 
-static int brw_prepare_wm_prog(struct brw_context *brw)
+static enum pipe_error brw_prepare_wm_prog(struct brw_context *brw)
 {
    struct brw_wm_prog_key key;
    struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+   enum pipe_error ret;
      
    brw_wm_populate_key(brw, &key);
 
    /* Make an early check for the key.
     */
-   brw->sws->bo_unreference(brw->wm.prog_bo);
-   brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG,
-				      &key, sizeof(key),
-				      NULL, 0,
-				      &brw->wm.prog_data);
-   if (brw->wm.prog_bo == NULL)
-      return do_wm_prog(brw, fs, &key);
-
-   return 0;
+   if (brw_search_cache(&brw->cache, BRW_WM_PROG,
+                        &key, sizeof(key),
+                        NULL, 0,
+                        &brw->wm.prog_data,
+                        &brw->wm.prog_bo))
+      return PIPE_OK;
+
+   ret = do_wm_prog(brw, fs, &key, &brw->wm.prog_bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
index 50ecef29a4..14568265dd 100644
--- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -6,12 +6,14 @@
  * Create the constant buffer surface.  Vertex/fragment shader constants will be
  * read from this buffer with Data Port Read instructions/messages.
  */
-struct brw_winsys_buffer *
+enum pipe_error
 brw_create_constant_surface( struct brw_context *brw,
-                             struct brw_surface_key *key )
+                             struct brw_surface_key *key,
+                             struct brw_winsys_buffer **bo_out )
 {
    const GLint w = key->width - 1;
    struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
 
    memset(&surf, 0, sizeof(surf));
 
@@ -28,22 +30,27 @@ brw_create_constant_surface( struct brw_context *brw,
    surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */
    brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */
  
-   bo = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
-			 key, sizeof(*key),
-			 &key->bo, key->bo ? 1 : 0,
-			 &surf, sizeof(surf),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+                          key, sizeof(*key),
+                          &key->bo, key->bo ? 1 : 0,
+                          &surf, sizeof(surf),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    if (key->bo) {
       /* Emit relocation to surface contents */
-      brw->sws->bo_emit_reloc(bo,
-			      BRW_USAGE_SAMPLER,
-			      0,
-			      offsetof(struct brw_surface_state, ss1),
-			      key->bo);
+      ret = brw->sws->bo_emit_reloc(*bo_out,
+                                    BRW_USAGE_SAMPLER,
+                                    0,
+                                    offsetof(struct brw_surface_state, ss1),
+                                    key->bo);
+      if (ret)
+         return ret;
    }
 
-   return bo;
+   return PIPE_OK;
 }
 
 
@@ -52,7 +59,7 @@ brw_create_constant_surface( struct brw_context *brw,
  * Update the surface state for a WM constant buffer.
  * The constant buffer will be (re)allocated here if needed.
  */
-static void
+static enum pipe_error
 brw_update_wm_constant_surface( struct brw_context *brw,
                                 GLuint surf)
 {
@@ -60,20 +67,21 @@ brw_update_wm_constant_surface( struct brw_context *brw,
    struct brw_fragment_shader *fp = brw->curr.fragment_shader;
    struct pipe_buffer *cbuf = brw->curr.fragment_constants;
    int pitch = cbuf->size / (4 * sizeof(float));
+   enum pipe_error ret;
 
    /* If we're in this state update atom, we need to update WM constants, so
     * free the old buffer and create a new one for the new contents.
     */
-   brw->sws->bo_unreference(fp->const_buffer);
-   fp->const_buffer = brw_wm_update_constant_buffer(brw);
+   ret = brw_wm_update_constant_buffer(brw, &fp->const_buffer);
+   if (ret)
+      return ret;
 
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
    if (cbuf == NULL) {
-      drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
-      brw->wm.surf_bo[surf] = NULL;
-      return;
+      bo_reference(&brw->wm.surf_bo[surf], NULL);
+      return PIPE_OK;
    }
 
    memset(&key, 0, sizeof(key));
@@ -97,16 +105,20 @@ brw_update_wm_constant_surface( struct brw_context *brw,
           key.width, key.height, key.depth, key.cpp, key.pitch);
    */
 
-   brw->sws->bo_unreference(brw->wm.surf_bo[surf]);
-   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
-                                            BRW_SS_SURFACE,
-                                            &key, sizeof(key),
-                                            &key.bo, 1,
-                                            NULL);
-   if (brw->wm.surf_bo[surf] == NULL) {
-      brw->wm.surf_bo[surf] = brw_create_constant_surface(brw, &key);
-   }
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &key, sizeof(key),
+                        &key.bo, 1,
+                        NULL,
+                        &brw->wm.surf_bo[surf]))
+      return PIPE_OK;
+
+   ret = brw_create_constant_surface(brw, &key, &brw->wm.surf_bo[surf]);
+   if (ret)
+      return ret;
+
    brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
+   return PIPE_OK;
 }
 
 /**
@@ -117,28 +129,33 @@ brw_update_wm_constant_surface( struct brw_context *brw,
  * BRW_NEW_WM_SURFACES to get picked up by brw_prepare_wm_surfaces for
  * inclusion in the binding table.
  */
-static void prepare_wm_constant_surface(struct brw_context *brw )
+static enum pipe_error prepare_wm_constant_surface(struct brw_context *brw )
 {
    struct brw_fragment_program *fp =
       (struct brw_fragment_program *) brw->fragment_program;
    GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER;
 
-   drm_intel_bo_unreference(fp->const_buffer);
-   fp->const_buffer = brw_wm_update_constant_buffer(brw);
+   ret = brw_wm_update_constant_buffer(brw,
+                                       &fp->const_buffer);
+   if (ret)
+      return ret;
 
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
    if (fp->const_buffer == 0) {
       if (brw->wm.surf_bo[surf] != NULL) {
-	 drm_intel_bo_unreference(brw->wm.surf_bo[surf]);
-	 brw->wm.surf_bo[surf] = NULL;
+	 bo_reference(&brw->wm.surf_bo[surf], NULL);
 	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
       }
-      return;
+      return PIPE_OK;
    }
 
-   brw_update_wm_constant_surface(ctx, surf);
+   ret = brw_update_wm_constant_surface(brw, surf);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_wm_constant_surface = {
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 2fddb4ad89..2861aa979f 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -43,16 +43,22 @@
 
 
-static struct brw_winsys_buffer *
+static enum pipe_error
 upload_default_color( struct brw_context *brw,
-		      const GLfloat *color )
+		      const GLfloat *color,
+                      struct brw_winsys_buffer **bo_out )
 {
    struct brw_sampler_default_color sdc;
+   enum pipe_error ret;
 
    COPY_4V(sdc.color, color); 
    
-   return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
-			  NULL, 0 );
+   ret = brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc,
+                         NULL, 0, bo_out );
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
@@ -111,9 +117,10 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 }
 
 
-static void
+static enum pipe_error
 brw_wm_sampler_update_default_colors(struct brw_context *brw)
 {
+   enum pipe_error ret;
    int nr = MIN2(brw->curr.num_textures,
 		 brw->curr.num_samplers);
    int i;
@@ -121,8 +128,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
    for (i = 0; i < nr; i++) {
       const struct brw_texture *tex = brw_texture(brw->curr.texture[i]);
       const struct brw_sampler *sampler = brw->curr.sampler[i];
-
-      brw->sws->bo_unreference(brw->wm.sdc_bo[i]);
+      const float *bc;
 
       if (pf_is_depth_or_stencil(tex->base.format)) {
 	 float bordercolor[4] = {
@@ -131,15 +137,25 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
 	    sampler->border_color[0],
 	    sampler->border_color[0]
 	 };
-	 /* GL specs that border color for depth textures is taken from the
-	  * R channel, while the hardware uses A.  Spam R into all the
-	  * channels for safety.
-	  */
-	 brw->wm.sdc_bo[i] = upload_default_color(brw, bordercolor);
-      } else {
-	 brw->wm.sdc_bo[i] = upload_default_color(brw, sampler->border_color);
+         
+         bc = bordercolor;
+      }
+      else {
+         bc = sampler->border_color;
       }
+
+      /* GL specs that border color for depth textures is taken from the
+       * R channel, while the hardware uses A.  Spam R into all the
+       * channels for safety.
+       */
+      ret = upload_default_color(brw, 
+                                 bc,
+                                 &brw->wm.sdc_bo[i]);
+      if (ret) 
+         return ret;
    }
+
+   return PIPE_OK;
 }
 
 
@@ -149,6 +165,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
 static int upload_wm_samplers( struct brw_context *brw )
 {
    struct wm_sampler_key key;
+   enum pipe_error ret;
    int i;
 
    brw_wm_sampler_update_default_colors(brw);
@@ -159,35 +176,40 @@ static int upload_wm_samplers( struct brw_context *brw )
       brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
    }
 
-   brw->sws->bo_unreference(brw->wm.sampler_bo);
-   brw->wm.sampler_bo = NULL;
-   if (brw->wm.sampler_count == 0)
-      return 0;
+   if (brw->wm.sampler_count == 0) {
+      bo_reference(&brw->wm.sampler_bo, NULL);
+      return PIPE_OK;
+   }
 
-   brw->wm.sampler_bo = brw_search_cache(&brw->cache, BRW_SAMPLER,
-					 &key, sizeof(key),
-					 brw->wm.sdc_bo, key.sampler_count,
-					 NULL);
+   if (brw_search_cache(&brw->cache, BRW_SAMPLER,
+                        &key, sizeof(key),
+                        brw->wm.sdc_bo, key.sampler_count,
+                        NULL,
+                        &brw->wm.sampler_bo))
+      return PIPE_OK;
 
    /* If we didnt find it in the cache, compute the state and put it in the
     * cache.
     */
-   if (brw->wm.sampler_bo == NULL) {
-      brw->wm.sampler_bo = brw_upload_cache(&brw->cache, BRW_SAMPLER,
-					    &key, sizeof(key),
-					    brw->wm.sdc_bo, key.sampler_count,
-					    &key.sampler, sizeof(key.sampler),
-					    NULL, NULL);
-
-      /* Emit SDC relocations */
-      for (i = 0; i < key.sampler_count; i++) {
-	 brw->sws->bo_emit_reloc(brw->wm.sampler_bo,
-				 BRW_USAGE_SAMPLER,
-				 0,
-				 i * sizeof(struct brw_sampler_state) +
-				 offsetof(struct brw_sampler_state, ss2),
-				 brw->wm.sdc_bo[i]);
-      }
+   ret = brw_upload_cache(&brw->cache, BRW_SAMPLER,
+                          &key, sizeof(key),
+                          brw->wm.sdc_bo, key.sampler_count,
+                          &key.sampler, sizeof(key.sampler),
+                          NULL, NULL,
+                          &brw->wm.sampler_bo);
+   if (ret)
+      return ret;
+
+   /* Emit SDC relocations */
+   for (i = 0; i < key.sampler_count; i++) {
+      ret = brw->sws->bo_emit_reloc(brw->wm.sampler_bo,
+                                    BRW_USAGE_SAMPLER,
+                                    0,
+                                    i * sizeof(struct brw_sampler_state) +
+                                    offsetof(struct brw_sampler_state, ss2),
+                                    brw->wm.sdc_bo[i]);
+      if (ret)
+         return ret;
    }
 
    return 0;
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index ccbb647bcd..86dc10540d 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -138,12 +138,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
 /**
  * Setup wm hardware state.  See page 225 of Volume 2
  */
-static struct brw_winsys_buffer *
+static enum pipe_error
 wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
-			struct brw_winsys_buffer **reloc_bufs)
+			struct brw_winsys_buffer **reloc_bufs,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_wm_unit_state wm;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
 
    memset(&wm, 0, sizeof(wm));
 
@@ -222,45 +223,56 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    if (BRW_DEBUG & DEBUG_STATS || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
-   bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
-			 key, sizeof(*key),
-			 reloc_bufs, 3,
-			 &wm, sizeof(wm),
-			 NULL, NULL);
+   ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+                          key, sizeof(*key),
+                          reloc_bufs, 3,
+                          &wm, sizeof(wm),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
    /* Emit WM program relocation */
-   brw->sws->bo_emit_reloc(bo,
-			   BRW_USAGE_STATE,
-			   wm.thread0.grf_reg_count << 1,
-			   offsetof(struct brw_wm_unit_state, thread0),
-			   brw->wm.prog_bo);
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_STATE,
+                                 wm.thread0.grf_reg_count << 1,
+                                 offsetof(struct brw_wm_unit_state, thread0),
+                                 brw->wm.prog_bo);
+   if (ret)
+      return ret;
 
    /* Emit scratch space relocation */
    if (key->total_scratch != 0) {
-      brw->sws->bo_emit_reloc(bo,
-			      BRW_USAGE_SCRATCH,
-			      wm.thread2.per_thread_scratch_space,
-			      offsetof(struct brw_wm_unit_state, thread2),
-			      brw->wm.scratch_bo);
+      ret = brw->sws->bo_emit_reloc(*bo_out,
+                                    BRW_USAGE_SCRATCH,
+                                    wm.thread2.per_thread_scratch_space,
+                                    offsetof(struct brw_wm_unit_state, thread2),
+                                    brw->wm.scratch_bo);
+      if (ret)
+         return ret;
    }
 
    /* Emit sampler state relocation */
    if (key->sampler_count != 0) {
-      brw->sws->bo_emit_reloc(bo,
-			      BRW_USAGE_STATE,
-			      wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-			      offsetof(struct brw_wm_unit_state, wm4),
-			      brw->wm.sampler_bo);
+      ret = brw->sws->bo_emit_reloc(*bo_out,
+                                    BRW_USAGE_STATE,
+                                    wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+                                    offsetof(struct brw_wm_unit_state, wm4),
+                                    brw->wm.sampler_bo);
+      if (ret)
+         return ret;
    }
 
-   return bo;
+   return PIPE_OK;
 }
 
 
-static int upload_wm_unit( struct brw_context *brw )
+static enum pipe_error upload_wm_unit( struct brw_context *brw )
 {
    struct brw_wm_unit_key key;
    struct brw_winsys_buffer *reloc_bufs[3];
+   enum pipe_error ret;
+
    wm_unit_populate_key(brw, &key);
 
    /* Allocate the necessary scratch space if we haven't already.  Don't
@@ -271,15 +283,19 @@ static int upload_wm_unit( struct brw_context *brw )
    if (key.total_scratch) {
       GLuint total = key.total_scratch * key.max_threads;
 
-      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-	 brw->sws->bo_unreference(brw->wm.scratch_bo);
-	 brw->wm.scratch_bo = NULL;
-      }
+      /* Do we need a new buffer:
+       */
+      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) 
+	 bo_reference(&brw->wm.scratch_bo, NULL);
+
       if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws,
-						 BRW_BUFFER_TYPE_SHADER_SCRATCH,
-						 total,
-						 4096);
+	 ret = brw->sws->bo_alloc(brw->sws,
+                                  BRW_BUFFER_TYPE_SHADER_SCRATCH,
+                                  total,
+                                  4096,
+                                  &brw->wm.scratch_bo);
+         if (ret)
+            return ret;
       }
    }
 
@@ -287,16 +303,19 @@ static int upload_wm_unit( struct brw_context *brw )
    reloc_bufs[1] = brw->wm.scratch_bo;
    reloc_bufs[2] = brw->wm.sampler_bo;
 
-   brw->sws->bo_unreference(brw->wm.state_bo);
-   brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
-				       &key, sizeof(key),
-				       reloc_bufs, 3,
-				       NULL);
-   if (brw->wm.state_bo == NULL) {
-      brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
-   }
+   if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
+                        &key, sizeof(key),
+                        reloc_bufs, 3,
+                        NULL,
+                        &brw->wm.state_bo))
+      return PIPE_OK;
+
+   ret = wm_unit_create_from_key(brw, &key, reloc_bufs,
+                                 &brw->wm.state_bo);
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_wm_unit = {
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index b055dde20c..e5d0329967 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -40,31 +40,40 @@
 
 
-static void
+static enum pipe_error
 brw_update_texture_surface( struct brw_context *brw,
 			    struct brw_texture *tex,
-			    GLuint surf )
+                            struct brw_winsys_buffer **bo_out)
 {
-   brw->wm.surf_bo[surf] = brw_search_cache(&brw->surface_cache,
-                                            BRW_SS_SURFACE,
-                                            &tex->ss, sizeof tex->ss,
-                                            &tex->bo, 1,
-                                            NULL);
-
-   if (brw->wm.surf_bo[surf] == NULL) {
-      brw->wm.surf_bo[surf] = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
-					       &tex->ss, sizeof tex->ss,
-					       &tex->bo, 1,
-					       &tex->ss, sizeof tex->ss,
-					       NULL, NULL);
+   enum pipe_error ret;
+
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &tex->ss, sizeof tex->ss,
+                        &tex->bo, 1,
+                        NULL,
+                        bo_out))
+      return PIPE_OK;
+
+   ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
+                          &tex->ss, sizeof tex->ss,
+                          &tex->bo, 1,
+                          &tex->ss, sizeof tex->ss,
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
       
-      /* Emit relocation to surface contents */
-      brw->sws->bo_emit_reloc(brw->wm.surf_bo[surf],
-			      BRW_USAGE_SAMPLER,
-			      0,
-			      offsetof(struct brw_surface_state, ss1),
-			      tex->bo);
-   }
+   /* Emit relocation to surface contents */
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_SAMPLER,
+                                 0,
+                                 offsetof(struct brw_surface_state, ss1),
+                                 tex->bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
@@ -79,13 +88,14 @@ brw_update_texture_surface( struct brw_context *brw,
  * While it is only used for the front/back buffer currently, it should be
  * usable for further buffers when doing ARB_draw_buffer support.
  */
-static void
-brw_update_renderbuffer_surface(struct brw_context *brw,
-				struct brw_surface *surface,
-				unsigned int unit)
+static enum pipe_error
+brw_update_render_surface(struct brw_context *brw,
+                          struct brw_surface *surface,
+                          struct brw_winsys_buffer **bo_out)
 {
    struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0;
    struct brw_surface_state ss;
+   enum pipe_error ret;
 
    /* Surfaces are potentially shared between contexts, so can't
     * scribble the in-place ss0 value in the surface.
@@ -98,30 +108,35 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
    ss.ss0.writedisable_red   = blend_ss0.writedisable_red;
    ss.ss0.writedisable_alpha = blend_ss0.writedisable_alpha;
 
-   brw->sws->bo_unreference(brw->wm.surf_bo[unit]);
-   brw->wm.surf_bo[unit] = brw_search_cache(&brw->surface_cache,
-					    BRW_SS_SURFACE,
-					    &ss, sizeof(ss),
-					    &surface->bo, 1,
-					    NULL);
-
-   if (brw->wm.surf_bo[unit] == NULL) {
-
-      brw->wm.surf_bo[unit] = brw_upload_cache(&brw->surface_cache,
-                                               BRW_SS_SURFACE,
-                                               &ss, sizeof ss,
-					       &surface->bo, 1,
-					       &ss, sizeof ss,
-					       NULL, NULL);
+   if (brw_search_cache(&brw->surface_cache,
+                        BRW_SS_SURFACE,
+                        &ss, sizeof(ss),
+                        &surface->bo, 1,
+                        NULL,
+                        bo_out))
+      return PIPE_OK;
+       
+   ret = brw_upload_cache(&brw->surface_cache,
+                          BRW_SS_SURFACE,
+                          &ss, sizeof ss,
+                          &surface->bo, 1,
+                          &ss, sizeof ss,
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
 
       /* XXX: we will only be rendering to this surface:
        */
-      brw->sws->bo_emit_reloc(brw->wm.surf_bo[unit],
-			      BRW_USAGE_RENDER_TARGET,
-			      ss.ss1.base_addr - surface->bo->offset[0], /* XXX */
-			      offsetof(struct brw_surface_state, ss1),
-			      surface->bo);
-   }
+   ret = brw->sws->bo_emit_reloc(*bo_out,
+                                 BRW_USAGE_RENDER_TARGET,
+                                 ss.ss1.base_addr - surface->bo->offset[0], /* XXX */
+                                 offsetof(struct brw_surface_state, ss1),
+                                 surface->bo);
+   if (ret)
+      return ret;
+
+   return PIPE_OK;
 }
 
 
@@ -129,60 +144,60 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
  * Constructs the binding table for the WM surface state, which maps unit
  * numbers to surface state objects.
  */
-static struct brw_winsys_buffer *
-brw_wm_get_binding_table(struct brw_context *brw)
+static enum pipe_error
+brw_wm_get_binding_table(struct brw_context *brw,
+                         struct brw_winsys_buffer **bo_out )
 {
-   struct brw_winsys_buffer *bind_bo;
+   enum pipe_error ret;
+   uint32_t data[BRW_WM_MAX_SURF];
+   GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
+   int i;
 
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
+   assert(brw->wm.nr_surfaces > 0);
 
    /* Note there is no key for this search beyond the values in the
     * relocation array:
     */
-   bind_bo = brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
-			      NULL, 0,
-			      brw->wm.surf_bo, brw->wm.nr_surfaces,
-			      NULL);
-
-   if (bind_bo == NULL) {
-      uint32_t data[BRW_WM_MAX_SURF];
-      GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
-      int i;
-
-      for (i = 0; i < brw->wm.nr_surfaces; i++)
-	 data[i] = brw->wm.surf_bo[i]->offset[0];
-
-      bind_bo = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
-				  NULL, 0,
-				  brw->wm.surf_bo, brw->wm.nr_surfaces,
-				  data, data_size,
-				  NULL, NULL);
-
-      /* Emit binding table relocations to surface state */
-      for (i = 0; i < brw->wm.nr_surfaces; i++) {
-	 brw->sws->bo_emit_reloc(bind_bo,
-				 BRW_USAGE_STATE,
-				 0,
-				 i * sizeof(GLuint),
-				 brw->wm.surf_bo[i]);
-      }
+   if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
+                        NULL, 0,
+                        brw->wm.surf_bo,
+                        brw->wm.nr_surfaces,
+                        NULL,
+                        bo_out))
+      return PIPE_OK;
+
+   for (i = 0; i < brw->wm.nr_surfaces; i++)
+      data[i] = brw->wm.surf_bo[i]->offset[0];
+
+   ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
+                           NULL, 0,
+                           brw->wm.surf_bo, brw->wm.nr_surfaces,
+                           data, data_size,
+                           NULL, NULL,
+                           bo_out);
+   if (ret)
+      return ret;
+
+   /* Emit binding table relocations to surface state */
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      ret = brw->sws->bo_emit_reloc(*bo_out,
+                                    BRW_USAGE_STATE,
+                                    0,
+                                    i * sizeof(GLuint),
+                                    brw->wm.surf_bo[i]);
+      if (ret)
+         return ret;
    }
 
-   return bind_bo;
+   return PIPE_OK;
 }
 
-static int prepare_wm_surfaces(struct brw_context *brw )
+static enum pipe_error prepare_wm_surfaces(struct brw_context *brw )
 {
-   GLuint i;
+   enum pipe_error ret;
    int nr_surfaces = 0;
-
-   /* Unreference old buffers
-    */
-   for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      brw->sws->bo_unreference(brw->wm.surf_bo[i]);
-      brw->wm.surf_bo[i] = NULL;
-   }
-
+   GLuint i;
 
    /* PIPE_NEW_COLOR_BUFFERS | PIPE_NEW_BLEND
     *
@@ -192,38 +207,51 @@ static int prepare_wm_surfaces(struct brw_context *brw )
     * XXX: no color buffer case
     */
    for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
-      brw_update_renderbuffer_surface(brw, 
-				      brw_surface(brw->curr.fb.cbufs[i]), 
-				      nr_surfaces++);
+      ret = brw_update_render_surface(brw, 
+                                      brw_surface(brw->curr.fb.cbufs[i]), 
+                                      &brw->wm.surf_bo[nr_surfaces++]);
+      if (ret)
+         return ret;
    }
 
    /* PIPE_NEW_TEXTURE 
     */
    for (i = 0; i < brw->curr.num_textures; i++) {
-      brw_update_texture_surface(brw, 
-				 brw_texture(brw->curr.texture[i]),
-				 nr_surfaces++);
+      ret = brw_update_texture_surface(brw, 
+                                       brw_texture(brw->curr.texture[i]),
+                                       &brw->wm.surf_bo[nr_surfaces++]);
+      if (ret)
+         return ret;
    }
 
    /* PIPE_NEW_FRAGMENT_CONSTANTS
     */
 #if 0
    if (brw->curr.fragment_constants) {
-      brw_update_fragment_constant_surface(brw, 
-					   brw->curr.fragment_constants, 
-					   nr_surfaces++);
+      ret = brw_update_fragment_constant_surface(brw, 
+                                                 brw->curr.fragment_constants, 
+                                                 &brw->wm.surf_bo[nr_surfaces++]);
+      if (ret)
+         return ret;
    }
 #endif
 
    if (brw->wm.nr_surfaces != nr_surfaces) {
+
+      /* Unreference any left-over old buffers
+       */
+      for (i = nr_surfaces; i < brw->wm.nr_surfaces; i++)
+         bo_reference(&brw->wm.surf_bo[i], NULL);
+
       brw->wm.nr_surfaces = nr_surfaces;
       brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
    }
 
-   brw->sws->bo_unreference(brw->wm.bind_bo);
-   brw->wm.bind_bo = brw_wm_get_binding_table(brw);
+   ret = brw_wm_get_binding_table(brw, &brw->wm.bind_bo);
+   if (ret)
+      return ret;
 
-   return 0;
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_wm_surfaces = {
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index b1edca818a..fc465d7c14 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -134,11 +134,12 @@ const char *data_types[BRW_DATA_MAX] =
 };
 
 
-static struct brw_winsys_buffer *
+static enum pipe_error
 xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
-		      enum brw_buffer_type type,
-		      unsigned size,
-		      unsigned alignment )
+                   enum brw_buffer_type type,
+                   unsigned size,
+                   unsigned alignment,
+                   struct brw_winsys_buffer **bo_out )
 {
    struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
    struct xlib_brw_buffer *buf;
@@ -148,12 +149,13 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
 
    buf = CALLOC_STRUCT(xlib_brw_buffer);
    if (!buf)
-      return NULL;
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   pipe_reference_init(&buf->base.reference, 1);
 
    buf->offset = align(xbw->offset, alignment);
    buf->type = type;
    buf->virtual = MALLOC(size);
-   buf->cheesy_refcount = 1;
    buf->base.offset = &buf->offset; /* hmm, cheesy */
    buf->base.size = size;
 
@@ -161,36 +163,25 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
    if (xbw->offset > MAX_VRAM)
       goto err;
 
-   return &buf->base;
+   /* XXX: possibly reentrant call to bo_destroy:
+    */
+   bo_reference(bo_out, &buf->base);
+   return PIPE_OK;
 
 err:
    assert(0);
+   FREE(buf->virtual);
    FREE(buf);
-   return NULL;
-}
-
-static void 
-xlib_brw_bo_reference( struct brw_winsys_buffer *buffer )
-{
-   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
-
-   buf->cheesy_refcount++;
+   return PIPE_ERROR_OUT_OF_MEMORY;
 }
 
 static void 
-xlib_brw_bo_unreference( struct brw_winsys_buffer *buffer )
+xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer )
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
-   /* As a special favor in this call only, buffer is allowed to be
-    * NULL:
-    */
-   if (buffer == NULL)
-      return;
-
-   if (--buf->cheesy_refcount == 0) {
-      FREE(buffer);
-   }
+   FREE(buf->virtual);
+   FREE(buf);
 }
 
 static int 
@@ -378,8 +369,7 @@ xlib_create_brw_winsys_screen( void )
 
    ws->base.destroy              = xlib_brw_winsys_destroy;
    ws->base.bo_alloc             = xlib_brw_bo_alloc;
-   ws->base.bo_reference         = xlib_brw_bo_reference;
-   ws->base.bo_unreference       = xlib_brw_bo_unreference;
+   ws->base.bo_destroy           = xlib_brw_bo_destroy;
    ws->base.bo_emit_reloc        = xlib_brw_bo_emit_reloc;
    ws->base.bo_exec              = xlib_brw_bo_exec;
    ws->base.bo_subdata           = xlib_brw_bo_subdata;
-- 
cgit v1.2.3


From 203adb8ea68da0fbb2e4643e36e273f31c29980f Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 15:15:04 +0000
Subject: i965g: remove old dumping code

---
 src/gallium/drivers/i965/Makefile         |   1 -
 src/gallium/drivers/i965/SConscript       |   1 -
 src/gallium/drivers/i965/brw_state_dump.c | 230 ------------------------------
 3 files changed, 232 deletions(-)
 delete mode 100644 src/gallium/drivers/i965/brw_state_dump.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 6c0d3541d7..f0a5bc7ee5 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -41,7 +41,6 @@ C_SOURCES = \
 	brw_sf_state.c \
 	brw_state_batch.c \
 	brw_state_cache.c \
-	brw_state_dump.c \
 	brw_state_upload.c \
 	brw_structs_dump.c \
 	brw_swtnl.c \
diff --git a/src/gallium/drivers/i965/SConscript b/src/gallium/drivers/i965/SConscript
index d38ad6fe7e..9c2faaf4b4 100644
--- a/src/gallium/drivers/i965/SConscript
+++ b/src/gallium/drivers/i965/SConscript
@@ -50,7 +50,6 @@ i965 = env.ConvenienceLibrary(
 		'brw_state_batch.c',
 		'brw_state_cache.c',
 #		'brw_state_debug.c',
-		'brw_state_dump.c',
 		'brw_state_upload.c',
 		'brw_swtnl.c',
 		'brw_urb.c',
diff --git a/src/gallium/drivers/i965/brw_state_dump.c b/src/gallium/drivers/i965/brw_state_dump.c
deleted file mode 100644
index 388331ee62..0000000000
--- a/src/gallium/drivers/i965/brw_state_dump.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Copyright © 2007 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Eric Anholt <eric@anholt.net>
- *
- */
-
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "brw_winsys.h"
-
-/**
- * Prints out a header, the contents, and the message associated with
- * the hardware state data given.
- *
- * \param name Name of the state object
- * \param data Pointer to the base of the state object
- * \param hw_offset Hardware offset of the base of the state data.
- * \param index Index of the DWORD being output.
- */
-static void
-state_out(const char *name, void *data, uint32_t hw_offset, int index,
-	  char *fmt, ...)
-{
-    va_list va;
-
-    debug_printf("%8s: 0x%08x: 0x%08x: ",
-		 name, hw_offset + index * 4, ((uint32_t *)data)[index]);
-    va_start(va, fmt);
-    debug_vprintf(fmt, va);
-    va_end(va);
-}
-
-/** Generic, undecoded state buffer debug printout */
-static void
-state_struct_out(struct brw_winsys_screen *sws,
-		 const char *name,
-		 struct brw_winsys_buffer *buffer,
-		 unsigned int state_size)
-{
-   int i;
-   void *data;
-
-   if (buffer == NULL)
-      return;
-
-   data = sws->bo_map(buffer, BRW_DATA_OTHER, GL_FALSE);
-   for (i = 0; i < state_size / 4; i++) {
-      state_out(name, data, buffer->offset[0], i,
-		"dword %d\n", i);
-   }
-   sws->bo_unmap(buffer);
-}
-
-static const char *
-get_965_surfacetype(unsigned int surfacetype)
-{
-    switch (surfacetype) {
-    case 0: return "1D";
-    case 1: return "2D";
-    case 2: return "3D";
-    case 3: return "CUBE";
-    case 4: return "BUFFER";
-    case 7: return "NULL";
-    default: return "unknown";
-    }
-}
-
-static const char *
-get_965_surface_format(unsigned int surface_format)
-{
-    switch (surface_format) {
-    case 0x000: return "r32g32b32a32_float";
-    case 0x0c1: return "b8g8r8a8_unorm";
-    case 0x100: return "b5g6r5_unorm";
-    case 0x102: return "b5g5r5a1_unorm";
-    case 0x104: return "b4g4r4a4_unorm";
-    default: return "unknown";
-    }
-}
-
-static void dump_wm_surface_state(struct brw_context *brw)
-{
-   int i;
-
-   for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      struct brw_winsys_buffer *surf_bo = brw->wm.surf_bo[i];
-      unsigned int surfoff;
-      struct brw_surface_state *surf;
-      char name[20];
-
-      if (surf_bo == NULL) {
-	 debug_printf("  WM SS%d: NULL\n", i);
-	 continue;
-      }
-      surf = (struct brw_surface_state *)brw->sws->bo_map(surf_bo, 
-                                                          BRW_DATA_OTHER,
-                                                          GL_FALSE);
-      surfoff = surf_bo->offset[0];
-
-      sprintf(name, "WM SS%d", i);
-      state_out(name, surf, surfoff, 0, "%s %s\n",
-		get_965_surfacetype(surf->ss0.surface_type),
-		get_965_surface_format(surf->ss0.surface_format));
-      state_out(name, surf, surfoff, 1, "offset\n");
-      state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n",
-		surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count);
-      state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n",
-		surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not ");
-      state_out(name, surf, surfoff, 4, "mip base %d\n",
-		surf->ss4.min_lod);
-      state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n",
-		surf->ss5.x_offset, surf->ss5.y_offset);
-
-      brw->sws->bo_unmap(surf_bo);
-   }
-}
-
-static void dump_sf_viewport_state(struct brw_context *brw)
-{
-   const char *name = "SF VP";
-   struct brw_sf_viewport *vp;
-   uint32_t vp_off;
-
-   if (brw->sf.vp_bo == NULL)
-      return;
-
-   vp = (struct brw_sf_viewport *)brw->sws->bo_map(brw->sf.vp_bo,
-                                                   BRW_DATA_OTHER,
-                                                   GL_FALSE);
-   vp_off = brw->sf.vp_bo->offset[0];
-
-   state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00);
-   state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11);
-   state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22);
-   state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30);
-   state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31);
-   state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32);
-
-   state_out(name, vp, vp_off, 6, "top left = %d,%d\n",
-	     vp->scissor.xmin, vp->scissor.ymin);
-   state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n",
-	     vp->scissor.xmax, vp->scissor.ymax);
-
-   brw->sws->bo_unmap(brw->sf.vp_bo);
-}
-
-static void brw_debug_prog(struct brw_winsys_screen *sws,
-			   const char *name,
-			   struct brw_winsys_buffer *prog)
-{
-   unsigned int i;
-   uint32_t *data;
-
-   if (prog == NULL)
-      return;
-
-   data = (uint32_t *)sws->bo_map(prog,
-                                  BRW_DATA_OTHER,
-                                  GL_FALSE);
-
-   for (i = 0; i < prog->size / 4 / 4; i++) {
-      debug_printf("%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-	      name, (unsigned int)prog->offset + i * 4 * 4,
-	      data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]);
-      /* Stop at the end of the program.  It'd be nice to keep track of the actual
-       * intended program size instead of guessing like this.
-       */
-      if (data[i * 4 + 0] == 0 &&
-	  data[i * 4 + 1] == 0 &&
-	  data[i * 4 + 2] == 0 &&
-	  data[i * 4 + 3] == 0)
-	 break;
-   }
-
-   sws->bo_unmap(prog);
-}
-
-
-/**
- * Print additional debug information associated with the batchbuffer
- * when DEBUG_BATCH is set.
- *
- * For 965, this means mapping the state buffers that would have been referenced
- * by the batchbuffer and dumping them.
- *
- * The buffer offsets printed rely on the buffer containing the last offset
- * it was validated at.
- */
-void brw_debug_batch(struct brw_context *brw)
-{
-   struct brw_winsys_screen *sws = brw->sws;
-
-   state_struct_out(sws, "WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces);
-   dump_wm_surface_state(brw);
-
-   state_struct_out(sws, "VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state));
-   brw_debug_prog(sws, "VS prog", brw->vs.prog_bo);
-
-   state_struct_out(sws, "GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state));
-   brw_debug_prog(sws, "GS prog", brw->gs.prog_bo);
-
-   state_struct_out(sws, "SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state));
-   dump_sf_viewport_state(brw);
-   brw_debug_prog(sws, "SF prog", brw->sf.prog_bo);
-
-   state_struct_out(sws, "WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state));
-   brw_debug_prog(sws, "WM prog", brw->wm.prog_bo);
-}
-- 
cgit v1.2.3


From aa9773d056a8799050304f75c1bf4c1f470e7e53 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 15:34:18 +0000
Subject: i965g: disassemble more than one instruction at a time

---
 src/gallium/drivers/i965/brw_context.h       |  4 +++-
 src/gallium/drivers/i965/brw_disasm.c        | 28 +++++++++++++++++++++++-----
 src/gallium/drivers/i965/brw_vs_emit.c       |  4 +---
 src/gallium/drivers/i965/brw_wm_emit.c       |  6 +-----
 src/gallium/drivers/i965/brw_wm_glsl.c       |  4 +---
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 22 ++++++++--------------
 6 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 580251d2f1..e0c1c57ed7 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -794,7 +794,9 @@ int brw_upload_urb_fence(struct brw_context *brw);
 int brw_upload_cs_urb_state(struct brw_context *brw);
 
 /* brw_disasm.c */
-int brw_disasm (FILE *file, struct brw_instruction *inst);
+int brw_disasm (FILE *file, 
+                const struct brw_instruction *inst,
+                unsigned count);
 
 /*======================================================================
  * Inline conversion functions.  These are better-typed than the
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 29fe848005..df0c7b9a2b 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -455,7 +455,7 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
     return err;
 }
 
-static int dest (FILE *file, struct brw_instruction *inst)
+static int dest (FILE *file, const struct brw_instruction *inst)
 {
     int	err = 0;
 
@@ -621,7 +621,7 @@ static int src_da16 (FILE *file,
 }
 
 
-static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+static int imm (FILE *file, GLuint type, const struct brw_instruction *inst) {
     switch (type) {
     case BRW_REGISTER_TYPE_UD:
 	format (file, "0x%08xUD", inst->bits3.ud);
@@ -650,7 +650,7 @@ static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
     return 0;
 }
 
-static int src0 (FILE *file, struct brw_instruction *inst)
+static int src0 (FILE *file, const struct brw_instruction *inst)
 {
     if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
 	return imm (file, inst->bits1.da1.src0_reg_type,
@@ -710,7 +710,7 @@ static int src0 (FILE *file, struct brw_instruction *inst)
     }
 }
 
-static int src1 (FILE *file, struct brw_instruction *inst)
+static int src1 (FILE *file, const struct brw_instruction *inst)
 {
     if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
 	return imm (file, inst->bits1.da1.src1_reg_type,
@@ -770,7 +770,7 @@ static int src1 (FILE *file, struct brw_instruction *inst)
     }
 }
 
-int brw_disasm (FILE *file, struct brw_instruction *inst)
+static int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
 {
     int	err = 0;
     int space = 0;
@@ -900,3 +900,21 @@ int brw_disasm (FILE *file, struct brw_instruction *inst)
     newline (file);
     return err;
 }
+
+
+/* Disassemble count instructions from inst[] to file; stops at first error. */
+int brw_disasm (FILE *file,
+                const struct brw_instruction *inst,
+                unsigned count)
+{
+   unsigned i;   /* unsigned to match count */
+
+   for (i = 0; i < count; i++) {
+      int err = brw_disasm_insn(file, &inst[i]);
+      if (err)
+         return err;   /* propagate the first failure */
+   }
+   fprintf(file, "\n");
+   return 0;
+}
+
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 95e2b8e2cb..d86e2104d8 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1627,8 +1627,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
       int i;
 
       debug_printf("vs-native:\n");
-      for (i = 0; i < p->nr_insn; i++)
-	 brw_disasm(stderr, &p->store[i]);
-      debug_printf("\n");
+      brw_disasm(stderr, p->store, p->nr_insn);
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index a705d8b344..1c38f80cda 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -1512,11 +1512,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
    }
 
    if (BRW_DEBUG & DEBUG_WM) {
-      int i;
-
       debug_printf("wm-native:\n");
-      for (i = 0; i < p->nr_insn; i++)
-	 brw_disasm(stderr, &p->store[i]);
-      debug_printf("\n");
+      brw_disasm(stderr, p->store, p->nr_insn);
    }
 }
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index a06b0a446e..284f819bf8 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -2003,9 +2003,7 @@ static void brw_wm_emit_branching_shader(struct brw_context *brw, struct brw_wm_
 
     if (BRW_DEBUG & DEBUG_WM) {
       debug_printf("wm-native:\n");
-      for (i = 0; i < p->nr_insn; i++)
-	 brw_disasm(stderr, &p->store[i]);
-      debug_printf("\n");
+      brw_disasm(stderr, p->store, p->nr_insn);
     }
 }
 
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 54cf56c811..d129067ba3 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -47,7 +47,9 @@
 
 #define MAX_VRAM (128*1024*1024)
 
-extern int brw_disasm (FILE *file, struct brw_instruction *inst);
+extern int brw_disasm (FILE *file, 
+                       const struct brw_instruction *inst,
+                       unsigned count );
 
 struct xlib_brw_buffer
 {
@@ -236,7 +238,11 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
       brw_dump_cc_unit_state( data );
       break;
    case BRW_DATA_GS_WM_PROG:
-      brw_disasm( stderr, data ); /* disassem */
+   case BRW_DATA_GS_SF_PROG:
+   case BRW_DATA_GS_VS_PROG:
+   case BRW_DATA_GS_GS_PROG:
+   case BRW_DATA_GS_CLIP_PROG:
+      brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
       break;
    case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR:
       brw_dump_sampler_default_color( data );
@@ -247,9 +253,6 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
    case BRW_DATA_GS_WM_UNIT:
       brw_dump_wm_unit_state( data );
       break;
-   case BRW_DATA_GS_SF_PROG:
-      brw_disasm( stderr, data ); /* disassem */
-      break;
    case BRW_DATA_GS_SF_VP:
       brw_dump_sf_viewport( data );
       break;
@@ -259,24 +262,15 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
    case BRW_DATA_GS_VS_UNIT:
       brw_dump_vs_unit_state( data );
       break;
-   case BRW_DATA_GS_VS_PROG:
-      brw_disasm( stderr, data ); /* disassem */
-      break;
    case BRW_DATA_GS_GS_UNIT:
       brw_dump_gs_unit_state( data );
       break;
-   case BRW_DATA_GS_GS_PROG:
-      brw_disasm( stderr, data ); /* disassem */
-      break;
    case BRW_DATA_GS_CLIP_VP:
       brw_dump_clipper_viewport( data );
       break;
    case BRW_DATA_GS_CLIP_UNIT:
       brw_dump_clip_unit_state( data );
       break;
-   case BRW_DATA_GS_CLIP_PROG:
-      brw_disasm( stderr, data ); /* disassem */
-      break;
    case BRW_DATA_SS_SURFACE:
       brw_dump_surface_state( data );
       break;
-- 
cgit v1.2.3


From 674c390aaf9d797dbedda1285d4fdacb3d334a67 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 17:41:35 +0000
Subject: i965g: add const qualifiers

---
 src/gallium/drivers/i965/intel_decode.c | 40 ++++++++++++++++-----------------
 src/gallium/drivers/i965/intel_decode.h |  2 +-
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/intel_decode.c b/src/gallium/drivers/i965/intel_decode.c
index 1fb1b66cc8..3166958bad 100644
--- a/src/gallium/drivers/i965/intel_decode.c
+++ b/src/gallium/drivers/i965/intel_decode.c
@@ -70,7 +70,7 @@ int_as_float(uint32_t intval)
 }
 
 static void
-instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
+instr_out(const uint32_t *data, uint32_t hw_offset, unsigned int index,
 	  char *fmt, ...)
 {
     va_list va;
@@ -84,7 +84,7 @@ instr_out(uint32_t *data, uint32_t hw_offset, unsigned int index,
 
 
 static int
-decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_mi(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     unsigned int opcode;
 
@@ -149,7 +149,7 @@ decode_mi(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 }
 
 static int
-decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_2d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     unsigned int opcode, len;
     char *format = NULL;
@@ -306,7 +306,7 @@ decode_2d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 }
 
 static int
-decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_1c(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     switch ((data[0] & 0x00f80000) >> 19) {
     case 0x11:
@@ -333,7 +333,7 @@ decode_3d_1c(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 
 /** Sets the string dstname to describe the destination of the PS instruction */
 static void
-i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask)
+i915_get_instruction_dst(const uint32_t *data, int i, char *dstname, int do_mask)
 {
     uint32_t a0 = data[i];
     int dst_nr = (a0 >> 14) & 0xf;
@@ -466,7 +466,7 @@ i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name)
 }
 
 static void
-i915_get_instruction_src0(uint32_t *data, int i, char *srcname)
+i915_get_instruction_src0(const uint32_t *data, int i, char *srcname)
 {
     uint32_t a0 = data[i];
     uint32_t a1 = data[i + 1];
@@ -484,7 +484,7 @@ i915_get_instruction_src0(uint32_t *data, int i, char *srcname)
 }
 
 static void
-i915_get_instruction_src1(uint32_t *data, int i, char *srcname)
+i915_get_instruction_src1(const uint32_t *data, int i, char *srcname)
 {
     uint32_t a1 = data[i + 1];
     uint32_t a2 = data[i + 2];
@@ -502,7 +502,7 @@ i915_get_instruction_src1(uint32_t *data, int i, char *srcname)
 }
 
 static void
-i915_get_instruction_src2(uint32_t *data, int i, char *srcname)
+i915_get_instruction_src2(const uint32_t *data, int i, char *srcname)
 {
     uint32_t a2 = data[i + 2];
     int src_nr = (a2 >> 16) & 0x1f;
@@ -559,7 +559,7 @@ i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
 }
 
 static void
-i915_decode_alu1(uint32_t *data, uint32_t hw_offset,
+i915_decode_alu1(const uint32_t *data, uint32_t hw_offset,
 		 int i, char *instr_prefix, char *op_name)
 {
     char dst[100], src0[100];
@@ -574,7 +574,7 @@ i915_decode_alu1(uint32_t *data, uint32_t hw_offset,
 }
 
 static void
-i915_decode_alu2(uint32_t *data, uint32_t hw_offset,
+i915_decode_alu2(const uint32_t *data, uint32_t hw_offset,
 		 int i, char *instr_prefix, char *op_name)
 {
     char dst[100], src0[100], src1[100];
@@ -590,7 +590,7 @@ i915_decode_alu2(uint32_t *data, uint32_t hw_offset,
 }
 
 static void
-i915_decode_alu3(uint32_t *data, uint32_t hw_offset,
+i915_decode_alu3(const uint32_t *data, uint32_t hw_offset,
 		 int i, char *instr_prefix, char *op_name)
 {
     char dst[100], src0[100], src1[100], src2[100];
@@ -607,7 +607,7 @@ i915_decode_alu3(uint32_t *data, uint32_t hw_offset,
 }
 
 static void
-i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
+i915_decode_tex(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
 		char *tex_name)
 {
     uint32_t t0 = data[i];
@@ -629,7 +629,7 @@ i915_decode_tex(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix,
 }
 
 static void
-i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
+i915_decode_dcl(const uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
 {
     uint32_t d0 = data[i];
     char *sampletype;
@@ -710,7 +710,7 @@ i915_decode_dcl(uint32_t *data, uint32_t hw_offset, int i, char *instr_prefix)
 }
 
 static void
-i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
+i915_decode_instruction(const uint32_t *data, uint32_t hw_offset,
 			int i, char *instr_prefix)
 {
     switch ((data[i] >> 24) & 0x1f) {
@@ -800,7 +800,7 @@ i915_decode_instruction(uint32_t *data, uint32_t hw_offset,
 }
 
 static int
-decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
+decode_3d_1d(const uint32_t *data, int count, uint32_t hw_offset, int *failures, int i830)
 {
     unsigned int len, i, c, opcode, word, map, sampler, instr;
     char *format;
@@ -1073,7 +1073,7 @@ decode_3d_1d(uint32_t *data, int count, uint32_t hw_offset, int *failures, int i
 }
 
 static int
-decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
+decode_3d_primitive(const uint32_t *data, int count, uint32_t hw_offset,
 		    int *failures)
 {
     char immediate = (data[0] & (1 << 23)) == 0;
@@ -1260,7 +1260,7 @@ decode_3d_primitive(uint32_t *data, int count, uint32_t hw_offset,
 }
 
 static int
-decode_3d(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     unsigned int opcode;
 
@@ -1406,7 +1406,7 @@ get_965_prim_type(uint32_t data)
 }
 
 static int
-decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_965(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     unsigned int opcode, len;
     int i;
@@ -1667,7 +1667,7 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, int *failures)
 }
 
 static int
-decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
+decode_3d_i830(const uint32_t *data, int count, uint32_t hw_offset, int *failures)
 {
     unsigned int opcode;
 
@@ -1741,7 +1741,7 @@ decode_3d_i830(uint32_t *data, int count, uint32_t hw_offset, int *failures)
  * \param hw_offset hardware address for the buffer
  */
 int
-intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
+intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid)
 {
     int index = 0;
     int failures = 0;
diff --git a/src/gallium/drivers/i965/intel_decode.h b/src/gallium/drivers/i965/intel_decode.h
index c50644a46b..7683097b86 100644
--- a/src/gallium/drivers/i965/intel_decode.h
+++ b/src/gallium/drivers/i965/intel_decode.h
@@ -25,5 +25,5 @@
  *
  */
 
-int intel_decode(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
+int intel_decode(const uint32_t *data, int count, uint32_t hw_offset, uint32_t devid);
 void intel_decode_context_reset(void);
-- 
cgit v1.2.3


From 205871c76ad2e655a9180900359d8f9ac690a912 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 17:42:13 +0000
Subject: i965g: use Elements in loops over arrays

---
 src/gallium/drivers/i965/brw_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 2cee7a7a3c..8e1421e738 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -85,12 +85,12 @@ static void brw_destroy_context( struct pipe_context *pipe )
    bo_reference(&brw->sf.state_bo, NULL);
    bo_reference(&brw->sf.vp_bo, NULL);
 
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++)
+   for (i = 0; i < Elements(brw->wm.sdc_bo); i++)
       bo_reference(&brw->wm.sdc_bo[i], NULL);
 
    bo_reference(&brw->wm.bind_bo, NULL);
 
-   for (i = 0; i < BRW_WM_MAX_SURF; i++)
+   for (i = 0; i < Elements(brw->wm.surf_bo); i++)
       bo_reference(&brw->wm.surf_bo[i], NULL);
 
    bo_reference(&brw->wm.sampler_bo, NULL);
-- 
cgit v1.2.3


From a70e6178d4841f490ff318b6017a1ddacfadf752 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 17:42:38 +0000
Subject: i965g: correct size of surf_bo array

---
 src/gallium/drivers/i965/brw_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index e0c1c57ed7..f853255261 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -705,7 +705,7 @@ struct brw_context
 
       /** Binding table of pointers to surf_bo entries */
       struct brw_winsys_buffer *bind_bo;
-      struct brw_winsys_buffer *surf_bo[PIPE_MAX_COLOR_BUFS];
+      struct brw_winsys_buffer *surf_bo[BRW_WM_MAX_SURF];
 
       struct brw_winsys_buffer *prog_bo;
       struct brw_winsys_buffer *state_bo;
-- 
cgit v1.2.3


From 67034b9efce43a7b83f79e44beb6d4e8f6dff22a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 5 Nov 2009 17:05:20 +0000
Subject: softpipe: Implement PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE for
 destination.

It is a valid and tested combination on D3D9.
---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index 0ad0b98654..fe6b6cec35 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -478,7 +478,15 @@ blend_quad(struct quad_stage *qs,
       VEC4_MUL(dest[2], dest[2], dest[2]); /* B */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      assert(0); /* illegal */
+   {
+      const float *alpha = quadColor[3];   /* incoming (source) alpha */
+      float diff[4], temp[4];
+      VEC4_SUB(diff, one, dest[3]);        /* diff = one - dest alpha */
+      VEC4_MIN(temp, alpha, diff);         /* factor = min(src A, diff) */
+      VEC4_MUL(dest[0], dest[0], temp); /* R: scale dest, not the source */
+      VEC4_MUL(dest[1], dest[1], temp); /* G */
+      VEC4_MUL(dest[2], dest[2], temp); /* B */
+   }
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
    {
@@ -600,7 +608,7 @@ blend_quad(struct quad_stage *qs,
       VEC4_MUL(dest[3], dest[3], dest[3]); /* A */
       break;
    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      assert(0); /* illegal */
+      /* dest = dest * 1   NO-OP, leave dest as-is */
       break;
    case PIPE_BLENDFACTOR_CONST_COLOR:
       /* fall-through */
-- 
cgit v1.2.3


From b229ee342f2cef5396a251525d5b499760280933 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 17:43:57 +0000
Subject: brw: push more dumping into the winsys

---
 src/gallium/drivers/i965/brw_batchbuffer.c   |  22 +-----
 src/gallium/drivers/i965/brw_vs_emit.c       |   2 -
 src/gallium/drivers/i965/brw_winsys.h        |   8 ++
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c | 111 ++++++++++++++++++++-------
 4 files changed, 93 insertions(+), 50 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index e5f73bd6a3..76a7d2d2af 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -54,7 +54,7 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
       batch->map = batch->malloc_buffer;
    else 
       batch->map = batch->sws->bo_map(batch->buf,
-                                      BRW_DATA_OTHER,
+                                      BRW_DATA_BATCH_BUFFER,
                                       GL_TRUE);
 
    batch->size = BRW_BATCH_SIZE;
@@ -136,7 +136,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
 
    if (batch->use_malloc_buffer) {
       batch->sws->bo_subdata(batch->buf, 
-                             BRW_DATA_OTHER,
+                             BRW_DATA_BATCH_BUFFER,
                              0, used,
                              batch->map );
       batch->map = NULL;
@@ -150,19 +150,6 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
       
    batch->sws->bo_exec(batch->buf, used );
 
-   if (1 /*BRW_DEBUG & DEBUG_BATCH*/) {
-      void *ptr = batch->sws->bo_map(batch->buf,
-                                     BRW_DATA_OTHER,
-                                     GL_FALSE);
-
-      intel_decode(ptr,
-		   used / 4, 
-		   batch->buf->offset[0],
-		   batch->chipset.pci_id);
-
-      batch->sws->bo_unmap(batch->buf);
-   }
-
    if (BRW_DEBUG & DEBUG_SYNC) {
       /* Abuse map/unmap to achieve wait-for-fence.
        *
@@ -170,10 +157,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
        * interface.
        */
       debug_printf("waiting for idle\n");
-      batch->sws->bo_map(batch->buf,
-                         BRW_DATA_OTHER,
-                         GL_TRUE);
-      batch->sws->bo_unmap(batch->buf);
+      batch->sws->bo_wait_idle(batch->buf);
    }
 
    /* Reset the buffer:
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index d86e2104d8..3217777acb 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1624,8 +1624,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
    post_vs_emit(c, end_inst, last_inst);
 
    if (BRW_DEBUG & DEBUG_VS) {
-      int i;
-
       debug_printf("vs-native:\n");
       brw_disasm(stderr, p->store, p->nr_insn);
    }
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index f61c541ad1..e041b0acaf 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -105,6 +105,8 @@ enum brw_buffer_data_type {
    BRW_DATA_GS_CLIP_PROG,
    BRW_DATA_SS_SURFACE,
    BRW_DATA_SS_SURF_BIND,
+   BRW_DATA_CONSTANT_BUFFER,
+   BRW_DATA_BATCH_BUFFER,
    BRW_DATA_OTHER,
    BRW_DATA_MAX
 };
@@ -176,6 +178,12 @@ struct brw_winsys_screen {
    void (*bo_unmap)(struct brw_winsys_buffer *buffer);
    /*@}*/
 
+   
+   /* Wait for buffer to go idle.  Similar to map+unmap, but doesn't
+    * mark buffer contents as dirty.
+    */
+   void (*bo_wait_idle)(struct brw_winsys_buffer *buffer);
+   
    /**
     * Destroy the winsys.
     */
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index d129067ba3..5aec332761 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -51,14 +51,19 @@ extern int brw_disasm (FILE *file,
                        const struct brw_instruction *inst,
                        unsigned count );
 
+extern int intel_decode(const uint32_t *data, 
+                        int count,
+                        uint32_t hw_offset,
+                        uint32_t devid);
+
 struct xlib_brw_buffer
 {
    struct brw_winsys_buffer base;
+   char *virtual;
    unsigned offset;
    unsigned type;
-   char *virtual;
-   unsigned cheesy_refcount;
    int map_count;
+   boolean modified;
 };
 
 
@@ -68,7 +73,10 @@ struct xlib_brw_buffer
 struct xlib_brw_winsys
 {
    struct brw_winsys_screen base;
-   unsigned offset;
+   struct brw_chipset chipset;
+
+   unsigned size;
+   unsigned used;
 };
 
 static struct xlib_brw_winsys *
@@ -157,14 +165,15 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
 
    pipe_reference_init(&buf->base.reference, 1);
 
-   buf->offset = align(xbw->offset, alignment);
+   buf->offset = align(xbw->used, alignment);
    buf->type = type;
    buf->virtual = MALLOC(size);
    buf->base.offset = &buf->offset; /* hmm, cheesy */
    buf->base.size = size;
+   buf->base.sws = sws;
 
-   xbw->offset = align(xbw->offset, alignment) + size;
-   if (xbw->offset > MAX_VRAM)
+   xbw->used = align(xbw->used, alignment) + size;
+   if (xbw->used > MAX_VRAM)
       goto err;
 
    /* XXX: possibly rentrant call to bo_destroy:
@@ -184,7 +193,6 @@ xlib_brw_bo_destroy( struct brw_winsys_buffer *buffer )
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
-   FREE(buf->virtual);
    FREE(buf);
 }
 
@@ -217,19 +225,11 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer,
    return 0;
 }
 
-static int
-xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
-                    enum brw_buffer_data_type data_type,
-                    size_t offset,
-                    size_t size,
-                    const void *data)
+static void dump_data( struct xlib_brw_winsys *xbw,
+                       enum brw_buffer_data_type data_type,
+                       const void *data,
+                       size_t size )
 {
-   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
-
-   debug_printf("%s buf %p off %d sz %d data %p %s\n", 
-                __FUNCTION__, 
-                (void *)buffer, offset, size, data, data_types[data_type]);
-
    switch (data_type) {
    case BRW_DATA_GS_CC_VP:
       brw_dump_cc_viewport( data );
@@ -278,12 +278,39 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
       break;
    case BRW_DATA_OTHER:
       break;
+   case BRW_DATA_BATCH_BUFFER:
+      intel_decode(data, size / 4, 0, xbw->chipset.pci_id);
+      break;
+   case BRW_DATA_CONSTANT_BUFFER:
+      break;
    default:
       assert(0);
       break;
    }
+}
+
+
+/* Log, optionally dump, then copy size bytes of data into buffer at offset. */
+static int
+xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
+                    enum brw_buffer_data_type data_type,
+                    size_t offset,
+                    size_t size,
+                    const void *data)
+{
+   struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
+   struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws);
+
+   /* size_t is not int: cast so the varargs match the %u conversions */
+   debug_printf("%s buf %p off %u sz %u %s\n", __FUNCTION__, (void *)buffer,
+                (unsigned)offset, (unsigned)size, data_types[data_type]);
+
+   if (1)
+      dump_data( xbw, data_type, data, size );
 
+   assert(buf->base.size >= offset + size);
    memcpy(buf->virtual + offset, data, size);
+
    return 0;
 }
 
@@ -324,7 +351,7 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
 static void *
 xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
                 enum brw_buffer_data_type data_type,
-		   boolean write)
+                boolean write)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
@@ -332,6 +359,9 @@ xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
                 write ? "read/write" : "read",
                 write ? data_types[data_type] : "");
 
+   if (write)
+      buf->modified = 1;
+
    buf->map_count++;
    return buf->virtual;
 }
@@ -345,14 +375,30 @@ xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer)
 
    --buf->map_count;
    assert(buf->map_count >= 0);
+
+   if (buf->map_count == 0 &&
+       buf->modified) {
+
+      buf->modified = 0;
+      
+      /* Consider dumping new buffer contents here.
+       */
+   }
+}
+
+
+static void
+xlib_brw_bo_wait_idle( struct brw_winsys_buffer *buffer )
+{
 }
 
 
 static void
-xlib_brw_winsys_destroy( struct brw_winsys_screen *screen )
+xlib_brw_winsys_destroy( struct brw_winsys_screen *sws )
 {
-   /* XXX: free all buffers */
-   FREE(screen);
+   struct xlib_brw_winsys *xbw = xlib_brw_winsys(sws);
+
+   FREE(xbw);
 }
 
 static struct brw_winsys_screen *
@@ -364,6 +410,8 @@ xlib_create_brw_winsys_screen( void )
    if (!ws)
       return NULL;
 
+   ws->used = 0;
+
    ws->base.destroy              = xlib_brw_winsys_destroy;
    ws->base.bo_alloc             = xlib_brw_bo_alloc;
    ws->base.bo_destroy           = xlib_brw_bo_destroy;
@@ -375,6 +423,7 @@ xlib_create_brw_winsys_screen( void )
    ws->base.check_aperture_space = xlib_brw_check_aperture_space;
    ws->base.bo_map               = xlib_brw_bo_map;
    ws->base.bo_unmap             = xlib_brw_bo_unmap;
+   ws->base.bo_wait_idle         = xlib_brw_bo_wait_idle;
 
    return &ws->base;
 }
@@ -388,12 +437,14 @@ static void
 xlib_i965_display_surface(struct xmesa_buffer *xm_buffer,
                           struct pipe_surface *surf)
 {
-   /* struct brw_texture *texture = brw_texture(surf->texture); */
-
-   debug_printf("%s tex %p, sz %dx%d\n", __FUNCTION__, 
-                (void *)surf->texture,
-                surf->texture->width[0],
-                surf->texture->height[0]);
+   struct brw_surface *surface = brw_surface(surf);
+   struct xlib_brw_buffer *bo = xlib_brw_buffer(surface->bo);
+
+   debug_printf("%s offset %x+%x sz %dx%d\n", __FUNCTION__, 
+                bo->offset,
+                surface->draw_offset,
+                surf->width,
+                surf->height);
 }
 
 static void
@@ -419,6 +470,8 @@ xlib_create_i965_screen( void )
    if (screen == NULL)
       goto fail;
 
+   xlib_brw_winsys(winsys)->chipset = brw_screen(screen)->chipset;
+
    screen->flush_frontbuffer = xlib_i965_flush_frontbuffer;
    return screen;
 
-- 
cgit v1.2.3


From 8f0e51be4784273baef692583940964bc04b78ef Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 19:57:59 +0000
Subject: i965g: correct sense of writedisable flags

---
 src/gallium/drivers/i965/brw_pipe_blend.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index f6da9254ef..872151222d 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -146,10 +146,10 @@ static void *brw_create_blend_state( struct pipe_context *pipe,
 
    /* Per-surface color mask -- just follow global state:
     */
-   blend->ss0.writedisable_red   = (templ->colormask & PIPE_MASK_R) ? 1 : 0;
-   blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 1 : 0;
-   blend->ss0.writedisable_blue  = (templ->colormask & PIPE_MASK_B) ? 1 : 0;
-   blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 1 : 0;
+   blend->ss0.writedisable_red   = (templ->colormask & PIPE_MASK_R) ? 0 : 1;
+   blend->ss0.writedisable_green = (templ->colormask & PIPE_MASK_G) ? 0 : 1;
+   blend->ss0.writedisable_blue  = (templ->colormask & PIPE_MASK_B) ? 0 : 1;
+   blend->ss0.writedisable_alpha = (templ->colormask & PIPE_MASK_A) ? 0 : 1;
 
    return (void *)blend;
 }
-- 
cgit v1.2.3


From 658da189b62c4086c08950f3da5767e628235b55 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 19:58:02 +0000
Subject: i965g: remove duplicate viewport state in brw_context

---
 src/gallium/drivers/i965/brw_cc.c       | 2 +-
 src/gallium/drivers/i965/brw_context.h  | 5 +----
 src/gallium/drivers/i965/brw_sf_state.c | 2 +-
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 8e25fe8585..ba16fc4f6b 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -66,7 +66,7 @@ static enum pipe_error prepare_cc_vp( struct brw_context *brw )
    memset(&ccv, 0, sizeof(ccv));
 
    /* PIPE_NEW_VIEWPORT */
-   calc_sane_viewport( &brw->curr.vp, &svp );
+   calc_sane_viewport( &brw->curr.viewport, &svp );
 
    ccv.min_depth = svp.near;
    ccv.max_depth = svp.far;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index f853255261..177fe2172d 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -514,18 +514,15 @@ struct brw_context
       unsigned num_vertex_buffers;
 
       struct pipe_scissor_state scissor;
+      struct pipe_viewport_state viewport;
       struct pipe_framebuffer_state fb;
-      struct pipe_viewport_state vp;
       struct pipe_clip_state ucp;
       struct pipe_buffer *vertex_constants;
       struct pipe_buffer *fragment_constants;
 
-      struct pipe_viewport_state viewport;
       struct brw_blend_constant_color bcc;
       struct brw_polygon_stipple bps;
 
-      
-
       /**
        * Index buffer for this draw_prims call.
        *
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index f030f26c19..bd8fc65b9e 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -41,7 +41,7 @@
 
 static enum pipe_error upload_sf_vp(struct brw_context *brw)
 {
-   const struct pipe_viewport_state *vp = &brw->curr.vp;
+   const struct pipe_viewport_state *vp = &brw->curr.viewport;
    const struct pipe_scissor_state *scissor = &brw->curr.scissor;
    struct brw_sf_viewport sfv;
    enum pipe_error ret;
-- 
cgit v1.2.3


From 963728665aa0d48d4fdbba4276084528f221ee39 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 20:34:27 +0000
Subject: i965g: make the winsys responsible for all buffer->offset handling

The winsys now inserts the presumed offset into referring buffers from
inside of bo_emit_reloc().  Remove the many locally coded places where
this was happening in the driver and eliminate the worry of getting it
wrong.

No longer need to expose offset values to the driver at all, so no need
to worry about what to do in the driver when they change.  Just use
zero values wherever we had offsets previously -- the relocations will
fix it all up for us.
---
 src/gallium/drivers/i965/brw_batchbuffer.c      | 11 +++++------
 src/gallium/drivers/i965/brw_cc.c               |  2 +-
 src/gallium/drivers/i965/brw_clip_state.c       |  2 +-
 src/gallium/drivers/i965/brw_gs_state.c         |  4 ++--
 src/gallium/drivers/i965/brw_screen_texture.c   |  8 +++++---
 src/gallium/drivers/i965/brw_sf_state.c         |  6 ++++--
 src/gallium/drivers/i965/brw_vs_state.c         |  2 +-
 src/gallium/drivers/i965/brw_winsys.h           |  1 -
 src/gallium/drivers/i965/brw_wm_sampler_state.c |  2 +-
 src/gallium/drivers/i965/brw_wm_state.c         | 13 ++++---------
 src/gallium/drivers/i965/brw_wm_surface_state.c |  7 +++++--
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c    |  1 -
 12 files changed, 29 insertions(+), 30 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index 76a7d2d2af..a55be6faab 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -115,7 +115,7 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
 		   file, line, used);
 
    if (ALWAYS_EMIT_MI_FLUSH) {
-      *(GLuint *) (batch->ptr) = ((MI_FLUSH << 16) | BRW_FLUSH_STATE_CACHE);
+      *(GLuint *) (batch->ptr) = MI_FLUSH | BRW_FLUSH_STATE_CACHE;
       batch->ptr += 4;
       used = batch->ptr - batch->map;
    }
@@ -192,12 +192,11 @@ brw_batchbuffer_emit_reloc(struct brw_batchbuffer *batch,
    if (ret != 0)
       return ret;
 
-   /*
-    * Using the old buffer offset, write in what the right data would be, in case
-    * the buffer doesn't move and we can short-circuit the relocation processing
-    * in the kernel
+   /* bo_emit_reloc was responsible for writing a zero into the
+    * batchbuffer if necessary.  Just need to update our pointer.
     */
-   brw_batchbuffer_emit_dword (batch, buffer->offset[0] + delta);
+   batch->ptr += 4;
+
    return 0;
 }
 
diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index ba16fc4f6b..78d83929e0 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -142,7 +142,7 @@ cc_unit_create_from_key(struct brw_context *brw,
    cc.cc3 = key->cc3;
 
    /* CACHE_NEW_CC_VP */
-   cc.cc4.cc_viewport_state_offset = *(brw->cc.vp_bo->offset) >> 5; /* reloc */
+   cc.cc4.cc_viewport_state_offset = 0;
 
    cc.cc5 = key->cc5;
    cc.cc6 = key->cc6;
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index d4e3c43c61..157e6edf19 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -84,7 +84,7 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
-   clip.thread0.kernel_start_pointer = *(brw->clip.prog_bo->offset) >> 6;
+   clip.thread0.kernel_start_pointer = 0;
 
    clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    clip.thread1.single_program_flow = 1;
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 18a66da538..36a99fd0e9 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -80,8 +80,8 @@ gs_unit_create_from_key(struct brw_context *brw,
    memset(&gs, 0, sizeof(gs));
 
    gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   if (key->prog_active) /* reloc */
-      gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset[0] >> 6;
+   /* reloc */
+   gs.thread0.kernel_start_pointer = 0;
 
    gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    gs.thread1.single_program_flow = 1;
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 355abf0b89..8e684aa076 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -211,8 +211,10 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
        /* && bscreen->use_texture_tiling */
        /* && bscreen->kernel_exec_fencing */) 
    {
-      if (bscreen->chipset.is_965 &&
-	  pf_is_depth_or_stencil(templ->format))
+      if (1)
+         tex->tiling = BRW_TILING_NONE;
+      else if (bscreen->chipset.is_965 &&
+               pf_is_depth_or_stencil(templ->format))
 	 tex->tiling = BRW_TILING_Y;
       else
 	 tex->tiling = BRW_TILING_X;
@@ -256,7 +258,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
 
    /* XXX: what happens when tex->bo->offset changes???
     */
-   tex->ss.ss1.base_addr = tex->bo->offset[0]; /* reloc */
+   tex->ss.ss1.base_addr = 0; /* reloc */
    tex->ss.ss2.mip_count = tex->base.last_level;
    tex->ss.ss2.width = tex->base.width[0] - 1;
    tex->ss.ss2.height = tex->base.height[0] - 1;
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index bd8fc65b9e..689483b4bc 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -142,7 +142,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    memset(&sf, 0, sizeof(sf));
 
    sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset[0] >> 6; /* reloc */
+   /* reloc */
+   sf.thread0.kernel_start_pointer = 0;
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -175,7 +176,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
       sf.thread4.stats_enable = 1;
 
    /* CACHE_NEW_SF_VP */
-   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset[0] >> 5; /* reloc */
+   /* reloc */
+   sf.sf5.sf_viewport_state_offset = 0;
 
    sf.sf5.viewport_transform = 1;
 
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 22a4d7f01b..a5b30eba47 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -89,7 +89,7 @@ vs_unit_create_from_key(struct brw_context *brw,
 
    memset(&vs, 0, sizeof(vs));
 
-   vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset[0] >> 6; /* reloc */
+   vs.thread0.kernel_start_pointer = 0; /* reloc */
    vs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
    /* Choosing multiple program flow means that we may get 2-vertex threads,
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index e041b0acaf..f4a1e9d8ed 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -44,7 +44,6 @@ struct brw_winsys_screen;
 struct brw_winsys_buffer {
    struct pipe_reference reference;
    struct brw_winsys_screen *sws;
-   unsigned *offset;
    unsigned size;
 };
 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 2861aa979f..174836b39d 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -87,7 +87,7 @@ brw_wm_sampler_populate_key(struct brw_context *brw,
 
       entry->ss0 = sampler->ss0;
       entry->ss1 = sampler->ss1;
-      entry->ss2.default_color_pointer = brw->wm.sdc_bo[i]->offset[0] >> 5; /* reloc */
+      entry->ss2.default_color_pointer = 0; /* reloc */
       entry->ss3 = sampler->ss3;
 
       /* Cube-maps on 965 and later must use the same wrap mode for all 3
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 86dc10540d..56789ce7a4 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -149,7 +149,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    memset(&wm, 0, sizeof(wm));
 
    wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset[0] >> 6; /* reloc */
+   wm.thread0.kernel_start_pointer = 0; /* reloc */
    wm.thread1.depth_coef_urb_read_offset = 1;
    wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
@@ -159,8 +159,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
       wm.thread1.binding_table_entry_count = key->nr_surfaces;
 
    if (key->total_scratch != 0) {
-      wm.thread2.scratch_space_base_pointer =
-	 brw->wm.scratch_bo->offset[0] >> 10; /* reloc */
+      wm.thread2.scratch_space_base_pointer = 0; /* reloc */
       wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
    } else {
       wm.thread2.scratch_space_base_pointer = 0;
@@ -178,12 +177,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
    else
       wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
 
-   if (brw->wm.sampler_bo != NULL) {
-      /* reloc */
-      wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset[0] >> 5;
-   } else {
-      wm.wm4.sampler_state_pointer = 0;
-   }
+   /* reloc */
+   wm.wm4.sampler_state_pointer = 0;
 
    wm.wm5.program_uses_depth = key->uses_depth;
    wm.wm5.program_computes_depth = key->computes_depth;
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index e5d0329967..ed365b03b9 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -130,7 +130,7 @@ brw_update_render_surface(struct brw_context *brw,
        */
    ret = brw->sws->bo_emit_reloc(*bo_out,
                                  BRW_USAGE_RENDER_TARGET,
-                                 ss.ss1.base_addr - surface->bo->offset[0], /* XXX */
+                                 0,
                                  offsetof(struct brw_surface_state, ss1),
                                  surface->bo);
    if (ret)
@@ -167,8 +167,11 @@ brw_wm_get_binding_table(struct brw_context *brw,
                         bo_out))
       return PIPE_OK;
 
+   /* Upload zero data, will all be overwritten with relocation
+    * offsets:
+    */
    for (i = 0; i < brw->wm.nr_surfaces; i++)
-      data[i] = brw->wm.surf_bo[i]->offset[0];
+      data[i] = 0;
 
    ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
                            NULL, 0,
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 5aec332761..f46d9961c6 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -168,7 +168,6 @@ xlib_brw_bo_alloc( struct brw_winsys_screen *sws,
    buf->offset = align(xbw->used, alignment);
    buf->type = type;
    buf->virtual = MALLOC(size);
-   buf->base.offset = &buf->offset; /* hmm, cheesy */
    buf->base.size = size;
    buf->base.sws = sws;
 
-- 
cgit v1.2.3


From 3763457892c2d0c654c0eca7585e4d3a863f7714 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 21:09:51 +0000
Subject: i965g: propagate map-buffer-range semantics down to winsys

---
 src/gallium/drivers/i965/brw_batchbuffer.c    | 45 ++++++++---------------
 src/gallium/drivers/i965/brw_batchbuffer.h    |  9 -----
 src/gallium/drivers/i965/brw_pipe_query.c     |  2 +-
 src/gallium/drivers/i965/brw_screen_buffers.c | 51 ++++++++++++++++++++++++++-
 src/gallium/drivers/i965/brw_winsys.h         | 18 +++++++++-
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c  | 19 ++++++++--
 6 files changed, 100 insertions(+), 44 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index a55be6faab..d725e8b27e 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -35,7 +35,6 @@
 #include "brw_structs.h"
 #include "intel_decode.h"
 
-#define USE_MALLOC_BUFFER 1
 #define ALWAYS_EMIT_MI_FLUSH 1
 
 enum pipe_error
@@ -50,14 +49,18 @@ brw_batchbuffer_reset(struct brw_batchbuffer *batch)
    if (ret)
       return ret;
 
-   if (batch->malloc_buffer)
-      batch->map = batch->malloc_buffer;
-   else 
-      batch->map = batch->sws->bo_map(batch->buf,
-                                      BRW_DATA_BATCH_BUFFER,
-                                      GL_TRUE);
-
    batch->size = BRW_BATCH_SIZE;
+
+   /* With map_range semantics, the winsys can decide whether to
+    * inject a malloc'ed bounce buffer instead of mapping directly.
+    */
+   batch->map = batch->sws->bo_map(batch->buf,
+                                   BRW_DATA_BATCH_BUFFER,
+                                   0, batch->size,
+                                   GL_TRUE,
+                                   GL_TRUE,
+                                   GL_TRUE);
+
    batch->ptr = batch->map;
    return PIPE_OK;
 }
@@ -68,11 +71,6 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
 {
    struct brw_batchbuffer *batch = CALLOC_STRUCT(brw_batchbuffer);
 
-   batch->use_malloc_buffer = USE_MALLOC_BUFFER;
-   if (batch->use_malloc_buffer) {
-      batch->malloc_buffer = MALLOC(BRW_BATCH_SIZE);
-   }
-
    batch->sws = sws;
    batch->chipset = chipset;
    brw_batchbuffer_reset(batch);
@@ -83,11 +81,7 @@ brw_batchbuffer_alloc(struct brw_winsys_screen *sws,
 void
 brw_batchbuffer_free(struct brw_batchbuffer *batch)
 {
-   if (batch->malloc_buffer) {
-      FREE(batch->malloc_buffer);
-      batch->map = NULL;
-   }
-   else if (batch->map) {
+   if (batch->map) {
       batch->sws->bo_unmap(batch->buf);
       batch->map = NULL;
    }
@@ -134,18 +128,9 @@ _brw_batchbuffer_flush(struct brw_batchbuffer *batch,
    batch->ptr += 4;
    used = batch->ptr - batch->map;
 
-   if (batch->use_malloc_buffer) {
-      batch->sws->bo_subdata(batch->buf, 
-                             BRW_DATA_BATCH_BUFFER,
-                             0, used,
-                             batch->map );
-      batch->map = NULL;
-   }
-   else {
-      batch->sws->bo_unmap(batch->buf);
-      batch->map = NULL;
-   }
-
+   batch->sws->bo_flush_range(batch->buf, 0, used);
+   batch->sws->bo_unmap(batch->buf);
+   batch->map = NULL;
    batch->ptr = NULL;
       
    batch->sws->bo_exec(batch->buf, used );
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.h b/src/gallium/drivers/i965/brw_batchbuffer.h
index 288a9d2755..7473f5bea4 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.h
+++ b/src/gallium/drivers/i965/brw_batchbuffer.h
@@ -28,15 +28,6 @@ struct brw_batchbuffer {
    struct brw_winsys_buffer *buf;
    struct brw_chipset chipset;
 
-   /* Main-memory copy of the batch-buffer, built up incrementally &
-    * then copied as one to the true buffer.
-    *
-    * XXX: is this still necessary?
-    * XXX: if so, can this be hidden inside the GEM-specific winsys code?
-    */
-   boolean use_malloc_buffer;
-   uint8_t *malloc_buffer;
-
    /**
     * Values exported to speed up the writing the batchbuffer,
     * instead of having to go trough a accesor function for
diff --git a/src/gallium/drivers/i965/brw_pipe_query.c b/src/gallium/drivers/i965/brw_pipe_query.c
index 6a01173787..2eb862635c 100644
--- a/src/gallium/drivers/i965/brw_pipe_query.c
+++ b/src/gallium/drivers/i965/brw_pipe_query.c
@@ -63,7 +63,7 @@ brw_query_get_result(struct pipe_context *pipe,
       if (brw->sws->bo_is_busy(query->bo) && !wait)
 	 return FALSE;
       
-      map = brw->sws->bo_map(query->bo, BRW_DATA_OTHER, GL_FALSE);
+      map = bo_map_read(brw->sws, query->bo);
       if (map == NULL)
 	 return FALSE;
       
diff --git a/src/gallium/drivers/i965/brw_screen_buffers.c b/src/gallium/drivers/i965/brw_screen_buffers.c
index 7ae386ffb3..d8141a3f5b 100644
--- a/src/gallium/drivers/i965/brw_screen_buffers.c
+++ b/src/gallium/drivers/i965/brw_screen_buffers.c
@@ -11,6 +11,29 @@
 
 
+static void *
+brw_buffer_map_range( struct pipe_screen *screen,
+                      struct pipe_buffer *buffer,
+                      unsigned offset,
+                      unsigned length,
+                      unsigned usage )
+{
+   struct brw_screen *bscreen = brw_screen(screen); 
+   struct brw_winsys_screen *sws = bscreen->sws;
+   struct brw_buffer *buf = brw_buffer( buffer );
+
+   if (buf->user_buffer)
+      return buf->user_buffer;
+
+   return sws->bo_map( buf->bo, 
+                       BRW_DATA_OTHER,
+                       offset,
+                       length,
+                       (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE,
+                       (usage & PIPE_BUFFER_USAGE_DISCARD) ? TRUE : FALSE,
+                       (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT) ? TRUE : FALSE);
+}
+
 static void *
 brw_buffer_map( struct pipe_screen *screen,
                 struct pipe_buffer *buffer,
@@ -25,9 +48,33 @@ brw_buffer_map( struct pipe_screen *screen,
 
    return sws->bo_map( buf->bo, 
                        BRW_DATA_OTHER,
-                       (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE );
+                       0,
+                       buf->base.size,
+                       (usage & PIPE_BUFFER_USAGE_CPU_WRITE) ? TRUE : FALSE,
+                       FALSE,
+                       FALSE);
 }
 
+
+static void 
+brw_buffer_flush_mapped_range( struct pipe_screen *screen,
+                               struct pipe_buffer *buffer,
+                               unsigned offset,
+                               unsigned length )
+{
+   struct brw_screen *bscreen = brw_screen(screen); 
+   struct brw_winsys_screen *sws = bscreen->sws;
+   struct brw_buffer *buf = brw_buffer( buffer );
+
+   if (buf->user_buffer)
+      return;
+
+   sws->bo_flush_range( buf->bo, 
+                        offset,
+                        length );
+}
+
+
 static void 
 brw_buffer_unmap( struct pipe_screen *screen,
                    struct pipe_buffer *buffer )
@@ -148,6 +195,8 @@ void brw_screen_buffer_init(struct brw_screen *brw_screen)
    brw_screen->base.buffer_create = brw_buffer_create;
    brw_screen->base.user_buffer_create = brw_user_buffer_create;
    brw_screen->base.buffer_map = brw_buffer_map;
+   brw_screen->base.buffer_map_range = brw_buffer_map_range;
+   brw_screen->base.buffer_flush_mapped_range = brw_buffer_flush_mapped_range;
    brw_screen->base.buffer_unmap = brw_buffer_unmap;
    brw_screen->base.buffer_destroy = brw_buffer_destroy;
 }
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index f4a1e9d8ed..e72b928b06 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -169,7 +169,15 @@ struct brw_winsys_screen {
     */
    void *(*bo_map)(struct brw_winsys_buffer *buffer,
                    enum brw_buffer_data_type data_type,
-		   boolean write);
+                   unsigned offset,
+                   unsigned length,
+                   boolean write,
+                   boolean discard,
+                   boolean flush_explicit );
+
+   void (*bo_flush_range)( struct brw_winsys_buffer *buffer,
+                           unsigned offset,
+                           unsigned length );
 
    /**
     * Unmap a buffer.
@@ -189,6 +197,14 @@ struct brw_winsys_screen {
    void (*destroy)(struct brw_winsys_screen *iws);
 };
 
+static INLINE void *
+bo_map_read( struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf )
+{
+   return sws->bo_map( buf,
+                       BRW_DATA_OTHER,
+                       0, buf->size,
+                       FALSE, FALSE, FALSE );
+}
 
 static INLINE void
 bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index f46d9961c6..ab5df56bc0 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -350,7 +350,11 @@ xlib_brw_check_aperture_space( struct brw_winsys_screen *iws,
 static void *
 xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
                 enum brw_buffer_data_type data_type,
-                boolean write)
+                unsigned offset,
+                unsigned length,
+                boolean write,
+                boolean discard,
+                boolean explicit)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
 
@@ -365,6 +369,15 @@ xlib_brw_bo_map(struct brw_winsys_buffer *buffer,
    return buf->virtual;
 }
 
+
+static void
+xlib_brw_bo_flush_range( struct brw_winsys_buffer *buffer,
+                         unsigned offset,
+                         unsigned length )
+{
+}
+
+
 static void 
 xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer)
 {
@@ -380,7 +393,8 @@ xlib_brw_bo_unmap(struct brw_winsys_buffer *buffer)
 
       buf->modified = 0;
       
-      /* Consider dumping new buffer contents here.
+      /* Consider dumping new buffer contents here, using the
+       * flush-range info to minimize verbosity.
        */
    }
 }
@@ -421,6 +435,7 @@ xlib_create_brw_winsys_screen( void )
    ws->base.bo_references        = xlib_brw_bo_references;
    ws->base.check_aperture_space = xlib_brw_check_aperture_space;
    ws->base.bo_map               = xlib_brw_bo_map;
+   ws->base.bo_flush_range       = xlib_brw_bo_flush_range;
    ws->base.bo_unmap             = xlib_brw_bo_unmap;
    ws->base.bo_wait_idle         = xlib_brw_bo_wait_idle;
 
-- 
cgit v1.2.3


From 4c196ed7a8e06933d11b96ac520afa39252fc5c7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 5 Nov 2009 22:43:36 +0000
Subject: i965g: pass relocation information in an array with bo_subdata

Makes it easier to dump as we get all of the information
about the upload in a single hit.

Opens the window to simplification in the driver if these
relocation arrays can be maintained statically rather than
being recreated whenever we check for a new upload.

Still needs some cleanup to avoid ugliness introduced with the
delta values.
---
 src/gallium/drivers/i965/brw_cc.c                 | 27 ++++----
 src/gallium/drivers/i965/brw_clip_state.c         | 35 ++++++----
 src/gallium/drivers/i965/brw_context.h            |  4 +-
 src/gallium/drivers/i965/brw_curbe.c              |  3 +-
 src/gallium/drivers/i965/brw_gs_state.c           | 36 ++++++----
 src/gallium/drivers/i965/brw_sf_state.c           | 73 ++++++++++----------
 src/gallium/drivers/i965/brw_state.h              | 16 ++---
 src/gallium/drivers/i965/brw_state_cache.c        | 81 ++++++++++++-----------
 src/gallium/drivers/i965/brw_vs_state.c           | 28 ++++----
 src/gallium/drivers/i965/brw_vs_surface_state.c   | 69 +++++++------------
 src/gallium/drivers/i965/brw_winsys.h             | 28 +++++++-
 src/gallium/drivers/i965/brw_wm_constant_buffer.c | 25 +++----
 src/gallium/drivers/i965/brw_wm_sampler_state.c   | 27 ++++----
 src/gallium/drivers/i965/brw_wm_state.c           | 61 ++++++++---------
 src/gallium/drivers/i965/brw_wm_surface_state.c   | 70 +++++++++-----------
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c      | 31 +++++++--
 16 files changed, 327 insertions(+), 287 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 78d83929e0..94e2c99c3e 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -129,6 +129,7 @@ cc_unit_populate_key(const struct brw_context *brw,
 static enum pipe_error
 cc_unit_create_from_key(struct brw_context *brw, 
                         struct brw_cc_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
                         struct brw_winsys_buffer **bo_out)
 {
    struct brw_cc_unit_state cc;
@@ -141,50 +142,48 @@ cc_unit_create_from_key(struct brw_context *brw,
    cc.cc2 = key->cc2;
    cc.cc3 = key->cc3;
 
-   /* CACHE_NEW_CC_VP */
    cc.cc4.cc_viewport_state_offset = 0;
 
    cc.cc5 = key->cc5;
    cc.cc6 = key->cc6;
    cc.cc7 = key->cc7;
-
+   
    ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
                           key, sizeof(*key),
-                          &brw->cc.vp_bo, 1,
+                          reloc, Elements(reloc),
                           &cc, sizeof(cc),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-
-   /* Emit CC viewport relocation */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 0,
-                                 offsetof(struct brw_cc_unit_state, cc4),
-                                 brw->cc.vp_bo);
-   if (ret)
-      return ret;
-
    return PIPE_OK;
 }
 
 static int prepare_cc_unit( struct brw_context *brw )
 {
    struct brw_cc_unit_key key;
+   struct brw_winsys_reloc reloc[1];
    enum pipe_error ret;
 
    cc_unit_populate_key(brw, &key);
 
+   /* CACHE_NEW_CC_VP */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              0,
+              offsetof(struct brw_cc_unit_state, cc4),
+              brw->cc.vp_bo);
+
    if (brw_search_cache(&brw->cache, BRW_CC_UNIT,
                         &key, sizeof(key),
-                        &brw->cc.vp_bo, 1,
+                        reloc, 1,
                         NULL,
                         &brw->cc.state_bo))
       return PIPE_OK;
 
    ret = cc_unit_create_from_key(brw, &key, 
+                                 reloc,
                                  &brw->cc.state_bo);
    if (ret)
       return ret;
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 157e6edf19..3f2b9701e6 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -75,6 +75,7 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
 static enum pipe_error
 clip_unit_create_from_key(struct brw_context *brw,
                           struct brw_clip_unit_key *key,
+                          struct brw_winsys_reloc *reloc,
                           struct brw_winsys_buffer **bo_out)
 {
    struct brw_clip_unit_state clip;
@@ -82,7 +83,6 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    memset(&clip, 0, sizeof(clip));
 
-   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
    clip.thread0.kernel_start_pointer = 0;
 
@@ -144,36 +144,44 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    ret = brw_upload_cache(&brw->cache, BRW_CLIP_UNIT,
                           key, sizeof(*key),
-                          &brw->clip.prog_bo, 1,
+                          reloc, 1,
                           &clip, sizeof(clip),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-   /* Emit clip program relocation */
-   assert(brw->clip.prog_bo);
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 clip.thread0.grf_reg_count << 1,
-                                 offsetof(struct brw_clip_unit_state, thread0),
-                                 brw->clip.prog_bo);
-   if (ret)
-      return ret;
-
    return PIPE_OK;
 }
 
 static int upload_clip_unit( struct brw_context *brw )
 {
    struct brw_clip_unit_key key;
+   struct brw_winsys_reloc reloc[1];
+   unsigned grf_reg_count;
    enum pipe_error ret;
 
    clip_unit_populate_key(brw, &key);
 
+   grf_reg_count = align(key.total_grf, 16) / 16 - 1;
+
+   /* clip program relocation
+    *
+    * XXX: these reloc structs are long lived and only need to be
+    * updated when the bound BO changes.  Hopefully the stuff mixed in
+    * in the delta's is non-orthogonal.
+    */
+   assert(brw->clip.prog_bo);
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_clip_unit_state, thread0),
+              brw->clip.prog_bo);
+
+
    if (brw_search_cache(&brw->cache, BRW_CLIP_UNIT,
                         &key, sizeof(key),
-                        &brw->clip.prog_bo, 1,
+                        reloc, 1,
                         NULL,
                         &brw->clip.state_bo))
       return PIPE_OK;
@@ -181,6 +189,7 @@ static int upload_clip_unit( struct brw_context *brw )
    /* Create new:
     */
    ret = clip_unit_create_from_key(brw, &key, 
+                                   reloc,
                                    &brw->clip.state_bo);
    if (ret)
       return ret;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 177fe2172d..67fad0d9a5 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -383,8 +383,8 @@ struct brw_cache_item {
    GLuint hash;
    GLuint key_size;		/* for variable-sized keys */
    const void *key;
-   struct brw_winsys_buffer **reloc_bufs;
-   GLuint nr_reloc_bufs;
+   struct brw_winsys_reloc *relocs;
+   GLuint nr_relocs;
 
    struct brw_winsys_buffer *bo;
    GLuint data_size;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index ca7774a7cc..0a5cfcc7cf 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -295,7 +295,8 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
 			   brw->curbe.curbe_offset,
                            BRW_DATA_OTHER,
 			   bufsz,
-			   buf);
+			   buf,
+                           NULL, 0);
    }
 
    brw_add_validated_bo(brw, brw->curbe.curbe_bo);
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 36a99fd0e9..1b0de17aec 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -72,15 +72,18 @@ gs_unit_populate_key(struct brw_context *brw, struct brw_gs_unit_key *key)
 static enum pipe_error
 gs_unit_create_from_key(struct brw_context *brw, 
                         struct brw_gs_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        unsigned nr_reloc,
                         struct brw_winsys_buffer **bo_out)
 {
    struct brw_gs_unit_state gs;
    enum pipe_error ret;
 
+
    memset(&gs, 0, sizeof(gs));
 
+   /* maybe-reloc: populate the background */
    gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
-   /* reloc */
    gs.thread0.kernel_start_pointer = 0;
 
    gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
@@ -108,22 +111,13 @@ gs_unit_create_from_key(struct brw_context *brw,
 
    ret = brw_upload_cache(&brw->cache, BRW_GS_UNIT,
                           key, sizeof(*key),
-                          &brw->gs.prog_bo, 1,
+                          reloc, nr_reloc,
                           &gs, sizeof(gs),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-   if (key->prog_active) {
-      /* Emit GS program relocation */
-      brw->sws->bo_emit_reloc(*bo_out,
-			      BRW_USAGE_STATE,
-			      gs.thread0.grf_reg_count << 1,
-			      offsetof(struct brw_gs_unit_state, thread0),
-			      brw->gs.prog_bo);
-   }
-
    return PIPE_OK;
 }
 
@@ -131,17 +125,33 @@ static enum pipe_error prepare_gs_unit(struct brw_context *brw)
 {
    struct brw_gs_unit_key key;
    enum pipe_error ret;
+   struct brw_winsys_reloc reloc[1];
+   unsigned nr_reloc = 0;
+   unsigned grf_reg_count;
 
    gs_unit_populate_key(brw, &key);
 
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+   /* GS program relocation */
+   if (key.prog_active) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_STATE,
+                 grf_reg_count << 1,
+                 offsetof(struct brw_gs_unit_state, thread0),
+                 brw->gs.prog_bo);
+   }
+
    if (brw_search_cache(&brw->cache, BRW_GS_UNIT,
                         &key, sizeof(key),
-                        &brw->gs.prog_bo, 1,
+                        reloc, nr_reloc,
                         NULL,
                         &brw->gs.state_bo))
       return PIPE_OK;
 
-   ret = gs_unit_create_from_key(brw, &key, &brw->gs.state_bo);
+   ret = gs_unit_create_from_key(brw, &key,
+                                 reloc, nr_reloc,
+                                 &brw->gs.state_bo);
    if (ret)
       return ret;
 
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 689483b4bc..a911482149 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -132,8 +132,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
 }
 
 static enum pipe_error
-sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
-			struct brw_winsys_buffer **reloc_bufs,
+sf_unit_create_from_key(struct brw_context *brw,
+                        struct brw_sf_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
                         struct brw_winsys_buffer **bo_out)
 {
    struct brw_sf_unit_state sf;
@@ -141,7 +142,8 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
-   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+
+   sf.thread0.grf_reg_count = 0;
    /* reloc */
    sf.thread0.kernel_start_pointer = 0;
 
@@ -177,18 +179,10 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    /* CACHE_NEW_SF_VP */
    /* reloc */
-   sf.sf5.sf_viewport_state_offset = 0;
-
-   sf.sf5.viewport_transform = 1;
 
    if (key->scissor)
       sf.sf6.scissor = 1;
 
-   if (key->front_face == PIPE_WINDING_CCW)
-      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
-   else
-      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
-
    switch (key->cull_mode) {
    case PIPE_WINDING_CCW:
    case PIPE_WINDING_CW:
@@ -281,34 +275,13 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
                           key, sizeof(*key),
-                          reloc_bufs, 2,
+                          reloc, 2,
                           &sf, sizeof(sf),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-   /* STATE_PREFETCH command description describes this state as being
-    * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
-    */
-   /* Emit SF program relocation */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 sf.thread0.grf_reg_count << 1,
-                                 offsetof(struct brw_sf_unit_state, thread0),
-                                 brw->sf.prog_bo);
-   if (ret)
-      return ret;
-
-
-   /* Emit SF viewport relocation */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
-                                 offsetof(struct brw_sf_unit_state, sf5),
-                                 brw->sf.vp_bo);
-   if (ret)
-      return ret;
    
    return PIPE_OK;
 }
@@ -316,23 +289,47 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 static enum pipe_error upload_sf_unit( struct brw_context *brw )
 {
    struct brw_sf_unit_key key;
-   struct brw_winsys_buffer *reloc_bufs[2];
+   struct brw_winsys_reloc reloc[2];
+   unsigned total_grf;
+   unsigned viewport_transform;
+   unsigned front_winding;
    enum pipe_error ret;
 
    sf_unit_populate_key(brw, &key);
+   
+   /* XXX: cut this crap and pre calculate the key:
+    */
+   total_grf = (align(key.total_grf, 16) / 16 - 1);
+   viewport_transform = 1;
+   front_winding = (key.front_face == PIPE_WINDING_CCW ?
+                    BRW_FRONTWINDING_CCW :
+                    BRW_FRONTWINDING_CW);
+
+   /* Emit SF program relocation */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              total_grf << 1,
+              offsetof(struct brw_sf_unit_state, thread0),
+              brw->sf.prog_bo);
+
+   /* Emit SF viewport relocation */
+   make_reloc(&reloc[1],
+              BRW_USAGE_STATE,
+              front_winding | (viewport_transform << 1),
+              offsetof(struct brw_sf_unit_state, sf5),
+              brw->sf.vp_bo);
 
-   reloc_bufs[0] = brw->sf.prog_bo;
-   reloc_bufs[1] = brw->sf.vp_bo;
 
    if (brw_search_cache(&brw->cache, BRW_SF_UNIT,
                         &key, sizeof(key),
-                        reloc_bufs, 2,
+                        reloc, 2,
                         NULL,
                         &brw->sf.state_bo))
       return PIPE_OK;
 
 
-   ret = sf_unit_create_from_key(brw, &key, reloc_bufs,
+   ret = sf_unit_create_from_key(brw, &key,
+                                 reloc,
                                  &brw->sf.state_bo);
    if (ret)
       return ret;
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index e219a1d870..97710abec3 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -109,24 +109,24 @@ void brw_destroy_state(struct brw_context *brw);
 enum pipe_error brw_cache_data(struct brw_cache *cache,
                                enum brw_cache_id cache_id,
                                const void *data,
-                               struct brw_winsys_buffer **reloc_bufs,
-                               GLuint nr_reloc_bufs,
+                               struct brw_winsys_reloc *relocs,
+                               GLuint nr_relocs,
                                struct brw_winsys_buffer **bo_out );
 
 enum pipe_error brw_cache_data_sz(struct brw_cache *cache,
                                   enum brw_cache_id cache_id,
                                   const void *data,
                                   GLuint data_size,
-                                  struct brw_winsys_buffer **reloc_bufs,
-                                  GLuint nr_reloc_bufs,
+                                  struct brw_winsys_reloc *relocs,
+                                  GLuint nr_relocs,
                                   struct brw_winsys_buffer **bo_out);
 
 enum pipe_error brw_upload_cache( struct brw_cache *cache,
                                   enum brw_cache_id cache_id,
                                   const void *key,
                                   GLuint key_sz,
-                                  struct brw_winsys_buffer **reloc_bufs,
-                                  GLuint nr_reloc_bufs,
+                                  struct brw_winsys_reloc *relocs,
+                                  GLuint nr_relocs,
                                   const void *data,
                                   GLuint data_sz,
                                   const void *aux,
@@ -137,8 +137,8 @@ boolean brw_search_cache( struct brw_cache *cache,
                           enum brw_cache_id cache_id,
                           const void *key,
                           GLuint key_size,
-                          struct brw_winsys_buffer **reloc_bufs,
-                          GLuint nr_reloc_bufs,
+                          struct brw_winsys_reloc *relocs,
+                          GLuint nr_relocs,
                           void *aux_return,
                           struct brw_winsys_buffer **bo_out);
 
diff --git a/src/gallium/drivers/i965/brw_state_cache.c b/src/gallium/drivers/i965/brw_state_cache.c
index f8369d31ec..16b643ceb2 100644
--- a/src/gallium/drivers/i965/brw_state_cache.c
+++ b/src/gallium/drivers/i965/brw_state_cache.c
@@ -47,7 +47,7 @@
  * a safe point (unlock) we throw out all of the cache data and let it
  * regenerate for the next rendering operation.
  *
- * The reloc_buf pointers need to be included as key data, otherwise the
+ * The reloc structs need to be included as key data, otherwise the
  * non-unique values stuffed in the offset in key data through
  * brw_cache_data() may result in successful probe for state buffers
  * even when the buffer being referenced doesn't match.  The result would be
@@ -73,7 +73,7 @@
 
 static GLuint
 hash_key(const void *key, GLuint key_size,
-         struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs)
+         struct brw_winsys_reloc *relocs, GLuint nr_relocs)
 {
    GLuint *ikey = (GLuint *)key;
    GLuint hash = 0, i;
@@ -88,8 +88,8 @@ hash_key(const void *key, GLuint key_size,
    }
 
    /* Include the BO pointers as key data as well */
-   ikey = (GLuint *)reloc_bufs;
-   key_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *);
+   ikey = (GLuint *)relocs;
+   key_size = nr_relocs * sizeof(struct brw_winsys_reloc);
    for (i = 0; i < key_size/4; i++) {
       hash ^= ikey[i];
       hash = (hash << 5) | (hash >> 27);
@@ -118,7 +118,7 @@ update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id,
 static struct brw_cache_item *
 search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 	     GLuint hash, const void *key, GLuint key_size,
-	     struct brw_winsys_buffer **reloc_bufs, GLuint nr_reloc_bufs)
+	     struct brw_winsys_reloc *relocs, GLuint nr_relocs)
 {
    struct brw_cache_item *c;
 
@@ -137,9 +137,8 @@ search_cache(struct brw_cache *cache, enum brw_cache_id cache_id,
 	  c->hash == hash &&
 	  c->key_size == key_size &&
 	  memcmp(c->key, key, key_size) == 0 &&
-	  c->nr_reloc_bufs == nr_reloc_bufs &&
-	  memcmp(c->reloc_bufs, reloc_bufs,
-		 nr_reloc_bufs * sizeof(struct brw_winsys_buffer *)) == 0)
+	  c->nr_relocs == nr_relocs &&
+	  memcmp(c->relocs, relocs, nr_relocs * sizeof *relocs) == 0)
 	 return c;
    }
 
@@ -178,16 +177,16 @@ brw_search_cache(struct brw_cache *cache,
                  enum brw_cache_id cache_id,
                  const void *key,
                  GLuint key_size,
-                 struct brw_winsys_buffer **reloc_bufs, 
-		 GLuint nr_reloc_bufs,
+                 struct brw_winsys_reloc *relocs, 
+		 GLuint nr_relocs,
                  void *aux_return,
                  struct brw_winsys_buffer **bo_out)
 {
    struct brw_cache_item *item;
-   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
+   GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
 
    item = search_cache(cache, cache_id, hash, key, key_size,
-		       reloc_bufs, nr_reloc_bufs);
+		       relocs, nr_relocs);
 
    if (item) {
       if (aux_return)
@@ -207,8 +206,8 @@ brw_upload_cache( struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *key,
 		  GLuint key_size,
-		  struct brw_winsys_buffer **reloc_bufs,
-		  GLuint nr_reloc_bufs,
+		  struct brw_winsys_reloc *relocs,
+		  GLuint nr_relocs,
 		  const void *data,
 		  GLuint data_size,
 		  const void *aux,
@@ -216,8 +215,8 @@ brw_upload_cache( struct brw_cache *cache,
                   struct brw_winsys_buffer **bo_out)
 {
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
-   GLuint hash = hash_key(key, key_size, reloc_bufs, nr_reloc_bufs);
-   GLuint relocs_size = nr_reloc_bufs * sizeof(struct brw_winsys_buffer *);
+   GLuint hash = hash_key(key, key_size, relocs, nr_relocs);
+   GLuint relocs_size = nr_relocs * sizeof relocs[0];
    GLuint aux_size = cache->aux_size[cache_id];
    enum pipe_error ret;
    void *tmp;
@@ -236,23 +235,22 @@ brw_upload_cache( struct brw_cache *cache,
       return ret;
 
 
-   /* Set up the memory containing the key, aux_data, and reloc_bufs */
+   /* Set up the memory containing the key, aux_data, and relocs */
    tmp = MALLOC(key_size + aux_size + relocs_size);
 
    memcpy(tmp, key, key_size);
    memcpy((char *)tmp + key_size, aux, cache->aux_size[cache_id]);
-   memcpy((char *)tmp + key_size + aux_size, reloc_bufs, relocs_size);
-   for (i = 0; i < nr_reloc_bufs; i++) {
-      if (reloc_bufs[i] != NULL)
-         p_atomic_inc(&reloc_bufs[i]->reference.count);
+   memcpy((char *)tmp + key_size + aux_size, relocs, relocs_size);
+   for (i = 0; i < nr_relocs; i++) {
+      p_atomic_inc(&relocs[i].bo->reference.count);
    }
 
    item->cache_id = cache_id;
    item->key = tmp;
    item->hash = hash;
    item->key_size = key_size;
-   item->reloc_bufs = (struct brw_winsys_buffer **)((char *)tmp + key_size + aux_size);
-   item->nr_reloc_bufs = nr_reloc_bufs;
+   item->relocs = (struct brw_winsys_reloc *)((char *)tmp + key_size + aux_size);
+   item->nr_relocs = nr_relocs;
    bo_reference( &item->bo, *bo_out );
    item->data_size = data_size;
 
@@ -275,9 +273,12 @@ brw_upload_cache( struct brw_cache *cache,
 		   data_size, cache_id);
 
    /* Copy data to the buffer */
-   cache->sws->bo_subdata(item->bo, 
-                          cache_id,
-                          0, data_size, data);
+   ret = cache->sws->bo_subdata(item->bo, 
+                                cache_id,
+                                0, data_size, data,
+                                relocs, nr_relocs);
+   if (ret)
+      return ret;
 
    update_cache_last(cache, cache_id, item->bo);
 
@@ -293,15 +294,15 @@ brw_cache_data_sz(struct brw_cache *cache,
 		  enum brw_cache_id cache_id,
 		  const void *data,
 		  GLuint data_size,
-		  struct brw_winsys_buffer **reloc_bufs,
-		  GLuint nr_reloc_bufs,
+		  struct brw_winsys_reloc *relocs,
+		  GLuint nr_relocs,
                   struct brw_winsys_buffer **bo_out)
 {
    struct brw_cache_item *item;
-   GLuint hash = hash_key(data, data_size, reloc_bufs, nr_reloc_bufs);
+   GLuint hash = hash_key(data, data_size, relocs, nr_relocs);
 
    item = search_cache(cache, cache_id, hash, data, data_size,
-		       reloc_bufs, nr_reloc_bufs);
+		       relocs, nr_relocs);
    if (item) {
       update_cache_last(cache, cache_id, item->bo);
 
@@ -311,7 +312,7 @@ brw_cache_data_sz(struct brw_cache *cache,
 
    return brw_upload_cache(cache, cache_id,
                            data, data_size,
-                           reloc_bufs, nr_reloc_bufs,
+                           relocs, nr_relocs,
                            data, data_size,
                            NULL, NULL,
                            bo_out);
@@ -321,20 +322,22 @@ brw_cache_data_sz(struct brw_cache *cache,
 /**
  * Wrapper around brw_cache_data_sz using the cache_id's canonical key size.
  *
- * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be
+ * If nr_relocs is nonzero, brw_search_cache()/brw_upload_cache() would be
  * better to use, as the potentially changing offsets in the data-used-as-key
  * will result in excessive cache misses.
+ * 
+ * XXX: above is no longer true -- can we remove some code?
  */
 enum pipe_error
 brw_cache_data(struct brw_cache *cache,
 	       enum brw_cache_id cache_id,
 	       const void *data,
-	       struct brw_winsys_buffer **reloc_bufs,
-	       GLuint nr_reloc_bufs,
+	       struct brw_winsys_reloc *relocs,
+	       GLuint nr_relocs,
                struct brw_winsys_buffer **bo_out)
 {
    return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id],
-			    reloc_bufs, nr_reloc_bufs, bo_out);
+			    relocs, nr_relocs, bo_out);
 }
 
 
@@ -510,8 +513,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
 
 	 next = c->next;
 
-	 for (j = 0; j < c->nr_reloc_bufs; j++)
-	    bo_reference(&c->reloc_bufs[j], NULL);
+	 for (j = 0; j < c->nr_relocs; j++)
+	    bo_reference(&c->relocs[j].bo, NULL);
 
 	 bo_reference(&c->bo, NULL);
 	 FREE((void *)c->key);
@@ -555,8 +558,8 @@ brw_state_cache_bo_delete(struct brw_cache *cache, struct brw_winsys_buffer *bo)
 
 	    *prev = c->next;
 
-	    for (j = 0; j < c->nr_reloc_bufs; j++)
-	       bo_reference(&c->reloc_bufs[j], NULL);
+	    for (j = 0; j < c->nr_relocs; j++)
+	       bo_reference(&c->relocs[j].bo, NULL);
 
 	    bo_reference(&c->bo, NULL);
 
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index a5b30eba47..0b44f39f4d 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -81,6 +81,7 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key)
 static enum pipe_error
 vs_unit_create_from_key(struct brw_context *brw, 
                         struct brw_vs_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
                         struct brw_winsys_buffer **bo_out)
 {
    enum pipe_error ret;
@@ -145,22 +146,13 @@ vs_unit_create_from_key(struct brw_context *brw,
 
    ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
                           key, sizeof(*key),
-                          &brw->vs.prog_bo, 1,
+                          reloc, Elements(reloc),
                           &vs, sizeof(vs),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-   /* Emit VS program relocation */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 vs.thread0.grf_reg_count << 1,
-                                 offsetof(struct brw_vs_unit_state, thread0),
-                                 brw->vs.prog_bo);
-   if (ret)
-      return ret;
-
    return PIPE_OK;
 }
 
@@ -168,17 +160,29 @@ static int prepare_vs_unit(struct brw_context *brw)
 {
    struct brw_vs_unit_key key;
    enum pipe_error ret;
+   struct brw_winsys_reloc reloc[1];
+   unsigned grf_reg_count;
 
    vs_unit_populate_key(brw, &key);
 
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+
+   /* Emit VS program relocation */
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_vs_unit_state, thread0),
+              brw->vs.prog_bo);
+
+
    if (brw_search_cache(&brw->cache, BRW_VS_UNIT,
                         &key, sizeof(key),
-                        &brw->vs.prog_bo, 1,
+                        reloc, 1,
                         NULL,
                         &brw->vs.state_bo))
       return PIPE_OK;
 
-   ret = vs_unit_create_from_key(brw, &key, &brw->vs.state_bo);
+   ret = vs_unit_create_from_key(brw, &key, reloc, &brw->vs.state_bo);
    if (ret)
       return ret;
 
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index b12df0ec03..aaf2a44f61 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -65,7 +65,8 @@ brw_vs_update_constant_buffer(struct brw_context *brw)
 				     size, 64);
 
    /* _NEW_PROGRAM_CONSTANTS */
-   dri_bo_subdata(const_buffer, 0, size, params->ParameterValues);
+   brw->sws->bo_subdata(const_buffer, 0, size, params->ParameterValues,
+                        NULL, 0);
 
    return const_buffer;
 }
@@ -145,51 +146,31 @@ brw_vs_get_binding_table(struct brw_context *brw,
                          struct brw_winsys_buffer **bo_out)
 {
 #if 0
-   if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
-                        NULL, 0,
-                        brw->vs.surf_bo, BRW_VS_MAX_SURF,
-                        NULL,
-                        bo_out))
-   {
-      return PIPE_OK;
-   }
-   else {
-      GLuint data_size = BRW_VS_MAX_SURF * sizeof(GLuint);
-      uint32_t *data = malloc(data_size);
-      int i;
-
-      for (i = 0; i < BRW_VS_MAX_SURF; i++)
-         if (brw->vs.surf_bo[i])
-            data[i] = brw->vs.surf_bo[i]->offset;
-         else
-            data[i] = 0;
-
-      ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
-                              NULL, 0,
-                              brw->vs.surf_bo, BRW_VS_MAX_SURF,
-                              data, data_size,
-                              NULL, NULL,
-                              bo_out);
-      if (ret)
-         return ret;
-
-      /* Emit binding table relocations to surface state */
-      for (i = 0; i < BRW_VS_MAX_SURF; i++) {
-	 if (brw->vs.surf_bo[i] != NULL) {
-	    /* The presumed offsets were set in the data values for
-	     * brw_upload_cache.
-	     */
-	    ret = sws->bo_emit_reloc(*bo_out, i * 4,
-                                     brw->vs.surf_bo[i], 0,
-                                     BRW_USAGE_STATE);
-            if (ret)
-               return ret;
-	 }
-      }
+   static GLuint data[BRW_VS_MAX_SURF]; /* always zero */
+   struct brw_winsys_reloc reloc[BRW_VS_MAX_SURF];
+   int i;
 
-      FREE(data);
-      return PIPE_OK;
+   /* Emit binding table relocations to surface state */
+   for (i = 0; i < BRW_VS_MAX_SURF; i++) {
+      make_reloc(&reloc[i],
+                 BRW_USAGE_STATE,
+                 0,
+                 i * 4,
+                 brw->vs.surf_bo[i]);
    }
+   
+   ret = brw_cache_data( &brw->surface_cache, 
+                         BRW_SS_SURF_BIND,
+                         NULL, 0,
+                         reloc, Elements(reloc),
+                         data, sizeof data,
+                         NULL, NULL,
+                         bo_out);
+   if (ret)
+      return ret;
+
+   FREE(data);
+   return PIPE_OK;
 #else
    return PIPE_OK;
 #endif
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index e72b928b06..2da660a1e6 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -111,6 +111,30 @@ enum brw_buffer_data_type {
 };
 
 
+/* Relocations to be applied with subdata in a call to sws->bo_subdata, below.
+ *
+ * Effectively this encodes:
+ *
+ *    *(unsigned *)(subdata + offset) = bo->offset + delta
+ */
+struct brw_winsys_reloc {
+   enum brw_buffer_usage usage; /* debug only */
+   unsigned delta;
+   unsigned offset;
+   struct brw_winsys_buffer *bo;
+};
+
+static INLINE void make_reloc( struct brw_winsys_reloc *reloc,
+                               enum brw_buffer_usage usage,
+                               unsigned delta,
+                               unsigned offset,
+                               struct brw_winsys_buffer *bo)
+{
+   reloc->usage = usage;
+   reloc->delta = delta;
+   reloc->offset = offset;
+   reloc->bo = bo;              /* Note - not taking a reference yet */
+}
 
 
@@ -151,7 +175,9 @@ struct brw_winsys_screen {
                                  enum brw_buffer_data_type data_type,
                                  size_t offset,
                                  size_t size,
-                                 const void *data);
+                                 const void *data,
+                                 const struct brw_winsys_reloc *reloc,
+                                 unsigned nr_reloc );
 
    boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
    boolean (*bo_references)(struct brw_winsys_buffer *a,
diff --git a/src/gallium/drivers/i965/brw_wm_constant_buffer.c b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
index 14568265dd..6434c6acf7 100644
--- a/src/gallium/drivers/i965/brw_wm_constant_buffer.c
+++ b/src/gallium/drivers/i965/brw_wm_constant_buffer.c
@@ -13,16 +13,24 @@ brw_create_constant_surface( struct brw_context *brw,
 {
    const GLint w = key->width - 1;
    struct brw_winsys_buffer *bo;
+   struct brw_winsys_reloc reloc[1];
    enum pipe_error ret;
 
+      /* Emit relocation to surface contents */
+   make_reloc(&reloc[0],
+              BRW_USAGE_SAMPLER,
+              0,
+              offsetof(struct brw_surface_state, ss1),
+              key->bo);
+
+   
    memset(&surf, 0, sizeof(surf));
 
    surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    surf.ss0.surface_type = BRW_SURFACE_BUFFER;
    surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
 
-   assert(key->bo);
-   surf.ss1.base_addr = key->bo->offset; /* reloc */
+   surf.ss1.base_addr = 0; /* reloc */
 
    surf.ss2.width = w & 0x7f;            /* bits 6:0 of size or width */
    surf.ss2.height = (w >> 7) & 0x1fff;  /* bits 19:7 of size or width */
@@ -32,24 +40,13 @@ brw_create_constant_surface( struct brw_context *brw,
  
    ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
                           key, sizeof(*key),
-                          &key->bo, key->bo ? 1 : 0,
+                          reloc, Elements(reloc),
                           &surf, sizeof(surf),
                           NULL, NULL,
                           &bo_out);
    if (ret)
       return ret;
 
-   if (key->bo) {
-      /* Emit relocation to surface contents */
-      ret = brw->sws->bo_emit_reloc(*bo_out,
-                                    BRW_USAGE_SAMPLER,
-                                    0,
-                                    offsetof(struct brw_surface_state, ss1),
-                                    key->bo);
-      if (ret)
-         return ret;
-   }
-
    return PIPE_OK;
 }
 
diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 174836b39d..4e99ac703a 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -165,6 +165,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
 static int upload_wm_samplers( struct brw_context *brw )
 {
    struct wm_sampler_key key;
+   struct brw_winsys_reloc reloc[BRW_MAX_TEX_UNIT];
    enum pipe_error ret;
    int i;
 
@@ -181,9 +182,20 @@ static int upload_wm_samplers( struct brw_context *brw )
       return PIPE_OK;
    }
 
+   /* Emit SDC relocations */
+   for (i = 0; i < key.sampler_count; i++) {
+      make_reloc( &reloc[i],
+                  BRW_USAGE_SAMPLER,
+                  0,
+                  i * sizeof(struct brw_sampler_state) +
+                  offsetof(struct brw_sampler_state, ss2),
+                  brw->wm.sdc_bo[i]);
+   }
+
+
    if (brw_search_cache(&brw->cache, BRW_SAMPLER,
                         &key, sizeof(key),
-                        brw->wm.sdc_bo, key.sampler_count,
+                        reloc, key.sampler_count,
                         NULL,
                         &brw->wm.sampler_bo))
       return PIPE_OK;
@@ -193,24 +205,13 @@ static int upload_wm_samplers( struct brw_context *brw )
     */
    ret = brw_upload_cache(&brw->cache, BRW_SAMPLER,
                           &key, sizeof(key),
-                          brw->wm.sdc_bo, key.sampler_count,
+                          reloc, key.sampler_count,
                           &key.sampler, sizeof(key.sampler),
                           NULL, NULL,
                           &brw->wm.sampler_bo);
    if (ret)
       return ret;
 
-   /* Emit SDC relocations */
-   for (i = 0; i < key.sampler_count; i++) {
-      ret = brw->sws->bo_emit_reloc(brw->wm.sampler_bo,
-                                    BRW_USAGE_SAMPLER,
-                                    0,
-                                    i * sizeof(struct brw_sampler_state) +
-                                    offsetof(struct brw_sampler_state, ss2),
-                                    brw->wm.sdc_bo[i]);
-      if (ret)
-         return ret;
-   }
 
    return 0;
 }
diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index 56789ce7a4..d8e88237ce 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -144,8 +144,36 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
                         struct brw_winsys_buffer **bo_out)
 {
    struct brw_wm_unit_state wm;
+   struct brw_winsys_reloc reloc[3];
+   unsigned nr_reloc = 0;
    enum pipe_error ret;
 
+   /* Emit WM program relocation */
+   make_reloc(&reloc[nr_reloc++],
+              BRW_USAGE_STATE,
+              wm.thread0.grf_reg_count << 1,
+              offsetof(struct brw_wm_unit_state, thread0),
+              brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (key->total_scratch != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_SCRATCH,
+                 wm.thread2.per_thread_scratch_space,
+                 offsetof(struct brw_wm_unit_state, thread2),
+                 brw->wm.scratch_bo);
+   }
+
+   /* Emit sampler state relocation */
+   if (key->sampler_count != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_STATE,
+                 wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
+                 offsetof(struct brw_wm_unit_state, wm4),
+                 brw->wm.sampler_bo);
+   }
+
+
    memset(&wm, 0, sizeof(wm));
 
    wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
@@ -220,44 +248,13 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
                           key, sizeof(*key),
-                          reloc_bufs, 3,
+                          reloc, nr_reloc,
                           &wm, sizeof(wm),
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-   /* Emit WM program relocation */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_STATE,
-                                 wm.thread0.grf_reg_count << 1,
-                                 offsetof(struct brw_wm_unit_state, thread0),
-                                 brw->wm.prog_bo);
-   if (ret)
-      return ret;
-
-   /* Emit scratch space relocation */
-   if (key->total_scratch != 0) {
-      ret = brw->sws->bo_emit_reloc(*bo_out,
-                                    BRW_USAGE_SCRATCH,
-                                    wm.thread2.per_thread_scratch_space,
-                                    offsetof(struct brw_wm_unit_state, thread2),
-                                    brw->wm.scratch_bo);
-      if (ret)
-         return ret;
-   }
-
-   /* Emit sampler state relocation */
-   if (key->sampler_count != 0) {
-      ret = brw->sws->bo_emit_reloc(*bo_out,
-                                    BRW_USAGE_STATE,
-                                    wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-                                    offsetof(struct brw_wm_unit_state, wm4),
-                                    brw->wm.sampler_bo);
-      if (ret)
-         return ret;
-   }
-
    return PIPE_OK;
 }
 
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index ed365b03b9..f882331433 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -45,33 +45,32 @@ brw_update_texture_surface( struct brw_context *brw,
 			    struct brw_texture *tex,
                             struct brw_winsys_buffer **bo_out)
 {
+   struct brw_winsys_reloc reloc[1];
    enum pipe_error ret;
 
+   /* Emit relocation to surface contents */
+   make_reloc(&reloc[0],
+              BRW_USAGE_SAMPLER,
+              0,
+              offsetof(struct brw_surface_state, ss1),
+              tex->bo);
+
    if (brw_search_cache(&brw->surface_cache,
                         BRW_SS_SURFACE,
                         &tex->ss, sizeof tex->ss,
-                        &tex->bo, 1,
+                        reloc, Elements(reloc),
                         NULL,
                         bo_out))
       return PIPE_OK;
 
    ret = brw_upload_cache(&brw->surface_cache, BRW_SS_SURFACE,
                           &tex->ss, sizeof tex->ss,
-                          &tex->bo, 1,
+                          reloc, Elements(reloc),
                           &tex->ss, sizeof tex->ss,
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
-      
-   /* Emit relocation to surface contents */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_SAMPLER,
-                                 0,
-                                 offsetof(struct brw_surface_state, ss1),
-                                 tex->bo);
-   if (ret)
-      return ret;
 
    return PIPE_OK;
 }
@@ -95,8 +94,17 @@ brw_update_render_surface(struct brw_context *brw,
 {
    struct brw_surf_ss0 blend_ss0 = brw->curr.blend->ss0;
    struct brw_surface_state ss;
+   struct brw_winsys_reloc reloc[1];
    enum pipe_error ret;
 
+   /* XXX: we will only be rendering to this surface:
+    */
+   make_reloc(&reloc[0],
+              BRW_USAGE_RENDER_TARGET,
+              0,
+              offsetof(struct brw_surface_state, ss1),
+              surface->bo);
+
    /* Surfaces are potentially shared between contexts, so can't
     * scribble the in-place ss0 value in the surface.
     */
@@ -111,7 +119,7 @@ brw_update_render_surface(struct brw_context *brw,
    if (brw_search_cache(&brw->surface_cache,
                         BRW_SS_SURFACE,
                         &ss, sizeof(ss),
-                        &surface->bo, 1,
+                        reloc, Elements(reloc),
                         NULL,
                         bo_out))
       return PIPE_OK;
@@ -119,23 +127,13 @@ brw_update_render_surface(struct brw_context *brw,
    ret = brw_upload_cache(&brw->surface_cache,
                           BRW_SS_SURFACE,
                           &ss, sizeof ss,
-                          &surface->bo, 1,
+                          reloc, Elements(reloc),
                           &ss, sizeof ss,
                           NULL, NULL,
                           bo_out);
    if (ret)
       return ret;
 
-      /* XXX: we will only be rendering to this surface:
-       */
-   ret = brw->sws->bo_emit_reloc(*bo_out,
-                                 BRW_USAGE_RENDER_TARGET,
-                                 0,
-                                 offsetof(struct brw_surface_state, ss1),
-                                 surface->bo);
-   if (ret)
-      return ret;
-
    return PIPE_OK;
 }
 
@@ -149,6 +147,7 @@ brw_wm_get_binding_table(struct brw_context *brw,
                          struct brw_winsys_buffer **bo_out )
 {
    enum pipe_error ret;
+   struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF];
    uint32_t data[BRW_WM_MAX_SURF];
    GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
    int i;
@@ -156,13 +155,21 @@ brw_wm_get_binding_table(struct brw_context *brw,
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
    assert(brw->wm.nr_surfaces > 0);
 
+   /* Emit binding table relocations to surface state */
+   for (i = 0; i < brw->wm.nr_surfaces; i++) {
+      make_reloc(&reloc[i],
+                 BRW_USAGE_STATE,
+                 0,
+                 i * sizeof(GLuint),
+                 brw->wm.surf_bo[i]);
+   }
+
    /* Note there is no key for this search beyond the values in the
     * relocation array:
     */
    if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
                         NULL, 0,
-                        brw->wm.surf_bo,
-                        brw->wm.nr_surfaces,
+                        reloc, brw->wm.nr_surfaces,
                         NULL,
                         bo_out))
       return PIPE_OK;
@@ -175,24 +182,13 @@ brw_wm_get_binding_table(struct brw_context *brw,
 
    ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
                            NULL, 0,
-                           brw->wm.surf_bo, brw->wm.nr_surfaces,
+                           reloc, brw->wm.nr_surfaces,
                            data, data_size,
                            NULL, NULL,
                            bo_out);
    if (ret)
       return ret;
 
-   /* Emit binding table relocations to surface state */
-   for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      ret = brw->sws->bo_emit_reloc(*bo_out,
-                                    BRW_USAGE_STATE,
-                                    0,
-                                    i * sizeof(GLuint),
-                                    brw->wm.surf_bo[i]);
-      if (ret)
-         return ret;
-   }
-
    return PIPE_OK;
 }
 
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index ab5df56bc0..ce6d85976d 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -47,6 +47,10 @@
 
 #define MAX_VRAM (128*1024*1024)
 
+#define MAX_DUMPS 128
+
+
+
 extern int brw_disasm (FILE *file, 
                        const struct brw_instruction *inst,
                        unsigned count );
@@ -294,21 +298,36 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
                     enum brw_buffer_data_type data_type,
                     size_t offset,
                     size_t size,
-                    const void *data)
+                    const void *data,
+                    const struct brw_winsys_reloc *reloc,
+                    unsigned nr_relocs)
 {
    struct xlib_brw_buffer *buf = xlib_brw_buffer(buffer);
    struct xlib_brw_winsys *xbw = xlib_brw_winsys(buffer->sws);
+   unsigned i;
 
-   debug_printf("%s buf %p off %d sz %d %s\n", 
+   debug_printf("%s buf %p off %d sz %d %s relocs: %d\n", 
                 __FUNCTION__, 
-                (void *)buffer, offset, size, data_types[data_type]);
-
-   if (1)
-      dump_data( xbw, data_type, data, size );
+                (void *)buffer, offset, size, 
+                data_types[data_type],
+                nr_relocs);
 
    assert(buf->base.size >= offset + size);
    memcpy(buf->virtual + offset, data, size);
 
+   /* Apply the relocations:
+    */
+   for (i = 0; i < nr_relocs; i++) {
+      debug_printf("\treloc[%d] usage %s off %d value %x+%x\n", 
+                   i, usages[reloc[i].usage], reloc[i].offset,
+                   xlib_brw_buffer(reloc[i].bo)->offset, reloc[i].delta);
+
+      *(unsigned *)(buf->virtual + offset + reloc[i].offset) = 
+         xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta;
+   }
+
+   if (1)
+      dump_data( xbw, data_type, buf->virtual + offset, size );
 
    return 0;
 }
-- 
cgit v1.2.3


From a49ccf0fd25575c4e40398e5d22f3931e80921f8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 07:47:07 +0000
Subject: i965g: restore code to populate the relocation background

The relocation background is now emitted in two places: in the data
presented for upload and in the delta field of the reloc struct.
We probably want to remove the delta field and just pull the
background from the key.
---
 src/gallium/drivers/i965/brw_clip_state.c |  1 +
 src/gallium/drivers/i965/brw_gs_state.c   |  2 +-
 src/gallium/drivers/i965/brw_sf_state.c   | 11 +++++++++--
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 3f2b9701e6..467364e884 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -83,6 +83,7 @@ clip_unit_create_from_key(struct brw_context *brw,
 
    memset(&clip, 0, sizeof(clip));
 
+   clip.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
    clip.thread0.kernel_start_pointer = 0;
 
diff --git a/src/gallium/drivers/i965/brw_gs_state.c b/src/gallium/drivers/i965/brw_gs_state.c
index 1b0de17aec..b64ec286ce 100644
--- a/src/gallium/drivers/i965/brw_gs_state.c
+++ b/src/gallium/drivers/i965/brw_gs_state.c
@@ -82,7 +82,7 @@ gs_unit_create_from_key(struct brw_context *brw,
 
    memset(&gs, 0, sizeof(gs));
 
-   /* maybe-reloc: populate the background */
+   /* reloc */
    gs.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    gs.thread0.kernel_start_pointer = 0;
 
diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index a911482149..e412669844 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -142,8 +142,7 @@ sf_unit_create_from_key(struct brw_context *brw,
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
-
-   sf.thread0.grf_reg_count = 0;
+   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
    /* reloc */
    sf.thread0.kernel_start_pointer = 0;
 
@@ -179,10 +178,18 @@ sf_unit_create_from_key(struct brw_context *brw,
 
    /* CACHE_NEW_SF_VP */
    /* reloc */
+   sf.sf5.sf_viewport_state_offset = 0;
+
+   sf.sf5.viewport_transform = 1;
 
    if (key->scissor)
       sf.sf6.scissor = 1;
 
+   if (key->front_face == PIPE_WINDING_CCW)
+      sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
+   else
+      sf.sf5.front_winding = BRW_FRONTWINDING_CW;
+
    switch (key->cull_mode) {
    case PIPE_WINDING_CCW:
    case PIPE_WINDING_CW:
-- 
cgit v1.2.3


From b9bb41321a9add139cd1dbddcf48e6c81c9d019d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:27:43 +0000
Subject: i965g: scissor off by one

---
 src/gallium/drivers/i965/brw_sf_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index e412669844..955478e624 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -58,9 +58,9 @@ static enum pipe_error upload_sf_vp(struct brw_context *brw)
    sfv.viewport.m32 = vp->translate[2];
 
    sfv.scissor.xmin = scissor->minx;
-   sfv.scissor.xmax = scissor->maxx; /* -1 ?? */
+   sfv.scissor.xmax = scissor->maxx - 1; /* ? */
    sfv.scissor.ymin = scissor->miny;
-   sfv.scissor.ymax = scissor->maxy; /* -1 ?? */
+   sfv.scissor.ymax = scissor->maxy - 1; /* ? */
 
    ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0,
                          &brw->sf.vp_bo );
-- 
cgit v1.2.3


From b8e63e92102b6ca0b5ce06685590232a3a47d1ea Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:28:17 +0000
Subject: i965g: point_rast_rule comment no longer applies

Not sure exactly what state we want here now; will need to experiment.
---
 src/gallium/drivers/i965/brw_sf_state.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf_state.c b/src/gallium/drivers/i965/brw_sf_state.c
index 955478e624..25dc2b52e0 100644
--- a/src/gallium/drivers/i965/brw_sf_state.c
+++ b/src/gallium/drivers/i965/brw_sf_state.c
@@ -221,32 +221,10 @@ sf_unit_create_from_key(struct brw_context *brw,
 
    /* XXX: gl_rasterization_rules?  something else?
     */
-   if (0) {
-      /* Rendering to an OpenGL window */
-      sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
-   }
-   else {
-      /* If rendering to an FBO, the pixel coordinate system is
-       * inverted with respect to the normal OpenGL coordinate
-       * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
-       * But this value is listed as "Reserved, but not seen as useful"
-       * in Intel documentation (page 212, "Point Rasterization Rule",
-       * section 7.4 "SF Pipeline State Summary", of document
-       * "Intel® 965 Express Chipset Family and Intel® G35 Express
-       * Chipset Graphics Controller Programmer's Reference Manual,
-       * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
-       * available at 
-       *     http://intellinuxgraphics.org/documentation.html
-       * at the time of this writing).
-       *
-       * It does work on at least some devices, if not all;
-       * if devices that don't support it can be identified,
-       * the likely failure case is that points are rasterized
-       * incorrectly, which is no worse than occurs without
-       * the value, so we're using it here.
-       */
-      sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
-   }
+   sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+   sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+   sf.sf6.point_rast_rule = 1;
+
    /* XXX clamp max depends on AA vs. non-AA */
 
    /* _NEW_POINT */
-- 
cgit v1.2.3


From 4a3e24522b0538cb3802c59c22d6f3660c4491be Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:29:09 +0000
Subject: i965g: populate wm reloc array earlier

Still have to calculate the reloc background in two places.
---
 src/gallium/drivers/i965/brw_wm_state.c | 82 ++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 36 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_state.c b/src/gallium/drivers/i965/brw_wm_state.c
index d8e88237ce..ee970ac75b 100644
--- a/src/gallium/drivers/i965/brw_wm_state.c
+++ b/src/gallium/drivers/i965/brw_wm_state.c
@@ -140,40 +140,13 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
  */
 static enum pipe_error
 wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
-			struct brw_winsys_buffer **reloc_bufs,
+			struct brw_winsys_reloc *reloc,
+                        unsigned nr_reloc,
                         struct brw_winsys_buffer **bo_out)
 {
    struct brw_wm_unit_state wm;
-   struct brw_winsys_reloc reloc[3];
-   unsigned nr_reloc = 0;
    enum pipe_error ret;
 
-   /* Emit WM program relocation */
-   make_reloc(&reloc[nr_reloc++],
-              BRW_USAGE_STATE,
-              wm.thread0.grf_reg_count << 1,
-              offsetof(struct brw_wm_unit_state, thread0),
-              brw->wm.prog_bo);
-
-   /* Emit scratch space relocation */
-   if (key->total_scratch != 0) {
-      make_reloc(&reloc[nr_reloc++],
-                 BRW_USAGE_SCRATCH,
-                 wm.thread2.per_thread_scratch_space,
-                 offsetof(struct brw_wm_unit_state, thread2),
-                 brw->wm.scratch_bo);
-   }
-
-   /* Emit sampler state relocation */
-   if (key->sampler_count != 0) {
-      make_reloc(&reloc[nr_reloc++],
-                 BRW_USAGE_STATE,
-                 wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
-                 offsetof(struct brw_wm_unit_state, wm4),
-                 brw->wm.sampler_bo);
-   }
-
-
    memset(&wm, 0, sizeof(wm));
 
    wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
@@ -243,7 +216,7 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
    wm.wm5.line_stipple = key->line_stipple;
 
-   if (BRW_DEBUG & DEBUG_STATS || key->stats_wm)
+   if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm)
       wm.wm4.stats_enable = 1;
 
    ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
@@ -262,11 +235,17 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 static enum pipe_error upload_wm_unit( struct brw_context *brw )
 {
    struct brw_wm_unit_key key;
-   struct brw_winsys_buffer *reloc_bufs[3];
+   struct brw_winsys_reloc reloc[3];
+   unsigned nr_reloc = 0;
    enum pipe_error ret;
+   unsigned grf_reg_count;
+   unsigned per_thread_scratch_space;
+   unsigned stats_enable;
+   unsigned sampler_count;
 
    wm_unit_populate_key(brw, &key);
 
+
    /* Allocate the necessary scratch space if we haven't already.  Don't
     * bother reducing the allocation later, since we use scratch so
     * rarely.
@@ -291,18 +270,49 @@ static enum pipe_error upload_wm_unit( struct brw_context *brw )
       }
    }
 
-   reloc_bufs[0] = brw->wm.prog_bo;
-   reloc_bufs[1] = brw->wm.scratch_bo;
-   reloc_bufs[2] = brw->wm.sampler_bo;
+
+   /* XXX: temporary:
+    */
+   grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+   per_thread_scratch_space = key.total_scratch / 1024 - 1;
+   stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
+   sampler_count = BRW_IS_IGDNG(brw) ? 0 :(key.sampler_count + 1) / 4;
+
+   /* Emit WM program relocation */
+   make_reloc(&reloc[nr_reloc++],
+              BRW_USAGE_STATE,
+              grf_reg_count << 1,
+              offsetof(struct brw_wm_unit_state, thread0),
+              brw->wm.prog_bo);
+
+   /* Emit scratch space relocation */
+   if (key.total_scratch != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_SCRATCH,
+                 per_thread_scratch_space,
+                 offsetof(struct brw_wm_unit_state, thread2),
+                 brw->wm.scratch_bo);
+   }
+
+   /* Emit sampler state relocation */
+   if (key.sampler_count != 0) {
+      make_reloc(&reloc[nr_reloc++],
+                 BRW_USAGE_STATE,
+                 stats_enable | (sampler_count << 2),
+                 offsetof(struct brw_wm_unit_state, wm4),
+                 brw->wm.sampler_bo);
+   }
+
 
    if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
                         &key, sizeof(key),
-                        reloc_bufs, 3,
+                        reloc, nr_reloc,
                         NULL,
                         &brw->wm.state_bo))
       return PIPE_OK;
 
-   ret = wm_unit_create_from_key(brw, &key, reloc_bufs,
+   ret = wm_unit_create_from_key(brw, &key, 
+                                 reloc, nr_reloc,
                                  &brw->wm.state_bo);
    if (ret)
       return ret;
-- 
cgit v1.2.3


From 018e2250b860df75485d1c7741dfa010c39ae6f1 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:33:10 +0000
Subject: i965g: remove/disable inactive state atoms

---
 src/gallium/drivers/i965/brw_misc_state.c   | 2 +-
 src/gallium/drivers/i965/brw_screen.c       | 1 +
 src/gallium/drivers/i965/brw_state.h        | 2 --
 src/gallium/drivers/i965/brw_state_upload.c | 4 +---
 4 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 8e35f9ad1d..ce3e48f360 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -404,7 +404,7 @@ static int upload_invarient_state( struct brw_context *brw )
       BRW_BATCH_STRUCT(brw, &sip);
    }
 
-
+   /* VF Statistics */
    {
       struct brw_vf_statistics vfs;
       memset(&vfs, 0, sizeof(vfs));
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 7991f4ae52..04a3f81bcf 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -291,6 +291,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
 #ifdef DEBUG
    BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
    BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
+   BRW_DEBUG |= DEBUG_STATS;
 #endif
 
    memset(&chipset, 0, sizeof chipset);
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index 97710abec3..a9b8165495 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -52,7 +52,6 @@ brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
 const struct brw_tracked_state brw_blend_constant_color;
 const struct brw_tracked_state brw_cc_unit;
 const struct brw_tracked_state brw_cc_vp;
-const struct brw_tracked_state brw_check_fallback;
 const struct brw_tracked_state brw_clip_prog;
 const struct brw_tracked_state brw_clip_unit;
 const struct brw_tracked_state brw_curbe_buffer;
@@ -65,7 +64,6 @@ const struct brw_tracked_state brw_aa_line_parameters;
 const struct brw_tracked_state brw_pipelined_state_pointers;
 const struct brw_tracked_state brw_binding_table_pointers;
 const struct brw_tracked_state brw_depthbuffer;
-const struct brw_tracked_state brw_polygon_stipple_offset;
 const struct brw_tracked_state brw_polygon_stipple;
 const struct brw_tracked_state brw_program_parameters;
 const struct brw_tracked_state brw_recalculate_urb_fence;
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index fdcdd59129..233dce03df 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -38,8 +38,6 @@
 
 const struct brw_tracked_state *atoms[] =
 {
-   &brw_check_fallback,
-
 //   &brw_wm_input_sizes,
    &brw_vs_prog,
    &brw_gs_prog, 
@@ -58,7 +56,7 @@ const struct brw_tracked_state *atoms[] =
    &brw_cc_unit,
 
    &brw_vs_surfaces,		/* must do before unit */
-   &brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   //&brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
    &brw_wm_surfaces,		/* must do before samplers and unit */
    &brw_wm_samplers,
 
-- 
cgit v1.2.3


From 0e80e4ea7576733ede13f156a1dce644b1e6df89 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:54:01 +0000
Subject: i965g: make sure blend color packet header is initialized

We will emit this packet at startup (dirty == ~0), even if we haven't
had the state tracker call into brw_set_blend_color() yet.

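This is also a little more efficient, since the packet header is now
set up once in brw_pipe_blend_init() instead of on every call to
brw_set_blend_color().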
---
 src/gallium/drivers/i965/brw_pipe_blend.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_blend.c b/src/gallium/drivers/i965/brw_pipe_blend.c
index 872151222d..b759a910b6 100644
--- a/src/gallium/drivers/i965/brw_pipe_blend.c
+++ b/src/gallium/drivers/i965/brw_pipe_blend.c
@@ -177,9 +177,6 @@ static void brw_set_blend_color(struct pipe_context *pipe,
    struct brw_context *brw = brw_context(pipe);
    struct brw_blend_constant_color *bcc = &brw->curr.bcc;
 
-   memset(bcc, 0, sizeof(*bcc));      
-   bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
-   bcc->header.length = sizeof(*bcc)/4-2;
    bcc->blend_constant_color[0] = blend_color->color[0];
    bcc->blend_constant_color[1] = blend_color->color[1];
    bcc->blend_constant_color[2] = blend_color->color[2];
@@ -195,6 +192,15 @@ void brw_pipe_blend_init( struct brw_context *brw )
    brw->base.create_blend_state = brw_create_blend_state;
    brw->base.bind_blend_state = brw_bind_blend_state;
    brw->base.delete_blend_state = brw_delete_blend_state;
+
+   {
+      struct brw_blend_constant_color *bcc = &brw->curr.bcc;
+
+      memset(bcc, 0, sizeof(*bcc));      
+      bcc->header.opcode = CMD_BLEND_CONSTANT_COLOR;
+      bcc->header.length = sizeof(*bcc)/4-2;
+   }
+
 }
 
 void brw_pipe_blend_cleanup( struct brw_context *brw )
-- 
cgit v1.2.3


From fc7fa678f55d15b032e3c9053a22c811e2de4cde Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:57:02 +0000
Subject: i965g: populate brw_context chipset id

---
 src/gallium/drivers/i965/brw_context.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index 8e1421e738..f85116a568 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -114,6 +114,7 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    brw->base.screen = screen;
    brw->base.destroy = brw_destroy_context;
    brw->sws = brw_screen(screen)->sws;
+   brw->chipset = brw_screen(screen)->chipset;
 
    brw_pipe_blend_init( brw );
    brw_pipe_depth_stencil_init( brw );
-- 
cgit v1.2.3


From c22b47ebb1f00d43fe74d57a1e727fa70c4bf970 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 08:57:40 +0000
Subject: i965g: don't emit line stipple packet if stipple disabled

---
 src/gallium/drivers/i965/brw_misc_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index ce3e48f360..5ee87bcac0 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -344,7 +344,9 @@ const struct brw_tracked_state brw_polygon_stipple = {
 static int upload_line_stipple(struct brw_context *brw)
 {
    const struct brw_line_stipple *bls = &brw->curr.rast->bls;
-   BRW_CACHED_BATCH_STRUCT(brw, bls);
+   if (bls->header.opcode) {
+      BRW_CACHED_BATCH_STRUCT(brw, bls);
+   }
    return 0;
 }
 
-- 
cgit v1.2.3


From b216f1aa474196661aacbaf29604659172d1a74e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 09:35:25 +0000
Subject: i965g: use curr.vertex_element state directly

---
 src/gallium/drivers/i965/brw_context.h     |  5 -----
 src/gallium/drivers/i965/brw_draw_upload.c | 13 +++++++------
 2 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 67fad0d9a5..34799d5211 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -572,12 +572,7 @@ struct brw_context
 	 struct brw_winsys_buffer *bo;
       } vb[PIPE_MAX_ATTRIBS];
 
-      struct {
-         int dummy;
-      } ve[PIPE_MAX_ATTRIBS];
-
       unsigned nr_vb;		/* currently the same as curr.num_vertex_buffers */
-      unsigned nr_ve;		/* currently the same as curr.num_vertex_elements */
    } vb;
 
    struct {
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index 188605a0c1..f50ce3005d 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -317,6 +317,7 @@ static int brw_emit_vertex_buffers( struct brw_context *brw )
 
 static int brw_emit_vertex_elements(struct brw_context *brw)
 {
+   GLuint nr = brw->curr.num_vertex_elements;
    GLuint i;
 
    brw_emit_query_begin(brw);
@@ -328,7 +329,7 @@ static int brw_emit_vertex_elements(struct brw_context *brw)
     * The stale VB state stays in place, but they don't do anything unless
     * a VE loads from them.
     */
-   if (brw->vb.nr_ve == 0) {
+   if (nr == 0) {
       BEGIN_BATCH(3, IGNORE_CLIPRECTS);
       OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
       OUT_BATCH((0 << BRW_VE0_INDEX_SHIFT) |
@@ -346,9 +347,9 @@ static int brw_emit_vertex_elements(struct brw_context *brw)
    /* Now emit vertex element (VEP) state packets.
     *
     */
-   BEGIN_BATCH(1 + brw->curr.num_vertex_elements * 2, IGNORE_CLIPRECTS);
-   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_ve * 2) - 2));
-   for (i = 0; i < brw->vb.nr_ve; i++) {
+   BEGIN_BATCH(1 + nr * 2, IGNORE_CLIPRECTS);
+   OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + nr * 2) - 2));
+   for (i = 0; i < nr; i++) {
       const struct pipe_vertex_element *input = &brw->curr.vertex_element[i];
       uint32_t format = brw_translate_surface_format( input->src_format );
       uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
@@ -364,10 +365,10 @@ static int brw_emit_vertex_elements(struct brw_context *brw)
 	 break;
       }
 
-      OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) |
+      OUT_BATCH((input->vertex_buffer_index << BRW_VE0_INDEX_SHIFT) |
 		BRW_VE0_VALID |
 		(format << BRW_VE0_FORMAT_SHIFT) |
-		(0 << BRW_VE0_SRC_OFFSET_SHIFT));
+		(input->src_offset << BRW_VE0_SRC_OFFSET_SHIFT));
 
       if (BRW_IS_IGDNG(brw))
           OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
-- 
cgit v1.2.3


From 5d7c0cf563b65aeb83f3d2f2ec709a96cf0fbae2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 10:19:24 +0000
Subject: i965g: tgsi outputs cannot be used as source regs

---
 src/gallium/drivers/i965/brw_vs_emit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 3217777acb..25aea87b8f 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -171,7 +171,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 
    /* Allocate outputs.  The non-position outputs go straight into message regs.
     */
-   c->nr_outputs = 0;
+   c->nr_outputs = c->prog_data.nr_outputs;
    c->first_output = reg;
    c->first_overflow_output = 0;
 
@@ -182,7 +182,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 
    /* XXX: need to access vertex output semantics here:
     */
-   c->nr_outputs = c->prog_data.nr_outputs;
    for (i = 0; i < c->prog_data.nr_outputs; i++) {
       assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
 
@@ -244,12 +243,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
       }
    }
 
+#if 0
    for (i = 0; i < 128; i++) {
       if (c->output_regs[i].used_in_src) {
          c->output_regs[i].reg = brw_vec8_grf(reg, 0);
          reg++;
       }
    }
+#endif
 
    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
    reg += 2;
-- 
cgit v1.2.3


From 1d6b5957c6be221feb836bc25686246f67769bce Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 10:19:39 +0000
Subject: i965g: add DEBUG_MIN_URB flag

---
 src/gallium/drivers/i965/brw_debug.h  |  2 +-
 src/gallium/drivers/i965/brw_screen.c |  3 ++-
 src/gallium/drivers/i965/brw_urb.c    | 11 ++++++++++-
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
index aee62f7a5b..ea3c87218b 100644
--- a/src/gallium/drivers/i965/brw_debug.h
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -15,7 +15,7 @@
 #define DEBUG_BATCH             0x80
 #define DEBUG_PIXEL             0x100
 #define DEBUG_BUFMGR            0x200
-#define DEBUG_unused1           0x400
+#define DEBUG_MIN_URB           0x400
 #define DEBUG_unused2           0x800
 #define DEBUG_unused3           0x1000
 #define DEBUG_SYNC	        0x2000
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 04a3f81bcf..275ff0959f 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -48,6 +48,7 @@ static const struct debug_named_value debug_names[] = {
    { "bat",   DEBUG_BATCH},
    { "pix",   DEBUG_PIXEL},
    { "buf",   DEBUG_BUFMGR},
+   { "min",   DEBUG_MIN_URB},
    { "sync",  DEBUG_SYNC},
    { "prim",  DEBUG_PRIMS },
    { "vert",  DEBUG_VERTS },
@@ -291,7 +292,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
 #ifdef DEBUG
    BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
    BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
-   BRW_DEBUG |= DEBUG_STATS;
+   BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB;
 #endif
 
    memset(&chipset, 0, sizeof chipset);
diff --git a/src/gallium/drivers/i965/brw_urb.c b/src/gallium/drivers/i965/brw_urb.c
index 57fd8f20b2..907ec56c6c 100644
--- a/src/gallium/drivers/i965/brw_urb.c
+++ b/src/gallium/drivers/i965/brw_urb.c
@@ -84,7 +84,7 @@
  * XXX: Verify min_nr_entries, esp for VS.
  * XXX: Verify SF min_entry_size.
  */
-static const struct {
+static const struct urb_limits {
    GLuint min_nr_entries;
    GLuint preferred_nr_entries;
    GLuint min_entry_size;
@@ -167,6 +167,15 @@ static int recalculate_urb_fence( struct brw_context *brw )
 	 }
       }
 
+      if (BRW_DEBUG & DEBUG_MIN_URB) {
+	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries;	
+	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries;	
+	 brw->urb.nr_clip_entries = limits[CLP].min_nr_entries;
+	 brw->urb.nr_sf_entries = limits[SF].min_nr_entries;	
+	 brw->urb.nr_cs_entries = limits[CS].min_nr_entries;	
+	 brw->urb.constrained = 1;
+      }
+
       if (!check_urb_layout(brw)) {
 	 brw->urb.nr_vs_entries = limits[VS].min_nr_entries;	
 	 brw->urb.nr_gs_entries = limits[GS].min_nr_entries;	
-- 
cgit v1.2.3


From 212fb8adbd0e5e28a5d20b0cc03cde46df2831f4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 10:24:19 +0000
Subject: i965g: don't set up vs stack register for non-branching shaders

---
 src/gallium/drivers/i965/brw_context.h     |  2 ++
 src/gallium/drivers/i965/brw_pipe_shader.c | 20 ++++++++++----------
 src/gallium/drivers/i965/brw_vs_emit.c     | 11 ++++++++---
 src/gallium/drivers/i965/brw_wm.c          |  3 ---
 src/gallium/drivers/i965/brw_wm.h          |  1 -
 5 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 34799d5211..b81dff0aa0 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -154,6 +154,8 @@ struct brw_vertex_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
+   unsigned  has_flow_control:1;
+
    unsigned id;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
    GLboolean use_const_buffer;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 662c43c3e5..44f9ad6f9c 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -43,15 +43,15 @@
  * Determine if the given shader uses complex features such as flow
  * conditionals, loops, subroutines.
  */
-GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp)
+static GLboolean has_flow_control(const struct tgsi_shader_info *info)
 {
-    return (fp->info.opcode_count[TGSI_OPCODE_ARL] > 0 ||
-	    fp->info.opcode_count[TGSI_OPCODE_IF] > 0 ||
-	    fp->info.opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
-	    fp->info.opcode_count[TGSI_OPCODE_CAL] > 0 ||
-	    fp->info.opcode_count[TGSI_OPCODE_BRK] > 0 ||   /* redundant - BGNLOOP */
-	    fp->info.opcode_count[TGSI_OPCODE_RET] > 0 ||   /* redundant - CAL */
-	    fp->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0);
+    return (info->opcode_count[TGSI_OPCODE_ARL] > 0 ||
+	    info->opcode_count[TGSI_OPCODE_IF] > 0 ||
+	    info->opcode_count[TGSI_OPCODE_ENDIF] > 0 || /* redundant - IF */
+	    info->opcode_count[TGSI_OPCODE_CAL] > 0 ||
+	    info->opcode_count[TGSI_OPCODE_BRK] > 0 ||   /* redundant - BGNLOOP */
+	    info->opcode_count[TGSI_OPCODE_RET] > 0 ||   /* redundant - CAL */
+	    info->opcode_count[TGSI_OPCODE_BGNLOOP] > 0);
 }
 
 
@@ -88,7 +88,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
    /* Duplicate tokens, scan shader
     */
    fs->id = brw->program_id++;
-   fs->has_flow_control = brw_wm_has_flow_control(fs);
+   fs->has_flow_control = has_flow_control(&fs->info);
 
    fs->tokens = tgsi_dup_tokens(shader->tokens);
    if (fs->tokens == NULL)
@@ -126,7 +126,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
    /* Duplicate tokens, scan shader
     */
    vs->id = brw->program_id++;
-   //vs->has_flow_control = brw_wm_has_flow_control(vs);
+   vs->has_flow_control = has_flow_control(&vs->info);
 
    vs->tokens = tgsi_dup_tokens(shader->tokens);
    if (vs->tokens == NULL)
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 25aea87b8f..e0fadc8dce 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -252,8 +252,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    }
 #endif
 
-   c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
-   reg += 2;
+   if (c->vp->has_flow_control) {
+      c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
+      reg += 2;
+   }
 
    /* Some opcodes need an internal temporary:
     */
@@ -1592,7 +1594,10 @@ void brw_vs_emit(struct brw_vs_compile *c)
    /* Static register allocation
     */
    brw_vs_alloc_regs(c);
-   brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+
+   if (c->vp->has_flow_control) {
+      brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack));
+   }
 
    /* Instructions
     */
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 93f90bf329..7f2cb15256 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -162,9 +162,6 @@ static enum pipe_error do_wm_prog( struct brw_context *brw,
 
    brw_init_compile(brw, &c->func);
 
-   /* temporary sanity check assertion */
-   assert(fp->has_flow_control == brw_wm_has_flow_control(c->fp));
-
    /*
     * Shader which use GLSL features such as flow control are handled
     * differently from "simple" shaders.
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 48dac39756..28d216260e 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -338,7 +338,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
 		       GLboolean ps_uses_depth,
 		       struct brw_wm_prog_key *key );
 
-GLboolean brw_wm_has_flow_control(const struct brw_fragment_shader *fp);
 void brw_wm_branching_shader_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 void emit_ddxy(struct brw_compile *p,
-- 
cgit v1.2.3


From caf2cf884cb32883e9af07dbe36ca9648bae1821 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 10:38:19 +0000
Subject: i965g: fix some reloc counts

---
 src/gallium/drivers/i965/brw_cc.c               | 2 +-
 src/gallium/drivers/i965/brw_vs_state.c         | 2 +-
 src/gallium/drivers/i965/brw_vs_surface_state.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index 94e2c99c3e..f05728ea5d 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -150,7 +150,7 @@ cc_unit_create_from_key(struct brw_context *brw,
    
    ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
                           key, sizeof(*key),
-                          reloc, Elements(reloc),
+                          reloc, 1,
                           &cc, sizeof(cc),
                           NULL, NULL,
                           bo_out);
diff --git a/src/gallium/drivers/i965/brw_vs_state.c b/src/gallium/drivers/i965/brw_vs_state.c
index 0b44f39f4d..dadbb622e4 100644
--- a/src/gallium/drivers/i965/brw_vs_state.c
+++ b/src/gallium/drivers/i965/brw_vs_state.c
@@ -146,7 +146,7 @@ vs_unit_create_from_key(struct brw_context *brw,
 
    ret = brw_upload_cache(&brw->cache, BRW_VS_UNIT,
                           key, sizeof(*key),
-                          reloc, Elements(reloc),
+                          reloc, 1,
                           &vs, sizeof(vs),
                           NULL, NULL,
                           bo_out);
diff --git a/src/gallium/drivers/i965/brw_vs_surface_state.c b/src/gallium/drivers/i965/brw_vs_surface_state.c
index aaf2a44f61..177a5170d2 100644
--- a/src/gallium/drivers/i965/brw_vs_surface_state.c
+++ b/src/gallium/drivers/i965/brw_vs_surface_state.c
@@ -162,7 +162,7 @@ brw_vs_get_binding_table(struct brw_context *brw,
    ret = brw_cache_data( &brw->surface_cache, 
                          BRW_SS_SURF_BIND,
                          NULL, 0,
-                         reloc, Elements(reloc),
+                         reloc, nr_reloc,
                          data, sizeof data,
                          NULL, NULL,
                          bo_out);
-- 
cgit v1.2.3


From aab9601a753afd012e16df072e774a32eb1348b9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 11:21:08 +0000
Subject: i965g: hardwire linear interpolation for now

Linear interpolation seems to generate saner code.  Need to go back and fix
perspective interpolation (and remove the hard-wiring) once this is working.
---
 src/gallium/drivers/i965/brw_sf.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 24d1015bbd..52fb2cd42d 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -138,8 +138,11 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
     * XXX: as long as we're hard-wiring, is eg. position required to
     * be linear?
     */
-   key.linear_attrs = 0;
-   key.persp_attrs = (1 << key.nr_attrs) - 1;
+   //key.linear_attrs = 0;
+   //key.persp_attrs = (1 << key.nr_attrs) - 1;
+
+   key.linear_attrs = (1 << key.nr_attrs) - 1;
+   key.persp_attrs = 0;
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
    switch (brw->reduced_primitive) {
-- 
cgit v1.2.3


From 381cd2d63f4aae29d478e02dda5a978a668168e1 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 11:21:48 +0000
Subject: i965g: disassemble each instruction as generated

---
 src/gallium/drivers/i965/brw_context.h | 1 +
 src/gallium/drivers/i965/brw_debug.h   | 2 +-
 src/gallium/drivers/i965/brw_disasm.c  | 2 +-
 src/gallium/drivers/i965/brw_eu_emit.c | 7 +++++++
 src/gallium/drivers/i965/brw_screen.c  | 1 +
 5 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index b81dff0aa0..05fc9d45b5 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -788,6 +788,7 @@ int brw_upload_urb_fence(struct brw_context *brw);
 int brw_upload_cs_urb_state(struct brw_context *brw);
 
 /* brw_disasm.c */
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
 int brw_disasm (FILE *file, 
                 const struct brw_instruction *inst,
                 unsigned count);
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
index ea3c87218b..0deddbf977 100644
--- a/src/gallium/drivers/i965/brw_debug.h
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -16,7 +16,7 @@
 #define DEBUG_PIXEL             0x100
 #define DEBUG_BUFMGR            0x200
 #define DEBUG_MIN_URB           0x400
-#define DEBUG_unused2           0x800
+#define DEBUG_DISASSEM           0x800
 #define DEBUG_unused3           0x1000
 #define DEBUG_SYNC	        0x2000
 #define DEBUG_PRIMS	        0x4000
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index df0c7b9a2b..4100f11d48 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -770,7 +770,7 @@ static int src1 (FILE *file, const struct brw_instruction *inst)
     }
 }
 
-static int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst)
 {
     int	err = 0;
     int space = 0;
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index f7fa520348..7776b4f965 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -33,6 +33,7 @@
 #include "brw_context.h"
 #include "brw_defines.h"
 #include "brw_eu.h"
+#include "brw_debug.h"
 
 
@@ -473,6 +474,12 @@ static struct brw_instruction *next_insn( struct brw_compile *p,
 {
    struct brw_instruction *insn;
 
+   if (0 && (BRW_DEBUG & DEBUG_DISASSEM))
+   {
+      if (p->nr_insn) 
+         brw_disasm_insn(stderr, &p->store[p->nr_insn-1]);
+   }
+
    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
 
    insn = &p->store[p->nr_insn++];
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 275ff0959f..9d8066442b 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -49,6 +49,7 @@ static const struct debug_named_value debug_names[] = {
    { "pix",   DEBUG_PIXEL},
    { "buf",   DEBUG_BUFMGR},
    { "min",   DEBUG_MIN_URB},
+   { "dis",   DEBUG_DISASSEM},
    { "sync",  DEBUG_SYNC},
    { "prim",  DEBUG_PRIMS },
    { "vert",  DEBUG_VERTS },
-- 
cgit v1.2.3


From a485341455bb270001aad8b39c7b9fa36ac74478 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 11:56:52 +0000
Subject: i965g: add dumping for our new pass_fp output

---
 src/gallium/drivers/i965/brw_screen.c   |   2 +-
 src/gallium/drivers/i965/brw_wm.h       |   4 +-
 src/gallium/drivers/i965/brw_wm_debug.c | 163 ++++++++++++++++++++++++--------
 src/gallium/drivers/i965/brw_wm_fp.c    |  35 ++-----
 src/gallium/drivers/i965/brw_wm_glsl.c  |   4 +-
 src/gallium/drivers/i965/brw_wm_pass0.c |   2 +-
 6 files changed, 139 insertions(+), 71 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 9d8066442b..575a418b7d 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -293,7 +293,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
 #ifdef DEBUG
    BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
    BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
-   BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB;
+   BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM;
 #endif
 
    memset(&chipset, 0, sizeof chipset);
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 28d216260e..7d044ff6ec 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -181,7 +181,6 @@ struct brw_wm_instruction {
 #define Y    1
 #define Z    2
 #define W    3
-#define GET_SWZ(swz, comp) (((swz) >> ((comp)*2)) & 0x3)
 
 
 struct brw_fp_src {
@@ -333,6 +332,9 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 void brw_wm_print_program( struct brw_wm_compile *c,
 			   const char *stage );
 
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+                              const char *stage );
+
 void brw_wm_lookup_iz( GLuint line_aa,
 		       GLuint lookup,
 		       GLboolean ps_uses_depth,
diff --git a/src/gallium/drivers/i965/brw_wm_debug.c b/src/gallium/drivers/i965/brw_wm_debug.c
index 65d7626eea..3d11fa074c 100644
--- a/src/gallium/drivers/i965/brw_wm_debug.c
+++ b/src/gallium/drivers/i965/brw_wm_debug.c
@@ -34,6 +34,62 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 
+static void print_writemask( unsigned writemask )
+{
+   if (writemask != BRW_WRITEMASK_XYZW)
+      debug_printf(".%s%s%s%s", 
+		   (writemask & BRW_WRITEMASK_X) ? "x" : "",
+		   (writemask & BRW_WRITEMASK_Y) ? "y" : "",
+		   (writemask & BRW_WRITEMASK_Z) ? "z" : "",
+		   (writemask & BRW_WRITEMASK_W) ? "w" : "");
+}
+
+static void print_swizzle( unsigned swizzle )
+{
+   char *swz = "xyzw";
+   if (swizzle != BRW_SWIZZLE_XYZW)
+      debug_printf(".%c%c%c%c", 
+		   swz[BRW_GET_SWZ(swizzle, X)],
+		   swz[BRW_GET_SWZ(swizzle, Y)],
+		   swz[BRW_GET_SWZ(swizzle, Z)],
+		   swz[BRW_GET_SWZ(swizzle, W)]);
+}
+
+static void print_opcode( unsigned opcode )
+{
+   switch (opcode) {
+   case WM_PIXELXY:
+      debug_printf("PIXELXY");
+      break;
+   case WM_DELTAXY:
+      debug_printf("DELTAXY");
+      break;
+   case WM_PIXELW:
+      debug_printf("PIXELW");
+      break;
+   case WM_WPOSXY:
+      debug_printf("WPOSXY");
+      break;
+   case WM_PINTERP:
+      debug_printf("PINTERP");
+      break;
+   case WM_LINTERP:
+      debug_printf("LINTERP");
+      break;
+   case WM_CINTERP:
+      debug_printf("CINTERP");
+      break;
+   case WM_FB_WRITE:
+      debug_printf("FB_WRITE");
+      break;
+   case WM_FRONTFACING:
+      debug_printf("FRONTFACING");
+      break;
+   default:
+      debug_printf("%s", tgsi_get_opcode_info(opcode)->mnemonic);
+      break;
+   }
+}
 
 void brw_wm_print_value( struct brw_wm_compile *c,
 		       struct brw_wm_value *value )
@@ -98,47 +154,11 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
 	 debug_printf(",");
    }
    debug_printf("]");
-
-   if (inst->writemask != BRW_WRITEMASK_XYZW)
-      debug_printf(".%s%s%s%s", 
-		   (inst->writemask & BRW_WRITEMASK_X) ? "x" : "",
-		   (inst->writemask & BRW_WRITEMASK_Y) ? "y" : "",
-		   (inst->writemask & BRW_WRITEMASK_Z) ? "z" : "",
-		   (inst->writemask & BRW_WRITEMASK_W) ? "w" : "");
-
-   switch (inst->opcode) {
-   case WM_PIXELXY:
-      debug_printf(" = PIXELXY");
-      break;
-   case WM_DELTAXY:
-      debug_printf(" = DELTAXY");
-      break;
-   case WM_PIXELW:
-      debug_printf(" = PIXELW");
-      break;
-   case WM_WPOSXY:
-      debug_printf(" = WPOSXY");
-      break;
-   case WM_PINTERP:
-      debug_printf(" = PINTERP");
-      break;
-   case WM_LINTERP:
-      debug_printf(" = LINTERP");
-      break;
-   case WM_CINTERP:
-      debug_printf(" = CINTERP");
-      break;
-   case WM_FB_WRITE:
-      debug_printf(" = FB_WRITE");
-      break;
-   case WM_FRONTFACING:
-      debug_printf(" = FRONTFACING");
-      break;
-   default:
-      debug_printf(" = %s", tgsi_get_opcode_info(inst->opcode)->mnemonic);
-      break;
-   }
-
+   print_writemask(inst->writemask);
+   
+   debug_printf(" = ");
+   print_opcode(inst->opcode);
+  
    if (inst->saturate)
       debug_printf("_SAT");
 
@@ -173,3 +193,64 @@ void brw_wm_print_program( struct brw_wm_compile *c,
    debug_printf("\n");
 }
 
+static const char *file_strings[TGSI_FILE_COUNT+1] = {
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMPLER",
+   "ADDR",
+   "IMM",
+   "LOOP",
+   "PAYLOAD"
+};
+
+static void brw_wm_print_fp_insn( struct brw_wm_compile *c,
+                                  struct brw_fp_instruction *inst )
+{
+   GLuint i;
+   GLuint nr_args = brw_wm_nr_args(inst->opcode);
+
+   print_opcode(inst->opcode);
+   if (inst->dst.saturate)
+      debug_printf("_SAT");
+   debug_printf(" ");
+
+   if (inst->dst.indirect)
+      debug_printf("[");
+
+   debug_printf("%s[%d]",
+                file_strings[inst->dst.file],
+                inst->dst.index );
+   print_writemask(inst->dst.writemask);
+
+   if (inst->dst.indirect)
+      debug_printf("]");
+
+   debug_printf(nr_args ? ", " : "\n");
+   
+   for (i = 0; i < nr_args; i++) {
+      debug_printf("%s%s%s[%d]%s",
+                   inst->src[i].negate ? "-" : "",
+                   inst->src[i].abs ? "ABS(" : "",
+                   file_strings[inst->src[i].file],
+                   inst->src[i].index,
+                   inst->src[i].abs ? ")" : "");
+      print_swizzle(inst->src[i].swizzle);
+      debug_printf("%s", i == nr_args - 1 ? "\n" : ", ");
+   }
+}
+
+
+void brw_wm_print_fp_program( struct brw_wm_compile *c,
+                              const char *stage )
+{
+   GLuint insn;
+
+   debug_printf("%s:\n", stage);
+   for (insn = 0; insn < c->nr_fp_insns; insn++)
+      brw_wm_print_fp_insn(c, &c->fp_instructions[insn]);
+   debug_printf("\n");
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index bba448815b..74aa02f198 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -45,20 +45,6 @@
 #include "brw_debug.h"
 
 
-
-
-static const char *wm_opcode_strings[] = {
-   "PIXELXY",
-   "DELTAXY",
-   "PIXELW",
-   "LINTERP",
-   "PINTERP",
-   "CINTERP",
-   "WPOSXY",
-   "FB_WRITE",
-   "FRONTFACING",
-};
-
 /***********************************************************************
  * Source regs
  */
@@ -94,10 +80,10 @@ static struct brw_fp_src src_swizzle( struct brw_fp_src reg, int x, int y, int z
 {
    unsigned swz = reg.swizzle;
 
-   reg.swizzle = ( GET_SWZ(swz, x) << 0 |
-		   GET_SWZ(swz, y) << 2 |
-		   GET_SWZ(swz, z) << 4 |
-		   GET_SWZ(swz, w) << 6 );
+   reg.swizzle = ( BRW_GET_SWZ(swz, x) << 0 |
+		   BRW_GET_SWZ(swz, y) << 2 |
+		   BRW_GET_SWZ(swz, z) << 4 |
+		   BRW_GET_SWZ(swz, w) << 6 );
 
    return reg;
 }
@@ -200,10 +186,10 @@ out:
       swizzle |= (swizzle & 0x3) << (j * 2);
 
    return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE, i ),
-		       GET_SWZ(swizzle, X),
-		       GET_SWZ(swizzle, Y),
-		       GET_SWZ(swizzle, Z),
-		       GET_SWZ(swizzle, W) );
+		       BRW_GET_SWZ(swizzle, X),
+		       BRW_GET_SWZ(swizzle, Y),
+		       BRW_GET_SWZ(swizzle, Z),
+		       BRW_GET_SWZ(swizzle, W) );
 }
 
 
@@ -843,7 +829,7 @@ static GLboolean projtex( struct brw_wm_compile *c,
       return GL_FALSE;  /* ut2004 gun rendering !?! */
    
    if (src.file == TGSI_FILE_INPUT && 
-       GET_SWZ(src.swizzle, W) == W &&
+       BRW_GET_SWZ(src.swizzle, W) == W &&
        c->fp->info.input_interpolate[src.index] != TGSI_INTERPOLATE_PERSPECTIVE)
       return GL_FALSE;
 
@@ -1214,8 +1200,7 @@ int brw_wm_pass_fp( struct brw_wm_compile *c )
    }
 
    if (BRW_DEBUG & DEBUG_WM) {
-      debug_printf("pass_fp:\n");
-      //brw_print_program( c->fp_brw_program );
+      brw_wm_print_fp_program( c, "pass_fp" );
       debug_printf("\n");
    }
 
diff --git a/src/gallium/drivers/i965/brw_wm_glsl.c b/src/gallium/drivers/i965/brw_wm_glsl.c
index 284f819bf8..3b3afc39d3 100644
--- a/src/gallium/drivers/i965/brw_wm_glsl.c
+++ b/src/gallium/drivers/i965/brw_wm_glsl.c
@@ -558,7 +558,7 @@ static struct brw_reg get_src_reg(struct brw_wm_compile *c,
 {
     const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
     const GLuint nr = 1;
-    const GLuint component = GET_SWZ(src->Swizzle, channel);
+    const GLuint component = BRW_GET_SWZ(src->Swizzle, channel);
 
     /* Extended swizzle terms */
     if (component == SWIZZLE_ZERO) {
@@ -598,7 +598,7 @@ static struct brw_reg get_src_reg_imm(struct brw_wm_compile *c,
     const struct prog_src_register *src = &inst->SrcReg[srcRegIndex];
     if (src->File == TGSI_FILE_IMMEDIATE) {
        /* an immediate */
-       const int component = GET_SWZ(src->Swizzle, channel);
+       const int component = BRW_GET_SWZ(src->Swizzle, channel);
        const GLfloat *param =
           c->fp->program.Base.Parameters->ParameterValues[src->Index];
        GLfloat value = param[component];
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 7b18335dec..53232325d2 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -227,7 +227,7 @@ static const struct brw_wm_ref *get_fp_src_reg_ref( struct brw_wm_compile *c,
 						    struct brw_fp_src src,
 						    GLuint i )
 {
-   return pass0_get_reg(c, src.file, src.index, GET_SWZ(src.swizzle,i));
+   return pass0_get_reg(c, src.file, src.index, BRW_GET_SWZ(src.swizzle,i));
 }
 
 
-- 
cgit v1.2.3


From 1e3910a878e63d7859b205a30e23535d1da67d45 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 12:01:11 +0000
Subject: i965g: init saturate field in fp dst_reg helper

---
 src/gallium/drivers/i965/brw_wm_fp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 74aa02f198..d27a768a0c 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -223,6 +223,7 @@ static struct brw_fp_dst dst_reg(GLuint file, GLuint idx)
    reg.index = idx;
    reg.writemask = BRW_WRITEMASK_XYZW;
    reg.indirect = 0;
+   reg.saturate = 0;
    return reg;
 }
 
-- 
cgit v1.2.3


From 3e14a482daf5e69331efac69711534a8b66118e4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 12:51:26 +0000
Subject: i965g: propagate nr_cbufs into wm prog key

---
 src/gallium/drivers/i965/brw_context.h | 1 +
 src/gallium/drivers/i965/brw_pipe_fb.c | 5 ++++-
 src/gallium/drivers/i965/brw_wm.c      | 5 +++++
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 05fc9d45b5..f53b92d4f5 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -209,6 +209,7 @@ struct brw_sampler {
 #define PIPE_NEW_QUERY                  0x80000
 #define PIPE_NEW_SCISSOR                0x100000
 #define PIPE_NEW_BOUND_TEXTURES         0x200000
+#define PIPE_NEW_NR_CBUFS               0x400000
 
 
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index d9b70f4eef..f65f45fb84 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -38,7 +38,10 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe,
       }
    }
    
-   brw->curr.fb.nr_cbufs = fb->nr_cbufs;
+   if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) {
+      brw->curr.fb.nr_cbufs = fb->nr_cbufs;
+      brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS;
+   }
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 7f2cb15256..8589aa22a8 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -259,6 +259,10 @@ static void brw_wm_populate_key( struct brw_context *brw,
    /* CACHE_NEW_VS_PROG */
    key->vp_nr_outputs = brw->vs.prog_data->nr_outputs;
 
+   key->nr_cbufs = brw->curr.fb.nr_cbufs;
+
+   key->nr_inputs = brw->curr.fragment_shader->info.num_inputs;
+
    /* The unique fragment program ID */
    key->program_string_id = brw->curr.fragment_shader->id;
 }
@@ -294,6 +298,7 @@ const struct brw_tracked_state brw_wm_prog = {
       .mesa  = (PIPE_NEW_FRAGMENT_SHADER |
 		PIPE_NEW_DEPTH_STENCIL_ALPHA |
 		PIPE_NEW_RAST |
+		PIPE_NEW_NR_CBUFS |
 		PIPE_NEW_BOUND_TEXTURES),
       .brw   = (BRW_NEW_WM_INPUT_DIMENSIONS |
 		BRW_NEW_REDUCED_PRIMITIVE),
-- 
cgit v1.2.3


From eacd13bcc809e1e877a48c2942eb6285aa21f6be Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 13:09:12 +0000
Subject: i965g: plumb through fb_write target and eot data

---
 src/gallium/drivers/i965/brw_wm.h       | 10 +++++-----
 src/gallium/drivers/i965/brw_wm_emit.c  |  4 ++--
 src/gallium/drivers/i965/brw_wm_fp.c    | 26 +++++++++++++-------------
 src/gallium/drivers/i965/brw_wm_pass0.c | 10 +++++++---
 src/gallium/drivers/i965/brw_wm_pass1.c |  4 ++--
 5 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index 7d044ff6ec..f85a8af878 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -142,9 +142,10 @@ struct brw_wm_instruction {
    GLuint saturate:1;
    GLuint writemask:4;
    GLuint tex_unit:4;   /* texture/sampler unit for texture instructions */
-   GLuint tex_target:4; /* TGSI_TEXTURE_x for texture instructions*/
+   GLuint target:4;     /* TGSI_TEXTURE_x for texture instructions,
+                         * target binding table index for FB_WRITE
+                         */
    GLuint eot:1;    	/* End of thread indicator for FB_WRITE*/
-   GLuint target:10;    /* target binding table index for FB_WRITE*/
 };
 
 
@@ -204,10 +205,9 @@ struct brw_fp_instruction {
    struct brw_fp_dst dst;
    struct brw_fp_src src[3];
    unsigned opcode:8;
+   unsigned target:8; /* XXX: special usage for FB_WRITE */
    unsigned tex_unit:4;
-   unsigned tex_target:4;
-   unsigned target:10;		/* destination surface for FB_WRITE */
-   unsigned eot:1;		/* mark last instruction (usually FB_WRITE) */
+   unsigned pad:12;
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 1c38f80cda..a14e12f35b 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -803,7 +803,7 @@ static void emit_tex( struct brw_wm_compile *c,
 
    /* How many input regs are there?
     */
-   switch (inst->tex_target) {
+   switch (inst->target) {
    case TGSI_TEXTURE_1D:
       emit = BRW_WRITEMASK_X;
       nr = 1;
@@ -885,7 +885,7 @@ static void emit_txb( struct brw_wm_compile *c,
    GLuint msg_type;
    /* Shadow ignored for txb.
     */
-   switch (inst->tex_target) {
+   switch (inst->target) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
       brw_MOV(p, brw_message_reg(2), arg[0]);
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index d27a768a0c..2a207958eb 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -280,18 +280,24 @@ static struct brw_fp_instruction *get_fp_inst(struct brw_wm_compile *c)
 static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
 					     GLuint op,
 					     struct brw_fp_dst dest,
-					     GLuint tex_src_unit,
-					     GLuint tex_src_target,
+					     GLuint tex_unit,
+					     GLuint target,
 					     struct brw_fp_src src0,
 					     struct brw_fp_src src1,
 					     struct brw_fp_src src2 )
 {
    struct brw_fp_instruction *inst = get_fp_inst(c);
 
+   if (tex_unit || target)
+      assert(op == TGSI_OPCODE_TXP ||
+             op == TGSI_OPCODE_TXB ||
+             op == TGSI_OPCODE_TEX ||
+             op == WM_FB_WRITE);
+
    inst->opcode = op;
    inst->dst = dest;
-   inst->tex_unit = tex_src_unit;
-   inst->tex_target = tex_src_target;
+   inst->tex_unit = tex_unit;
+   inst->target = target;
    inst->src[0] = src0;
    inst->src[1] = src1;
    inst->src[2] = src2;
@@ -916,23 +922,17 @@ static void emit_fb_write( struct brw_wm_compile *c )
 
    for (i = 0 ; i < c->key.nr_cbufs; i++) {
       struct brw_fp_src outcolor;
-      unsigned target = 1<<i;
-
-      /* Set EOT flag on last inst:
-       */
-      if (i == c->key.nr_cbufs - 1)
-	 target |= 1;
       
       outcolor = find_output_by_semantic(c, TGSI_SEMANTIC_COLOR, i);
 
-      /* Use emit_tex_op so that we can specify the inst->tex_target
+      /* Use emit_tex_op so that we can specify the inst->target
        * field, which is abused to contain the FB write target and the
        * EOT marker
        */
       emit_tex_op(c, WM_FB_WRITE,
 		  dst_undef(),
-		  target,
-		  0,
+		  (i == c->key.nr_cbufs - 1), /* EOT */
+		  i,
 		  outcolor,
 		  payload_r0_depth,
 		  outdepth);
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 53232325d2..7bb341e2c2 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -274,9 +274,13 @@ translate_insn(struct brw_wm_compile *c,
    out->opcode = inst->opcode;
    out->saturate = inst->dst.saturate;
    out->tex_unit = inst->tex_unit;
-   out->tex_target = inst->tex_target;
-   out->eot = inst->eot; //inst->Aux & 1;
-   out->target = inst->target; //inst->Aux >> 1;
+   out->target = inst->target;
+
+   /* Nasty hack:
+    */
+   out->eot = (inst->opcode == WM_FB_WRITE &&
+               inst->tex_unit != 0);
+
 
    /* Args:
     */
diff --git a/src/gallium/drivers/i965/brw_wm_pass1.c b/src/gallium/drivers/i965/brw_wm_pass1.c
index 09ad2b8f5b..005747f00b 100644
--- a/src/gallium/drivers/i965/brw_wm_pass1.c
+++ b/src/gallium/drivers/i965/brw_wm_pass1.c
@@ -223,11 +223,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
 
       case TGSI_OPCODE_TEX:
       case TGSI_OPCODE_TXP:
-	 read0 = get_texcoord_mask(inst->tex_target);
+	 read0 = get_texcoord_mask(inst->target);
 	 break;
 
       case TGSI_OPCODE_TXB:
-	 read0 = get_texcoord_mask(inst->tex_target) | BRW_WRITEMASK_W;
+	 read0 = get_texcoord_mask(inst->target) | BRW_WRITEMASK_W;
 	 break;
 
       case WM_WPOSXY:
-- 
cgit v1.2.3


From 11805726d311a5d11e58f01b2793cc19d7f98566 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 13:41:52 +0000
Subject: i965g: remove references to brw_surface_bo()

---
 src/gallium/drivers/i965/brw_misc_state.c | 6 +++---
 src/gallium/drivers/i965/brw_screen.h     | 3 ---
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 5ee87bcac0..4dd73636fd 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -231,7 +231,7 @@ static int prepare_depthbuffer(struct brw_context *brw)
    struct pipe_surface *zsbuf = brw->curr.fb.zsbuf;
 
    if (zsbuf)
-      brw_add_validated_bo(brw, brw_surface_bo(zsbuf));
+      brw_add_validated_bo(brw, brw_surface(zsbuf)->bo);
 
    return 0;
 }
@@ -278,8 +278,8 @@ static int emit_depthbuffer(struct brw_context *brw)
 	 return PIPE_ERROR_BAD_INPUT;
       }
 
-      bo = brw_surface_bo(surface);
-      pitch = brw_surface_pitch(surface);
+      bo = brw_surface(surface)->bo;
+      pitch = brw_surface(surface)->pitch;
 
       BEGIN_BATCH(len, IGNORE_CLIPRECTS);
       OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2));
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index f7267cc78a..301b20d549 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -167,9 +167,6 @@ brw_buffer_is_user_buffer( const struct pipe_buffer *buf )
    return ((const struct brw_buffer *)buf)->user_buffer != NULL;
 }
 
-struct brw_winsys_buffer *
-brw_surface_bo( struct pipe_surface *surface );
-
 unsigned
 brw_surface_pitch( const struct pipe_surface *surface );
 
-- 
cgit v1.2.3


From 4d1ae7a546250548332e432f305ce47bd97967c3 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 14:18:28 +0000
Subject: i965g: propagate index size state

---
 src/gallium/drivers/i965/brw_draw.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 88cb31ad54..84803e43be 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -194,8 +194,10 @@ brw_draw_range_elements(struct pipe_context *pipe,
     * XXX: do we need to go through state validation to achieve this?
     * Could just call upload code directly.
     */
-   if (brw->curr.index_buffer != index_buffer) {
+   if (brw->curr.index_buffer != index_buffer ||
+       brw->curr.index_size != index_size) {
       pipe_buffer_reference( &brw->curr.index_buffer, index_buffer );
+      brw->curr.index_size = index_size;
       brw->state.dirty.mesa |= PIPE_NEW_INDEX_BUFFER;
    }
 
-- 
cgit v1.2.3


From 96e938f62c729fab74601627d54c9c4cf499ebdf Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 6 Nov 2009 15:08:05 +0000
Subject: llvmpipe: Fix build with llvm 2.6.

Fixes bug 24949.
---
 src/gallium/drivers/llvmpipe/lp_bld_misc.cpp | 7 +++----
 src/gallium/drivers/llvmpipe/lp_bld_misc.h   | 8 +++++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
index c9acaf1f16..d3f78c06d9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.cpp
@@ -26,8 +26,6 @@
  **************************************************************************/
 
 
-#include "llvm/Config/config.h"
-
 #include "pipe/p_config.h"
 
 #include "lp_bld_misc.h"
@@ -50,12 +48,13 @@ LLVMLinkInJIT(void)
 extern "C" int X86TargetMachineModule;
 
 
-void
+int
 LLVMInitializeNativeTarget(void)
 {
 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   X86TargetMachineModule = 1;			
+   X86TargetMachineModule = 1;
 #endif
+   return 0;
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_misc.h b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
index 51a84c5e25..0e787e0b9c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_misc.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_misc.h
@@ -30,17 +30,23 @@
 #define LP_BLD_MISC_H
 
 
+#include "llvm/Config/config.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
+#ifndef LLVM_NATIVE_ARCH
+
 void
 LLVMLinkInJIT(void);
 
-void
+int
 LLVMInitializeNativeTarget(void);
 
+#endif /* !LLVM_NATIVE_ARCH */
+
 
 #ifdef __cplusplus
 }
-- 
cgit v1.2.3


From 86c32df3e08b69605cbc59f4b3b72ac58b651db2 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 15:40:16 +0000
Subject: i965g: get brw_state_debug.c building

---
 src/gallium/drivers/i965/Makefile          |  1 +
 src/gallium/drivers/i965/brw_state_debug.c | 27 ++++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index f0a5bc7ee5..8df07d1c10 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -40,6 +40,7 @@ C_SOURCES = \
 	brw_sf_emit.c \
 	brw_sf_state.c \
 	brw_state_batch.c \
+	brw_state_debug.c \
 	brw_state_cache.c \
 	brw_state_upload.c \
 	brw_structs_dump.c \
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
index cc4744dc16..050f74761c 100644
--- a/src/gallium/drivers/i965/brw_state_debug.c
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -40,7 +40,29 @@ struct dirty_bit_map {
 #define DEFINE_BIT(name) {name, #name, 0}
 
 static struct dirty_bit_map mesa_bits[] = {
+   DEFINE_BIT(PIPE_NEW_DEPTH_STENCIL_ALPHA),
+   DEFINE_BIT(PIPE_NEW_RAST),
+   DEFINE_BIT(PIPE_NEW_BLEND),
+   DEFINE_BIT(PIPE_NEW_VIEWPORT),
+   DEFINE_BIT(PIPE_NEW_SAMPLERS),
+   DEFINE_BIT(PIPE_NEW_VERTEX_BUFFER),
+   DEFINE_BIT(PIPE_NEW_VERTEX_ELEMENT),
+   DEFINE_BIT(PIPE_NEW_FRAGMENT_SHADER),
+   DEFINE_BIT(PIPE_NEW_VERTEX_SHADER),
+   DEFINE_BIT(PIPE_NEW_FRAGMENT_CONSTANTS),
+   DEFINE_BIT(PIPE_NEW_VERTEX_CONSTANTS),
+   DEFINE_BIT(PIPE_NEW_CLIP),
+   DEFINE_BIT(PIPE_NEW_INDEX_BUFFER),
+   DEFINE_BIT(PIPE_NEW_INDEX_RANGE),
    DEFINE_BIT(PIPE_NEW_BLEND_COLOR),
+   DEFINE_BIT(PIPE_NEW_POLYGON_STIPPLE),
+   DEFINE_BIT(PIPE_NEW_FRAMEBUFFER_DIMENSIONS),
+   DEFINE_BIT(PIPE_NEW_DEPTH_BUFFER),
+   DEFINE_BIT(PIPE_NEW_COLOR_BUFFERS),
+   DEFINE_BIT(PIPE_NEW_QUERY),
+   DEFINE_BIT(PIPE_NEW_SCISSOR),
+   DEFINE_BIT(PIPE_NEW_BOUND_TEXTURES),
+   DEFINE_BIT(PIPE_NEW_NR_CBUFS),
    {0, 0, 0}
 };
 
@@ -55,11 +77,10 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_CONTEXT),
    DEFINE_BIT(BRW_NEW_WM_INPUT_DIMENSIONS),
    DEFINE_BIT(BRW_NEW_PSP),
-   DEFINE_BIT(BRW_NEW_FENCE),
+   DEFINE_BIT(BRW_NEW_WM_SURFACES),
+   DEFINE_BIT(BRW_NEW_xxx),
    DEFINE_BIT(BRW_NEW_INDICES),
-   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
-   DEFINE_BIT(BRW_NEW_BATCH),
    {0, 0, 0}
 };
 
-- 
cgit v1.2.3


From 12ea198fd20c04b94bf0fe584b6d894d019d0c40 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Fri, 6 Nov 2009 00:09:04 +0000
Subject: i965g: Header whitespace

---
 src/gallium/drivers/i965/brw_winsys.h | 84 +++++++++++++++++------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 2da660a1e6..a723244960 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -54,7 +54,7 @@ struct brw_winsys_buffer {
 enum brw_buffer_type
 {
    BRW_BUFFER_TYPE_TEXTURE,
-   BRW_BUFFER_TYPE_SCANOUT, /**< a texture used for scanning out from */
+   BRW_BUFFER_TYPE_SCANOUT,          /**< a texture used for scanning out from */
    BRW_BUFFER_TYPE_VERTEX,
    BRW_BUFFER_TYPE_CURBE,
    BRW_BUFFER_TYPE_QUERY,
@@ -63,9 +63,9 @@ enum brw_buffer_type
    BRW_BUFFER_TYPE_BATCH,
    BRW_BUFFER_TYPE_GENERAL_STATE,
    BRW_BUFFER_TYPE_SURFACE_STATE,
-   BRW_BUFFER_TYPE_PIXEL,       /* image uploads, pbo's, etc */
-   BRW_BUFFER_TYPE_GENERIC,     /* unknown */
-   BRW_BUFFER_TYPE_MAX		/* Count of possible values */
+   BRW_BUFFER_TYPE_PIXEL,            /* image uploads, pbo's, etc */
+   BRW_BUFFER_TYPE_GENERIC,          /* unknown */
+   BRW_BUFFER_TYPE_MAX               /* Count of possible values */
 };
 
 
@@ -74,14 +74,14 @@ enum brw_buffer_type
  */
 enum brw_buffer_usage {
    BRW_USAGE_STATE,         /* INSTRUCTION, 0 */
-   BRW_USAGE_QUERY_RESULT,	 /* INSTRUCTION, INSTRUCTION */
+   BRW_USAGE_QUERY_RESULT,  /* INSTRUCTION, INSTRUCTION */
    BRW_USAGE_RENDER_TARGET, /* RENDER,      0 */
-   BRW_USAGE_DEPTH_BUFFER,	 /* RENDER,      RENDER */
-   BRW_USAGE_BLIT_SOURCE,	 /* RENDER,      0 */
+   BRW_USAGE_DEPTH_BUFFER,  /* RENDER,      RENDER */
+   BRW_USAGE_BLIT_SOURCE,   /* RENDER,      0 */
    BRW_USAGE_BLIT_DEST,     /* RENDER,      RENDER */
-   BRW_USAGE_SAMPLER,	 /* SAMPLER,     0 */
-   BRW_USAGE_VERTEX,	 /* VERTEX,      0 */
-   BRW_USAGE_SCRATCH,	 /* 0,           0 */
+   BRW_USAGE_SAMPLER,       /* SAMPLER,     0 */
+   BRW_USAGE_VERTEX,        /* VERTEX,      0 */
+   BRW_USAGE_SCRATCH,       /* 0,           0 */
    BRW_USAGE_MAX
 };
 
@@ -124,11 +124,11 @@ struct brw_winsys_reloc {
    struct brw_winsys_buffer *bo;
 };
 
-static INLINE void make_reloc( struct brw_winsys_reloc *reloc,
-                               enum brw_buffer_usage usage,
-                               unsigned delta,
-                               unsigned offset,
-                               struct brw_winsys_buffer *bo)
+static INLINE void make_reloc(struct brw_winsys_reloc *reloc,
+                              enum brw_buffer_usage usage,
+                              unsigned delta,
+                              unsigned offset,
+                              struct brw_winsys_buffer *bo)
 {
    reloc->usage = usage;
    reloc->delta = delta;
@@ -149,27 +149,27 @@ struct brw_winsys_screen {
    /**
     * Create a buffer.
     */
-   enum pipe_error (*bo_alloc)( struct brw_winsys_screen *sws,
-                                enum brw_buffer_type type,
-                                unsigned size,
-                                unsigned alignment,
-                                struct brw_winsys_buffer **bo_out );
+   enum pipe_error (*bo_alloc)(struct brw_winsys_screen *sws,
+                               enum brw_buffer_type type,
+                               unsigned size,
+                               unsigned alignment,
+                               struct brw_winsys_buffer **bo_out);
 
    /* Destroy a buffer when our refcount goes to zero:
     */
-   void (*bo_destroy)( struct brw_winsys_buffer *buffer );
+   void (*bo_destroy)(struct brw_winsys_buffer *buffer);
 
    /* delta -- added to b2->offset, and written into buffer
     * offset -- location above value is written to within buffer
     */
-   enum pipe_error (*bo_emit_reloc)( struct brw_winsys_buffer *buffer,
-                                     enum brw_buffer_usage usage,
-                                     unsigned delta,
-                                     unsigned offset,
-                                     struct brw_winsys_buffer *b2);
+   enum pipe_error (*bo_emit_reloc)(struct brw_winsys_buffer *buffer,
+                                    enum brw_buffer_usage usage,
+                                    unsigned delta,
+                                    unsigned offset,
+                                    struct brw_winsys_buffer *b2);
 
-   enum pipe_error (*bo_exec)( struct brw_winsys_buffer *buffer,
-                               unsigned bytes_used );
+   enum pipe_error (*bo_exec)(struct brw_winsys_buffer *buffer,
+                              unsigned bytes_used);
 
    enum pipe_error (*bo_subdata)(struct brw_winsys_buffer *buffer,
                                  enum brw_buffer_data_type data_type,
@@ -181,14 +181,14 @@ struct brw_winsys_screen {
 
    boolean (*bo_is_busy)(struct brw_winsys_buffer *buffer);
    boolean (*bo_references)(struct brw_winsys_buffer *a,
-			    struct brw_winsys_buffer *b);
+                            struct brw_winsys_buffer *b);
 
    /* XXX: couldn't this be handled by returning true/false on
     * bo_emit_reloc?
     */
-   enum pipe_error (*check_aperture_space)( struct brw_winsys_screen *iws,
-					    struct brw_winsys_buffer **buffers,
-					    unsigned count );
+   enum pipe_error (*check_aperture_space)(struct brw_winsys_screen *iws,
+                                           struct brw_winsys_buffer **buffers,
+                                           unsigned count);
 
    /**
     * Map a buffer.
@@ -199,11 +199,11 @@ struct brw_winsys_screen {
                    unsigned length,
                    boolean write,
                    boolean discard,
-                   boolean flush_explicit );
+                   boolean flush_explicit);
 
-   void (*bo_flush_range)( struct brw_winsys_buffer *buffer,
-                           unsigned offset,
-                           unsigned length );
+   void (*bo_flush_range)(struct brw_winsys_buffer *buffer,
+                          unsigned offset,
+                          unsigned length);
 
    /**
     * Unmap a buffer.
@@ -224,7 +224,7 @@ struct brw_winsys_screen {
 };
 
 static INLINE void *
-bo_map_read( struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf )
+bo_map_read(struct brw_winsys_screen *sws, struct brw_winsys_buffer *buf)
 {
    return sws->bo_map( buf,
                        BRW_DATA_OTHER,
@@ -259,8 +259,8 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen);
  */
 struct pipe_texture;
 boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
-				      struct brw_winsys_buffer **buffer,
-				      unsigned *stride);
+                                      struct brw_winsys_buffer **buffer,
+                                      unsigned *stride);
 
 /**
  * Wrap a brw_winsys buffer with a texture blanket.
@@ -269,9 +269,9 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
  */
 struct pipe_texture * 
 brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
-				  const struct pipe_texture *template,
-				  const unsigned pitch,
-				  struct brw_winsys_buffer *buffer);
+                                  const struct pipe_texture *template,
+                                  const unsigned pitch,
+                                  struct brw_winsys_buffer *buffer);
 
 
-- 
cgit v1.2.3


From 215b49ae1e44cc9ec2fcf7b9ca3711c07bcc95bc Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Fri, 6 Nov 2009 15:54:27 +0000
Subject: i965g: Add functions needed by the winsys

---
 src/gallium/drivers/i965/brw_screen_texture.c | 117 +++++++++++++++++++++++++-
 1 file changed, 115 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 8e684aa076..911f4825f2 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -300,8 +300,6 @@ fail:
    return NULL;
 }
 
-
-
 static struct pipe_texture *brw_texture_blanket(struct pipe_screen *screen,
 						const struct pipe_texture *templ,
 						const unsigned *stride,
@@ -365,7 +363,122 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
    return FALSE;
 }
 
+boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
+                                      struct brw_winsys_buffer **buffer,
+                                      unsigned *stride)
+{
+   struct brw_texture *tex = brw_texture(texture);
+
+   *buffer = tex->bo;
+   if (stride)
+      *stride = tex->pitch;
+
+   return TRUE;
+}
+
+struct pipe_texture * 
+brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
+                                  const struct pipe_texture *templ,
+                                  const unsigned pitch,
+                                  struct brw_winsys_buffer *buffer)
+{
+   struct brw_screen *bscreen = brw_screen(screen);
+   struct brw_texture *tex;
+   enum brw_buffer_type buffer_type;
+   enum pipe_error ret;
+
+   if (pf_is_compressed(templ->format))
+      return NULL;
+
+   if (pf_is_depth_or_stencil(templ->format))
+      return NULL;
+ 
+   tex = CALLOC_STRUCT(brw_texture);
+   if (!tex)
+      return NULL;
+
+   memcpy(&tex->base, templ, sizeof *templ);
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   tex->cpp = pf_get_size(tex->base.format);
+
+   make_empty_list(&tex->views[0]);
+   make_empty_list(&tex->views[1]);
+
+   if (1)
+      tex->tiling = BRW_TILING_NONE;
+   else
+      tex->tiling = BRW_TILING_X;
+
+   if (!brw_texture_layout(bscreen, tex))
+      goto fail;
+
+   
+   if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                           PIPE_TEXTURE_USAGE_PRIMARY)) {
+      buffer_type = BRW_BUFFER_TYPE_SCANOUT;
+   } else {
+      buffer_type = BRW_BUFFER_TYPE_TEXTURE;
+   }
+
+   tex->bo = buffer;
+
+   if (tex->pitch != pitch)
+      goto fail;
+
+
+/* fix this warning
+   if (tex->size > buffer->size)
+      goto fail;
+ */
+
+   if (ret)
+      goto fail;
+
+   tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
+   tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
+   tex->ss.ss0.surface_format = translate_tex_format(tex->base.format);
+   assert(tex->ss.ss0.surface_format != BRW_SURFACEFORMAT_INVALID);
+
+   /* This is ok for all textures with channel width 8bit or less:
+    */
+/*    tex->ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */
+
+
+   /* XXX: what happens when tex->bo->offset changes???
+    */
+   tex->ss.ss1.base_addr = 0; /* reloc */
+   tex->ss.ss2.mip_count = tex->base.last_level;
+   tex->ss.ss2.width = tex->base.width[0] - 1;
+   tex->ss.ss2.height = tex->base.height[0] - 1;
+
+   switch (tex->tiling) {
+   case BRW_TILING_NONE:
+      tex->ss.ss3.tiled_surface = 0;
+      tex->ss.ss3.tile_walk = 0;
+      break;
+   case BRW_TILING_X:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_XMAJOR;
+      break;
+   case BRW_TILING_Y:
+      tex->ss.ss3.tiled_surface = 1;
+      tex->ss.ss3.tile_walk = BRW_TILEWALK_YMAJOR;
+      break;
+   }
+
+   tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
+   tex->ss.ss3.depth = tex->base.depth[0] - 1;
+
+   tex->ss.ss4.min_lod = 0;
 
+   return &tex->base;
+
+fail:
+   FREE(tex);
+   return NULL;
+}
 
 void brw_screen_tex_init( struct brw_screen *brw_screen )
 {
-- 
cgit v1.2.3


From 2eb6b0defe65b01a7ed1562c2f16c17125242c16 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Fri, 6 Nov 2009 16:40:54 +0000
Subject: i965g: Fix texture blanket function

---
 src/gallium/drivers/i965/brw_screen_texture.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 911f4825f2..adc0aaa8a9 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -387,12 +387,14 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
    enum brw_buffer_type buffer_type;
    enum pipe_error ret;
 
-   if (pf_is_compressed(templ->format))
+   if (templ->target != PIPE_TEXTURE_2D ||
+       templ->last_level != 0 ||
+       templ->depth[0] != 1)
       return NULL;
 
-   if (pf_is_depth_or_stencil(templ->format))
+   if (pf_is_compressed(templ->format))
       return NULL;
- 
+
    tex = CALLOC_STRUCT(brw_texture);
    if (!tex)
       return NULL;
@@ -408,6 +410,9 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
 
    if (1)
       tex->tiling = BRW_TILING_NONE;
+   else if (bscreen->chipset.is_965 &&
+            pf_is_depth_or_stencil(templ->format))
+      tex->tiling = BRW_TILING_Y;
    else
       tex->tiling = BRW_TILING_X;
 
@@ -424,17 +429,13 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
 
    tex->bo = buffer;
 
-   if (tex->pitch != pitch)
-      goto fail;
-
+   tex->pitch = pitch;
 
-/* fix this warning
+   /* fix this warning */
+#if 0
    if (tex->size > buffer->size)
       goto fail;
- */
-
-   if (ret)
-      goto fail;
+#endif
 
    tex->ss.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW;
    tex->ss.ss0.surface_type = translate_tex_target(tex->base.target);
-- 
cgit v1.2.3


From 4fbe6c4e4e754e0e850165d5a303990515ceaba6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 14:20:04 +0000
Subject: i965g: get rid of cc key, simplify state upload

Keep a valid reloc table active between uploads, avoid
recalculating it every time.
---
 src/gallium/drivers/i965/brw_cc.c      | 155 +++++++--------------------------
 src/gallium/drivers/i965/brw_context.c |   8 +-
 src/gallium/drivers/i965/brw_context.h |  13 ++-
 src/gallium/drivers/i965/brw_pipe_fb.c |   4 +
 4 files changed, 51 insertions(+), 129 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_cc.c b/src/gallium/drivers/i965/brw_cc.c
index f05728ea5d..3e070f5591 100644
--- a/src/gallium/drivers/i965/brw_cc.c
+++ b/src/gallium/drivers/i965/brw_cc.c
@@ -35,48 +35,13 @@
 #include "brw_defines.h"
 
 
-struct sane_viewport {
-   float top;
-   float left;
-   float width;
-   float height;
-   float near;
-   float far;
-};
-
-static void calc_sane_viewport( const struct pipe_viewport_state *vp,
-				struct sane_viewport *svp )
-{
-   /* XXX fix me, obviously.
-    */
-   svp->top = 0;
-   svp->left = 0;
-   svp->width = 250;
-   svp->height = 250;
-   svp->near = 0;
-   svp->far = 1;
-}
-
 static enum pipe_error prepare_cc_vp( struct brw_context *brw )
 {
-   struct brw_cc_viewport ccv;
-   struct sane_viewport svp;
-   enum pipe_error ret;
-
-   memset(&ccv, 0, sizeof(ccv));
-
-   /* PIPE_NEW_VIEWPORT */
-   calc_sane_viewport( &brw->curr.viewport, &svp );
-
-   ccv.min_depth = svp.near;
-   ccv.max_depth = svp.far;
-
-   ret = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0,
-                         &brw->cc.vp_bo );
-   if (ret)
-      return ret;
-                
-   return PIPE_OK;
+   return brw_cache_data( &brw->cache, 
+                         BRW_CC_VP,
+                         &brw->curr.ccv,
+                         NULL, 0,
+                         &brw->cc.reloc[CC_RELOC_VP].bo );
 }
 
 const struct brw_tracked_state brw_cc_vp = {
@@ -88,15 +53,6 @@ const struct brw_tracked_state brw_cc_vp = {
    .prepare = prepare_cc_vp
 };
 
-struct brw_cc_unit_key {
-   struct brw_cc0 cc0;
-   struct brw_cc1 cc1;
-   struct brw_cc2 cc2;
-   struct brw_cc3 cc3;
-   struct brw_cc5 cc5;
-   struct brw_cc6 cc6;
-   struct brw_cc7 cc7;
-};
 
 /* A long-winded way to OR two unsigned integers together:
  */
@@ -110,85 +66,22 @@ combine_cc3( struct brw_cc3 a, struct brw_cc3 b )
    return ca.cc3;
 }
 
-static void
-cc_unit_populate_key(const struct brw_context *brw,
-		     struct brw_cc_unit_key *key)
-{
-   key->cc0 = brw->curr.zstencil->cc0;
-   key->cc1 = brw->curr.zstencil->cc1;
-   key->cc2 = brw->curr.zstencil->cc2;
-   key->cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
-   key->cc5 = brw->curr.blend->cc5;
-   key->cc6 = brw->curr.blend->cc6;
-   key->cc7 = brw->curr.zstencil->cc7;
-}
-
-/**
- * Creates the state cache entry for the given CC unit key.
- */
-static enum pipe_error
-cc_unit_create_from_key(struct brw_context *brw, 
-                        struct brw_cc_unit_key *key,
-                        struct brw_winsys_reloc *reloc,
-                        struct brw_winsys_buffer **bo_out)
-{
-   struct brw_cc_unit_state cc;
-   enum pipe_error ret;
-
-   memset(&cc, 0, sizeof(cc));
-
-   cc.cc0 = key->cc0;
-   cc.cc1 = key->cc1;
-   cc.cc2 = key->cc2;
-   cc.cc3 = key->cc3;
-
-   cc.cc4.cc_viewport_state_offset = 0;
-
-   cc.cc5 = key->cc5;
-   cc.cc6 = key->cc6;
-   cc.cc7 = key->cc7;
-   
-   ret = brw_upload_cache(&brw->cache, BRW_CC_UNIT,
-                          key, sizeof(*key),
-                          reloc, 1,
-                          &cc, sizeof(cc),
-                          NULL, NULL,
-                          bo_out);
-   if (ret)
-      return ret;
-
-   return PIPE_OK;
-}
 
 static int prepare_cc_unit( struct brw_context *brw )
 {
-   struct brw_cc_unit_key key;
-   struct brw_winsys_reloc reloc[1];
-   enum pipe_error ret;
-
-   cc_unit_populate_key(brw, &key);
-
-   /* CACHE_NEW_CC_VP */
-   make_reloc(&reloc[0],
-              BRW_USAGE_STATE,
-              0,
-              offsetof(struct brw_cc_unit_state, cc4),
-              brw->cc.vp_bo);
-
-   if (brw_search_cache(&brw->cache, BRW_CC_UNIT,
-                        &key, sizeof(key),
-                        reloc, 1,
-                        NULL,
-                        &brw->cc.state_bo))
-      return PIPE_OK;
-
-   ret = cc_unit_create_from_key(brw, &key, 
-                                 reloc,
-                                 &brw->cc.state_bo);
-   if (ret)
-      return ret;
+   brw->cc.cc.cc0 = brw->curr.zstencil->cc0;
+   brw->cc.cc.cc1 = brw->curr.zstencil->cc1;
+   brw->cc.cc.cc2 = brw->curr.zstencil->cc2;
+   brw->cc.cc.cc3 = combine_cc3( brw->curr.zstencil->cc3, brw->curr.blend->cc3 );
    
-   return PIPE_OK;
+   brw->cc.cc.cc5 = brw->curr.blend->cc5;
+   brw->cc.cc.cc6 = brw->curr.blend->cc6;
+   brw->cc.cc.cc7 = brw->curr.zstencil->cc7;
+
+   return brw_cache_data_sz(&brw->cache, BRW_CC_UNIT,
+                           &brw->cc.cc, sizeof(brw->cc.cc),
+                           brw->cc.reloc, 1,
+                           &brw->cc.state_bo);
 }
 
 const struct brw_tracked_state brw_cc_unit = {
@@ -201,4 +94,18 @@ const struct brw_tracked_state brw_cc_unit = {
 };
 
 
+void brw_hw_cc_init( struct brw_context *brw )
+{
+   make_reloc(&brw->cc.reloc[0],
+              BRW_USAGE_STATE,
+              0,
+              offsetof(struct brw_cc_unit_state, cc4),
+              NULL);
+}
+
 
+void brw_hw_cc_cleanup( struct brw_context *brw )
+{
+   bo_reference(&brw->cc.state_bo, NULL);
+   bo_reference(&brw->cc.reloc[0].bo, NULL);
+}
diff --git a/src/gallium/drivers/i965/brw_context.c b/src/gallium/drivers/i965/brw_context.c
index f85116a568..e67551882d 100644
--- a/src/gallium/drivers/i965/brw_context.c
+++ b/src/gallium/drivers/i965/brw_context.c
@@ -65,6 +65,9 @@ static void brw_destroy_context( struct pipe_context *pipe )
    brw_pipe_vertex_cleanup( brw );
    brw_pipe_clear_cleanup( brw );
 
+   brw_hw_cc_cleanup( brw );
+
+
    FREE(brw->wm.compile_data);
 
    for (i = 0; i < brw->curr.fb.nr_cbufs; i++)
@@ -96,9 +99,6 @@ static void brw_destroy_context( struct pipe_context *pipe )
    bo_reference(&brw->wm.sampler_bo, NULL);
    bo_reference(&brw->wm.prog_bo, NULL);
    bo_reference(&brw->wm.state_bo, NULL);
-   bo_reference(&brw->cc.prog_bo, NULL);
-   bo_reference(&brw->cc.state_bo, NULL);
-   bo_reference(&brw->cc.vp_bo, NULL);
 }
 
 
@@ -128,6 +128,8 @@ struct pipe_context *brw_create_context(struct pipe_screen *screen)
    brw_pipe_vertex_init( brw );
    brw_pipe_clear_init( brw );
 
+   brw_hw_cc_init( brw );
+
    brw_init_state( brw );
    brw_draw_init( brw );
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index f53b92d4f5..4a975ecd7e 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -481,6 +481,8 @@ struct brw_query_object {
    uint64_t result;
 };
 
+#define CC_RELOC_VP 0
+
 
 /**
  * brw_context is derived from pipe_context
@@ -525,6 +527,7 @@ struct brw_context
 
       struct brw_blend_constant_color bcc;
       struct brw_polygon_stipple bps;
+      struct brw_cc_viewport ccv;
 
       /**
        * Index buffer for this draw_prims call.
@@ -708,9 +711,10 @@ struct brw_context
 
 
    struct {
-      struct brw_winsys_buffer *prog_bo;
       struct brw_winsys_buffer *state_bo;
-      struct brw_winsys_buffer *vp_bo;
+
+      struct brw_cc_unit_state cc;
+      struct brw_winsys_reloc reloc[1];
    } cc;
 
    struct {
@@ -764,6 +768,7 @@ void brw_pipe_shader_init( struct brw_context *brw );
 void brw_pipe_vertex_init( struct brw_context *brw );
 void brw_pipe_clear_init( struct brw_context *brw );
 
+
 void brw_pipe_blend_cleanup( struct brw_context *brw );
 void brw_pipe_depth_stencil_cleanup( struct brw_context *brw );
 void brw_pipe_framebuffer_cleanup( struct brw_context *brw );
@@ -776,6 +781,10 @@ void brw_pipe_shader_cleanup( struct brw_context *brw );
 void brw_pipe_vertex_cleanup( struct brw_context *brw );
 void brw_pipe_clear_cleanup( struct brw_context *brw );
 
+void brw_hw_cc_init( struct brw_context *brw );
+void brw_hw_cc_cleanup( struct brw_context *brw );
+
+
 
 void brw_context_flush( struct brw_context *brw );
 
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index f65f45fb84..1511220447 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -49,7 +49,11 @@ static void brw_set_viewport_state( struct pipe_context *pipe,
 				    const struct pipe_viewport_state *viewport )
 {
    struct brw_context *brw = brw_context(pipe);
+
    brw->curr.viewport = *viewport;
+   brw->curr.ccv.min_depth = 0.0;         /* XXX: near */
+   brw->curr.ccv.max_depth = 1.0;         /* XXX: far */
+
    brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
 }
 
-- 
cgit v1.2.3


From 833f5bbfafee00ad44085e121eea0a2579eb3459 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Fri, 6 Nov 2009 18:34:23 +0000
Subject: i965g: First clear!

---
 src/gallium/drivers/i965/brw_screen_texture.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index adc0aaa8a9..842c70a39a 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -384,8 +384,6 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
 {
    struct brw_screen *bscreen = brw_screen(screen);
    struct brw_texture *tex;
-   enum brw_buffer_type buffer_type;
-   enum pipe_error ret;
 
    if (templ->target != PIPE_TEXTURE_2D ||
        templ->last_level != 0 ||
@@ -419,17 +417,13 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
    if (!brw_texture_layout(bscreen, tex))
       goto fail;
 
-   
-   if (templ->tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
-                           PIPE_TEXTURE_USAGE_PRIMARY)) {
-      buffer_type = BRW_BUFFER_TYPE_SCANOUT;
-   } else {
-      buffer_type = BRW_BUFFER_TYPE_TEXTURE;
-   }
+   /* XXX Maybe some more checks? */
+   if ((pitch / tex->cpp) < tex->pitch)
+      goto fail;
 
-   tex->bo = buffer;
+   tex->pitch = pitch / tex->cpp;
 
-   tex->pitch = pitch;
+   tex->bo = buffer;
 
    /* fix this warning */
 #if 0
-- 
cgit v1.2.3


From dc97a5d782b01d530bb7cbe6e76625f969259e32 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Fri, 6 Nov 2009 19:05:41 +0000
Subject: i965g: Match pitch modification on get buffer as well

This is an ugly hack in order to match what the Intel
X driver gives us. However, putting this in the winsys,
where it fits better, would force it to reach more into
the driver than it already does.
---
 src/gallium/drivers/i965/brw_screen_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 842c70a39a..dbefbfc5cc 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -371,7 +371,7 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
 
    *buffer = tex->bo;
    if (stride)
-      *stride = tex->pitch;
+      *stride = tex->pitch * tex->cpp;
 
    return TRUE;
 }
-- 
cgit v1.2.3


From 9708ce874edb140ddffd44ddbb33011273a8ded9 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Fri, 6 Nov 2009 20:19:39 +0000
Subject: i965g: Add texture transfer functions

They don't seem to work. Maybe we are forgetting to flush the gpu or something.
---
 src/gallium/drivers/i965/brw_screen_texture.c | 95 +++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index dbefbfc5cc..75bb8a73b7 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -363,6 +363,97 @@ boolean brw_is_texture_referenced_by_bo( struct brw_screen *brw_screen,
    return FALSE;
 }
 
+
+/*
+ * Transfer functions
+ */
+
+static struct pipe_transfer*  /* returns the new transfer, or NULL if allocation fails */
+brw_get_tex_transfer(struct pipe_screen *screen,
+                     struct pipe_texture *texture,
+                     unsigned face, unsigned level, unsigned zslice,
+                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                     unsigned w, unsigned h)
+{
+   struct brw_texture *tex = brw_texture(texture);
+   struct brw_transfer *trans;
+   unsigned offset;  /* in bytes */
+   /* Select the image inside the mip level: cube face, 3D slice, or image 0. */
+   if (texture->target == PIPE_TEXTURE_CUBE) {
+      offset = tex->image_offset[level][face];
+   } else if (texture->target == PIPE_TEXTURE_3D) {
+      offset = tex->image_offset[level][zslice];
+   } else {
+      offset = tex->image_offset[level][0];
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   trans = CALLOC_STRUCT(brw_transfer);
+   if (!trans)
+      return NULL;  /* report failure explicitly instead of deriving a pointer from NULL */
+   pipe_texture_reference(&trans->base.texture, texture);  /* transfer holds its own reference */
+   trans->base.format = texture->format;  /* take the format from the source texture */
+   trans->base.x = x;
+   trans->base.y = y;
+   trans->base.width = w;
+   trans->base.height = h;
+   trans->base.block = texture->block;
+   trans->base.nblocksx = texture->nblocksx[level];
+   trans->base.nblocksy = texture->nblocksy[level];
+   trans->base.stride = tex->pitch * tex->cpp;  /* NOTE(review): assumes tex->pitch is in cpp-sized units, giving bytes -- confirm */
+   trans->offset = offset;
+   trans->base.usage = usage;
+   return &trans->base;
+}
+
+static void *  /* returns a CPU pointer to the transfer's (x, y) origin, or NULL if bo_map fails */
+brw_transfer_map(struct pipe_screen *screen,
+                 struct pipe_transfer *transfer)
+{
+   struct brw_texture *tex = brw_texture(transfer->texture);
+   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+   char *map;  /* char * so the offset arithmetic below is in bytes */
+   unsigned usage = transfer->usage;
+
+   map = sws->bo_map(tex->bo,  /* the texture's buffer object */
+                     BRW_DATA_OTHER,
+                     0,  /* NOTE(review): 0 and bo->size presumably mean offset/length of the whole buffer -- confirm against the winsys interface */
+                     tex->bo->size,
+                     (usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE,  /* TRUE only when the transfer was requested for writing */
+                     (usage & 0) ? TRUE : FALSE,  /* always FALSE: (usage & 0) is 0; looks like a placeholder -- TODO confirm intent */
+                     (usage & 0) ? TRUE : FALSE);  /* always FALSE, same as the argument above */
+
+   if (!map)
+      return NULL;
+
+   return map + brw_transfer(transfer)->offset +  /* start of the selected image */
+      transfer->y / transfer->block.height * transfer->stride +  /* block rows above y, one stride each */
+      transfer->x / transfer->block.width * transfer->block.size;  /* blocks left of x, block.size bytes each */
+}
+
+static void  /* Unmap the buffer object backing the transfer's texture. */
+brw_transfer_unmap(struct pipe_screen *screen,
+                   struct pipe_transfer *transfer)
+{
+   struct brw_texture *tex = brw_texture(transfer->texture);
+   struct brw_winsys_screen *sws = brw_screen(screen)->sws;
+
+   sws->bo_unmap(tex->bo);  /* operates on the same tex->bo that brw_transfer_map passes to bo_map */
+}
+
+static void  /* Destroy a transfer: clear its texture reference, then free the transfer itself. */
+brw_tex_transfer_destroy(struct pipe_transfer *trans)
+{
+   pipe_texture_reference(&trans->texture, NULL);  /* presumably releases the reference held in trans->texture -- confirm helper semantics */
+   FREE(trans);  /* NOTE(review): frees through the base pointer; assumes base is the first member of brw_transfer -- confirm */
+}
+
+
+/*
+ * Functions exported to the winsys
+ */
+
 boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
                                       struct brw_winsys_buffer **buffer,
                                       unsigned *stride)
@@ -481,4 +572,8 @@ void brw_screen_tex_init( struct brw_screen *brw_screen )
    brw_screen->base.texture_create = brw_texture_create;
    brw_screen->base.texture_destroy = brw_texture_destroy;
    brw_screen->base.texture_blanket = brw_texture_blanket;
+   brw_screen->base.get_tex_transfer = brw_get_tex_transfer;
+   brw_screen->base.transfer_map = brw_transfer_map;
+   brw_screen->base.transfer_unmap = brw_transfer_unmap;
+   brw_screen->base.tex_transfer_destroy = brw_tex_transfer_destroy;
 }
-- 
cgit v1.2.3


From 44cb5b5c663da4d218448cfd2386b431de35c8d2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 7 Nov 2009 10:46:47 +0100
Subject: nv50: enable all 32 threads of a warp

This should be the default setting.
See also 7d967b9b7c08aea2a471c5bf6aced8bfafdae874.
---
 src/gallium/drivers/nv50/nv50_screen.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index c8d0f1e4d8..e1b2f11239 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -309,7 +309,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, 0x121c, 1);
 	so_data  (so, 1);
 
-	/* try to activate all/more lanes (threads) in a warp */
+	/* activate all 32 lanes (threads) in a warp */
+	so_method(so, screen->tesla, 0x19a0, 1);
+	so_data  (so, 0x2);
 	so_method(so, screen->tesla, 0x1400, 1);
 	so_data  (so, 0xf);
 
-- 
cgit v1.2.3


From 229f6b9a7e699b814e07ba762de97a5ebcffce51 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 7 Nov 2009 15:47:21 +0000
Subject: i965g: Formalize on S8Z24 as the supported depth format

---
 src/gallium/drivers/i965/brw_misc_state.c     | 3 ++-
 src/gallium/drivers/i965/brw_pipe_clear.c     | 7 -------
 src/gallium/drivers/i965/brw_screen.c         | 2 ++
 src/gallium/drivers/i965/brw_screen_texture.c | 4 ----
 4 files changed, 4 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_misc_state.c b/src/gallium/drivers/i965/brw_misc_state.c
index 4dd73636fd..e4b24229db 100644
--- a/src/gallium/drivers/i965/brw_misc_state.c
+++ b/src/gallium/drivers/i965/brw_misc_state.c
@@ -265,7 +265,8 @@ static int emit_depthbuffer(struct brw_context *brw)
 	 format = BRW_DEPTHFORMAT_D16_UNORM;
 	 cpp = 2;
 	 break;
-      case PIPE_FORMAT_Z24S8_UNORM:
+      case PIPE_FORMAT_X8Z24_UNORM:
+      case PIPE_FORMAT_S8Z24_UNORM:
 	 format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
 	 cpp = 4;
 	 break;
diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index 34cad62977..f846b4342c 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -135,8 +135,6 @@ static void zstencil_clear(struct brw_context *brw,
    unsigned value;
 
    switch (bsurface->base.format) {
-   case PIPE_FORMAT_Z24S8_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
    case PIPE_FORMAT_X8Z24_UNORM:
    case PIPE_FORMAT_S8Z24_UNORM:
       value = ((unsigned)(depth * MASK24) & MASK24);
@@ -150,11 +148,6 @@ static void zstencil_clear(struct brw_context *brw,
    }
 
    switch (bsurface->base.format) {
-   case PIPE_FORMAT_Z24S8_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
-      value = (value << 8) | stencil;
-      break;
-
    case PIPE_FORMAT_X8Z24_UNORM:
    case PIPE_FORMAT_S8Z24_UNORM:
       value = value | (stencil << 24);
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 575a418b7d..af885320a7 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -210,12 +210,14 @@ brw_is_format_supported(struct pipe_screen *screen,
       PIPE_FORMAT_A8L8_UNORM,
       PIPE_FORMAT_YCBCR,
       PIPE_FORMAT_YCBCR_REV,
+      PIPE_FORMAT_X8Z24_UNORM,
       PIPE_FORMAT_S8Z24_UNORM,
       PIPE_FORMAT_NONE  /* list terminator */
    };
    static const enum pipe_format surface_supported[] = {
       PIPE_FORMAT_A8R8G8B8_UNORM,
       PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_X8Z24_UNORM,
       PIPE_FORMAT_S8Z24_UNORM,
       PIPE_FORMAT_NONE  /* list terminator */
    };
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 75bb8a73b7..9ca60b46d3 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -142,12 +142,8 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_DXT1_SRGB:
       return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
 
-      /* XXX: which pipe depth formats does i965 suppport
-       */
    case PIPE_FORMAT_S8Z24_UNORM:
    case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z24S8_UNORM:
-   case PIPE_FORMAT_Z24X8_UNORM:
          return BRW_SURFACEFORMAT_I24X8_UNORM;
 
 #if 0
-- 
cgit v1.2.3


From cbad97b68504a64650cb77bad96962310ab9c7f9 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 7 Nov 2009 17:47:00 +0000
Subject: i965g: Fixup texture formats

---
 src/gallium/drivers/i965/brw_screen.c         | 45 +++++++++++++++---
 src/gallium/drivers/i965/brw_screen_texture.c | 66 +++++++++++++++------------
 2 files changed, 74 insertions(+), 37 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index af885320a7..05da72ebb2 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -201,31 +201,62 @@ brw_is_format_supported(struct pipe_screen *screen,
                          unsigned geom_flags)
 {
    static const enum pipe_format tex_supported[] = {
-      PIPE_FORMAT_R8G8B8A8_UNORM,
-      PIPE_FORMAT_A8R8G8B8_UNORM,
-      PIPE_FORMAT_R5G6B5_UNORM,
       PIPE_FORMAT_L8_UNORM,
-      PIPE_FORMAT_A8_UNORM,
       PIPE_FORMAT_I8_UNORM,
+      PIPE_FORMAT_A8_UNORM,
+      PIPE_FORMAT_L16_UNORM,
+      /*PIPE_FORMAT_I16_UNORM,*/
+      /*PIPE_FORMAT_A16_UNORM,*/
       PIPE_FORMAT_A8L8_UNORM,
+      PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_A1R5G5B5_UNORM,
+      PIPE_FORMAT_A4R4G4B4_UNORM,
+      PIPE_FORMAT_X8R8G8B8_UNORM,
+      PIPE_FORMAT_A8R8G8B8_UNORM,
+      /* video */
       PIPE_FORMAT_YCBCR,
       PIPE_FORMAT_YCBCR_REV,
+      /* compressed */
+      /*PIPE_FORMAT_FXT1_RGBA,*/
+      PIPE_FORMAT_DXT1_RGB,
+      PIPE_FORMAT_DXT1_RGBA,
+      PIPE_FORMAT_DXT3_RGBA,
+      PIPE_FORMAT_DXT5_RGBA,
+      /* sRGB */
+      PIPE_FORMAT_R8G8B8A8_SRGB,
+      PIPE_FORMAT_A8L8_SRGB,
+      PIPE_FORMAT_L8_SRGB,
+      PIPE_FORMAT_DXT1_SRGB,
+      /* depth */
+      PIPE_FORMAT_Z32_FLOAT,
       PIPE_FORMAT_X8Z24_UNORM,
       PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
+      /* signed */
+      PIPE_FORMAT_R8G8_SNORM,
+      PIPE_FORMAT_R8G8B8A8_SNORM,
       PIPE_FORMAT_NONE  /* list terminator */
    };
-   static const enum pipe_format surface_supported[] = {
+   static const enum pipe_format render_supported[] = {
+      PIPE_FORMAT_X8R8G8B8_UNORM,
       PIPE_FORMAT_A8R8G8B8_UNORM,
       PIPE_FORMAT_R5G6B5_UNORM,
+      PIPE_FORMAT_NONE  /* list terminator */
+   };
+   static const enum pipe_format depth_supported[] = {
+      PIPE_FORMAT_Z32_FLOAT,
       PIPE_FORMAT_X8Z24_UNORM,
       PIPE_FORMAT_S8Z24_UNORM,
+      PIPE_FORMAT_Z16_UNORM,
       PIPE_FORMAT_NONE  /* list terminator */
    };
    const enum pipe_format *list;
    uint i;
 
-   if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
-      list = surface_supported;
+   if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+      list = depth_supported;
+   else if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+      list = render_supported;
    else
       list = tex_supported;
 
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 9ca60b46d3..666ec70d42 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -73,16 +73,19 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_A8_UNORM:
       return BRW_SURFACEFORMAT_A8_UNORM; 
 
-   case PIPE_FORMAT_A8L8_UNORM:
-      return BRW_SURFACEFORMAT_L8A8_UNORM;
+   case PIPE_FORMAT_L16_UNORM:
+      return BRW_SURFACEFORMAT_L16_UNORM;
 
-   case PIPE_FORMAT_A8R8G8B8_UNORM: /* XXX */
-   case PIPE_FORMAT_B8G8R8A8_UNORM: /* XXX */
-   case PIPE_FORMAT_R8G8B8A8_UNORM: /* XXX */
-      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
+      /* XXX: Add these to gallium
+   case PIPE_FORMAT_I16_UNORM:
+      return BRW_SURFACEFORMAT_I16_UNORM;
 
-   case PIPE_FORMAT_R8G8B8X8_UNORM:
-      return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
+   case PIPE_FORMAT_A16_UNORM:
+      return BRW_SURFACEFORMAT_A16_UNORM; 
+      */
+
+   case PIPE_FORMAT_A8L8_UNORM:
+      return BRW_SURFACEFORMAT_L8A8_UNORM;
 
    case PIPE_FORMAT_R5G6B5_UNORM:
       return BRW_SURFACEFORMAT_B5G6R5_UNORM;
@@ -93,19 +96,15 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_A4R4G4B4_UNORM:
       return BRW_SURFACEFORMAT_B4G4R4A4_UNORM;
 
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_R8G8B8X8_UNORM;
 
-   case PIPE_FORMAT_L16_UNORM:
-      return BRW_SURFACEFORMAT_L16_UNORM;
-
-      /* XXX: Z texturing: 
-   case PIPE_FORMAT_I16_UNORM:
-      return BRW_SURFACEFORMAT_I16_UNORM;
-       */
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
 
-      /* XXX: Z texturing:
-   case PIPE_FORMAT_A16_UNORM:
-      return BRW_SURFACEFORMAT_A16_UNORM; 
-      */
+   /*
+    * Video formats
+    */
 
    case PIPE_FORMAT_YCBCR_REV:
       return BRW_SURFACEFORMAT_YCRCB_NORMAL;
@@ -113,6 +112,9 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_YCBCR:
       return BRW_SURFACEFORMAT_YCRCB_SWAPUVY;
 
+   /*
+    * Compressed formats.
+    */
       /* XXX: Add FXT to gallium?
    case PIPE_FORMAT_FXT1_RGBA:
       return BRW_SURFACEFORMAT_FXT1;
@@ -130,6 +132,10 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_DXT5_RGBA:
        return BRW_SURFACEFORMAT_BC3_UNORM;
 
+   /*
+    * sRGB formats
+    */
+
    case PIPE_FORMAT_R8G8B8A8_SRGB:
       return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
 
@@ -142,24 +148,24 @@ static GLuint translate_tex_format( enum pipe_format pf )
    case PIPE_FORMAT_DXT1_SRGB:
       return BRW_SURFACEFORMAT_BC1_UNORM_SRGB;
 
+   /*
+    * Depth formats
+    */
+
+   case PIPE_FORMAT_Z16_UNORM:
+         return BRW_SURFACEFORMAT_I16_UNORM;
+
    case PIPE_FORMAT_S8Z24_UNORM:
    case PIPE_FORMAT_X8Z24_UNORM:
          return BRW_SURFACEFORMAT_I24X8_UNORM;
 
-#if 0
-      /* XXX: these different surface formats don't seem to
-       * make any difference for shadow sampler/compares.
-       */
-      if (depth_mode == GL_INTENSITY) 
-         return BRW_SURFACEFORMAT_I24X8_UNORM;
-      else if (depth_mode == GL_ALPHA)
-         return BRW_SURFACEFORMAT_A24X8_UNORM;
-      else
-         return BRW_SURFACEFORMAT_L24X8_UNORM;
-#endif
+   case PIPE_FORMAT_Z32_FLOAT:
+         return BRW_SURFACEFORMAT_I32_FLOAT;
 
       /* XXX: presumably for bump mapping.  Add this to mesa state
        * tracker?
+       *
+       * XXX: Add flipped versions of these formats to Gallium.
        */
    case PIPE_FORMAT_R8G8_SNORM:
       return BRW_SURFACEFORMAT_R8G8_SNORM;
-- 
cgit v1.2.3


From 57d77c6a4474beecdd22b97a8f5af6e4d2833d97 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <wallbraker@gmail.com>
Date: Sat, 7 Nov 2009 19:31:18 +0000
Subject: i915g: Fix comment in is buffer referenced

---
 src/gallium/drivers/i915/i915_context.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index e745f3342d..94c8aee30f 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -155,15 +155,11 @@ static unsigned int
 i915_is_buffer_referenced(struct pipe_context *pipe,
                           struct pipe_buffer *buf)
 {
-   /**
-    * FIXME: Return the corrent result. We can't alays return referenced
-    *        since it causes a double flush within the vbo module.
+   /*
+    * Since we never expose hardware buffers to the state tracker
+    * they can never be referenced, so this isn't a lie
     */
-#if 0
-   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
-#else
    return 0;
-#endif
 }
 
 
-- 
cgit v1.2.3


From 6acb26eadfcb3c21fd09d0b22804b49de9a82cf7 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 25 Oct 2009 13:22:22 +0100
Subject: r300g: move vborender context function to separate file

r300g: Un-migrate r300_draw_render.

It'll make maintaining the SW TCL path easier.
---
 src/gallium/drivers/r300/r300_render.c | 5 ++++-
 src/gallium/drivers/r300/r300_render.h | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index c36350d29e..634c803f2a 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -20,6 +20,9 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+/* r300_render: Vertex and index buffer primitive emission. Contains both
+ * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */
+
 #include "draw/draw_context.h"
 #include "draw/draw_vbuf.h"
 
@@ -38,7 +41,7 @@
 /* r300_render: Vertex and index buffer primitive emission. */
 #define R300_MAX_VBO_SIZE  (1024 * 1024)
 
-static uint32_t r300_translate_primitive(unsigned prim)
+uint32_t r300_translate_primitive(unsigned prim)
 {
     switch (prim) {
         case PIPE_PRIM_POINTS:
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index 3d8f47ba75..3f8ac1fb7a 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -23,6 +23,8 @@
 #ifndef R300_RENDER_H
 #define R300_RENDER_H
 
+uint32_t r300_translate_primitive(unsigned prim);
+
 boolean r300_draw_range_elements(struct pipe_context* pipe,
                                  struct pipe_buffer* indexBuffer,
                                  unsigned indexSize,
-- 
cgit v1.2.3


From c7dfffc5d5078e3cf1c28c230177cbbb43b91131 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 25 Oct 2009 12:08:02 +0100
Subject: r300g: enable CS dumping

---
 src/gallium/drivers/r300/r300_cs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 883f0a02dc..86ba91db52 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -34,8 +34,8 @@
 
 #define MAX_CS_SIZE 64 * 1024 / 4
 
-#define VERY_VERBOSE_CS 0
-#define VERY_VERBOSE_REGISTERS 0
+#define VERY_VERBOSE_CS 1
+#define VERY_VERBOSE_REGISTERS 1
 
 /* XXX stolen from radeon_drm.h */
 #define RADEON_GEM_DOMAIN_CPU  0x1
-- 
cgit v1.2.3


From d8592d1724d8c8fd0b36eb21f4007b52f809e062 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <maciej@osiris.(none)>
Date: Sun, 1 Nov 2009 17:04:32 +0100
Subject: r300g: add missing flush

---
 src/gallium/drivers/r300/r300_state.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 658a8cba13..bed886fad0 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -577,6 +577,8 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
     if (count > 8) {
         return;
     }
+    
+    r300->context.flush(&r300->context, 0, NULL);
 
     for (i = 0; i < count; i++) {
         if (r300->textures[i] != (struct r300_texture*)texture[i]) {
-- 
cgit v1.2.3


From 3445f476977ae403cef9ca15661fa0f96ff50eca Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 25 Oct 2009 13:53:25 +0100
Subject: r300g: VBOs WIP

---
 src/gallium/drivers/r300/Makefile             |   1 +
 src/gallium/drivers/r300/r300_context.c       |  14 +-
 src/gallium/drivers/r300/r300_context.h       |  27 +--
 src/gallium/drivers/r300/r300_emit.c          | 108 +++++++++++-
 src/gallium/drivers/r300/r300_emit.h          |  12 ++
 src/gallium/drivers/r300/r300_render.c        | 135 +++++++--------
 src/gallium/drivers/r300/r300_state.c         |  13 +-
 src/gallium/drivers/r300/r300_state_derived.c |  14 +-
 src/gallium/drivers/r300/r300_vbo.c           | 226 ++++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_vbo.h           |  36 ++++
 10 files changed, 477 insertions(+), 109 deletions(-)
 create mode 100644 src/gallium/drivers/r300/r300_vbo.c
 create mode 100644 src/gallium/drivers/r300/r300_vbo.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index f73d80de88..d13bb7a36b 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -17,6 +17,7 @@ C_SOURCES = \
 	r300_state.c \
 	r300_state_derived.c \
 	r300_state_invariant.c \
+	r300_vbo.c \
 	r300_vs.c \
 	r300_texture.c \
 	r300_tgsi_to_rc.c
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index f974147ea4..b520e5929e 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -104,6 +104,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
                                          struct r300_winsys* r300_winsys)
 {
     struct r300_context* r300 = CALLOC_STRUCT(r300_context);
+    struct r300_screen* r300screen = r300_screen(screen);
 
     if (!r300)
         return NULL;
@@ -119,9 +120,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     r300->context.clear = r300_clear;
 
-    r300->context.draw_arrays = r300_draw_arrays;
-    r300->context.draw_elements = r300_draw_elements;
-    r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+    if (r300screen->caps->has_tcl)
+    {
+        r300->context.draw_arrays = r300_draw_arrays;
+        r300->context.draw_elements = r300_draw_elements;
+        r300->context.draw_range_elements = r300_draw_range_elements;
+    }
+    else
+    {
+        assert(0);
+    }
 
     r300->context.is_texture_referenced = r300_is_texture_referenced;
     r300->context.is_buffer_referenced = r300_is_buffer_referenced;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 850e5a41c9..a6748852d8 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -216,18 +216,19 @@ struct r300_texture {
     struct r300_texture_state state;
 };
 
-struct r300_vertex_format {
+struct r300_vertex_info {
     /* Parent class */
     struct vertex_info vinfo;
-    /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
-    uint32_t vap_prog_stream_cntl[8];
-    /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
-    uint32_t vap_prog_stream_cntl_ext[8];
     /* Map of vertex attributes into PVS memory for HW TCL,
      * or GA memory for SW TCL. */
     int vs_tab[16];
     /* Map of rasterizer attributes from GB through RS to US. */
     int fs_tab[16];
+
+    /* R300_VAP_PROG_STREAM_CNTL_[0-7] */
+    uint32_t vap_prog_stream_cntl[8];
+    /* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */
+    uint32_t vap_prog_stream_cntl_ext[8];
 };
 
 extern struct pipe_viewport_state r300_viewport_identity;
@@ -256,7 +257,7 @@ struct r300_context {
      * depends on the combination of both currently loaded shaders. */
     struct util_hash_table* shader_hash_table;
     /* Vertex formatting information. */
-    struct r300_vertex_format* vertex_info;
+    struct r300_vertex_info* vertex_info;
 
     /* Various CSO state objects. */
     /* Blend state. */
@@ -285,12 +286,6 @@ struct r300_context {
     /* Texture states. */
     struct r300_texture* textures[8];
     int texture_count;
-    /* Vertex buffers for Gallium. */
-    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
-    int vertex_buffer_count;
-    /* Vertex elements for Gallium. */
-    struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
-    int vertex_element_count;
     /* Vertex shader. */
     struct r300_vertex_shader* vs;
     /* Viewport state. */
@@ -298,6 +293,14 @@ struct r300_context {
     /* ZTOP state. */
     struct r300_ztop_state ztop_state;
 
+    /* Vertex buffers for Gallium. */
+    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+    int vbuf_count;
+    /* Vertex elements for Gallium. */
+    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
+    int aos_count;
+    unsigned hw_prim;
+
     /* Bitmask of dirty state objects. */
     uint32_t dirty_state;
     /* Flag indicating whether or not the HW is dirty. */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 69ce5966e8..92e6ec606c 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -582,7 +582,48 @@ void r300_emit_texture(struct r300_context* r300,
     END_CS;
 }
 
-void r300_emit_vertex_buffer(struct r300_context* r300)
+void r300_emit_aos(struct r300_context* r300, unsigned offset)  /* Emit a 3D_LOAD_VBPNTR packet describing every vertex element; 'offset' is multiplied by each buffer's stride below. */
+{
+    struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+    struct pipe_vertex_element *velem = r300->vertex_element;
+    CS_LOCALS(r300);
+    int i;
+    unsigned packet_size = (r300->aos_count * 3 + 1) / 2;  /* 3 dwords per pair of arrays, 2 for an odd trailing one */
+    BEGIN_CS(2 + packet_size + r300->aos_count * 2);  /* PKT3 header + count dword + payload + 2 dwords per relocation */
+    OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+    OUT_CS(r300->aos_count);
+    for (i = 0; i < r300->aos_count - 1; i += 2) {  /* arrays are emitted two at a time */
+        int buf_num1 = velem[i].vertex_buffer_index;
+        int buf_num2 = velem[i+1].vertex_buffer_index;
+        assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);  /* the shifts below rely on multiples of 4 */
+        assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0);
+        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |  /* for multiples of 4: x << 6 == (x / 4) << 8 */
+               (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));  /* likewise << 14 == (x / 4) << 16 and << 22 == (x / 4) << 24 */
+        OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset +  /* start of the first array of the pair */
+               offset * vbuf[buf_num1].stride);
+        OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset +  /* start of the second array of the pair */
+               offset * vbuf[buf_num2].stride);
+    }
+    if (r300->aos_count & 1) {  /* odd count: i now indexes the last, unpaired array */
+        int buf_num = velem[i].vertex_buffer_index;
+        assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
+        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));  /* upper half of the dword stays zero */
+        OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset +
+               offset * vbuf[buf_num].stride);
+    }
+
+    for (i = 0; i < r300->aos_count; i++) {  /* one relocation per vertex element, in element order */
+        cs_winsys->write_cs_reloc(cs_winsys,
+                                  vbuf[velem[i].vertex_buffer_index].buffer,
+                                  RADEON_GEM_DOMAIN_GTT,
+                                  0,
+                                  0);
+        cs_count -= 2;  /* account for the 2 dwords per relocation reserved in BEGIN_CS */
+    }
+    END_CS;
+}
+#if 0
+void r300_emit_draw_packet(struct r300_context* r300)
 {
     CS_LOCALS(r300);
 
@@ -605,6 +646,65 @@ void r300_emit_vertex_buffer(struct r300_context* r300)
     OUT_CS_RELOC(r300->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
     END_CS;
 }
+#endif
+void r300_emit_draw_arrays(struct r300_context *r300,  /* Emit a non-indexed draw of 'count' vertices using r300->hw_prim. */
+                           unsigned count)
+{
+    CS_LOCALS(r300);
+    assert(count < 65536);  /* count is shifted into bits 16 and up of a 32-bit dword below */
+
+    BEGIN_CS(4);  /* 2 dwords for the register write, 1 for the PKT3 header, 1 for its payload */
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);  /* NOTE(review): writes count, not count - 1, as the max index -- confirm intended */
+    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
+           r300->hw_prim);
+    END_CS;
+}
+
+void r300_emit_draw_elements(struct r300_context *r300,  /* Emit an indexed draw reading 16- or 32-bit indices from indexBuffer. */
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,  /* bytes per index; asserted to be 2 or 4 */
+                             unsigned minIndex,  /* not used in this function */
+                             unsigned maxIndex,
+                             unsigned start,
+                             unsigned count)
+{
+    CS_LOCALS(r300);
+    assert(indexSize == 4 || indexSize == 2);
+    assert(count < 65536);  /* count is shifted into bits 16 and up of a 32-bit dword below */
+    assert((start * indexSize)  % 4 == 0);  /* the starting byte offset must be dword aligned */
+
+    uint32_t size_dwords;
+    uint32_t skip_dwords = indexSize * start / sizeof(uint32_t);
+    assert(skip_dwords == 0);  /* with asserts enabled, only draws that skip no whole dword are accepted */
+
+    BEGIN_CS(10);  /* 2 (register) + 1 + 1 (draw packet) + 1 + 3 (index buffer packet) + 2 (relocation) */
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+    if (indexSize == 4) {
+        size_dwords = count + start;  /* one dword per 32-bit index */
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+               R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300->hw_prim);
+    } else {
+        size_dwords = (count + start + 1) / 2;  /* two 16-bit indices per dword, rounded up */
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+               (count << 16) | r300->hw_prim);
+    }
+
+    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
+    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
+           (0 << R300_INDX_BUFFER_SKIP_SHIFT));  /* skip field is hard-coded to 0 */
+    OUT_CS(skip_dwords);
+    OUT_CS(size_dwords);
+    cs_winsys->write_cs_reloc(cs_winsys,
+                              indexBuffer,
+                              RADEON_GEM_DOMAIN_GTT,
+                              0,
+                              0);
+    cs_count -= 2;  /* account for the 2 relocation dwords reserved in BEGIN_CS */
+
+    END_CS;
+}
 
 void r300_emit_vertex_format_state(struct r300_context* r300)
 {
@@ -771,8 +871,6 @@ void r300_emit_dirty_state(struct r300_context* r300)
         return;
     }
 
-    r300_update_derived_state(r300);
-
     /* Clean out BOs. */
     r300->winsys->reset_bos(r300->winsys);
 
@@ -823,7 +921,7 @@ validate:
             goto validate;
         }
     } else {
-        debug_printf("No VBO while emitting dirty state!\n");
+        // debug_printf("No VBO while emitting dirty state!\n");
     }
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
@@ -951,7 +1049,7 @@ validate:
     */
 
     /* Finally, emit the VBO. */
-    r300_emit_vertex_buffer(r300);
+    //r300_emit_vertex_buffer(r300);
 
     r300->dirty_hw++;
 }
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 6befca72ce..b4fdfecde0 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -29,6 +29,8 @@
 struct rX00_fragment_program_code;
 struct r300_vertex_program_code;
 
+void r300_emit_aos(struct r300_context* r300, unsigned offset);
+
 void r300_emit_blend_state(struct r300_context* r300,
                            struct r300_blend_state* blend);
 
@@ -38,6 +40,16 @@ void r300_emit_blend_color_state(struct r300_context* r300,
 void r300_emit_clip_state(struct r300_context* r300,
                           struct pipe_clip_state* clip);
 
+void r300_emit_draw_arrays(struct r300_context *r300, unsigned count);
+
+void r300_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned start,
+                             unsigned count);
+
 void r300_emit_dsa_state(struct r300_context* r300,
                          struct r300_dsa_state* dsa);
 
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 634c803f2a..86aaf841dd 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 
+#include "r300_vbo.h"
 #include "r300_cs.h"
 #include "r300_context.h"
 #include "r300_emit.h"
@@ -69,98 +70,64 @@ uint32_t r300_translate_primitive(unsigned prim)
     }
 }
 
-/* This is the fast-path drawing & emission for HW TCL. */
-boolean r300_draw_range_elements(struct pipe_context* pipe,
-                                 struct pipe_buffer* indexBuffer,
-                                 unsigned indexSize,
-                                 unsigned minIndex,
-                                 unsigned maxIndex,
-                                 unsigned mode,
-                                 unsigned start,
-                                 unsigned count)
+static boolean setup_vertex_buffers(struct r300_context *r300)
 {
-    struct r300_context* r300 = r300_context(pipe);
-    uint32_t prim = r300_translate_primitive(mode);
-    struct pipe_vertex_buffer* aos = r300->vertex_buffers;
-    unsigned aos_count = r300->vertex_buffer_count;
-    short* indices;
-    unsigned packet_size;
-    unsigned i;
-    bool invalid = FALSE;
-    
-    CS_LOCALS(r300);
-
-    if (!u_trim_pipe_prim(mode, &count)) {
-        return FALSE;
-    }
+    unsigned vbuf_count = r300->aos_count;
+    struct pipe_vertex_buffer *vbuf= r300->vertex_buffer;
+    struct pipe_vertex_element *velem= r300->vertex_element;
+    bool invalid = false;
 
 validate:
-    for (i = 0; i < aos_count; i++) {
-        if (!r300->winsys->add_buffer(r300->winsys, aos[i].buffer,
-                    RADEON_GEM_DOMAIN_GTT, 0)) {
-            pipe->flush(pipe, 0, NULL);
+    for (int i = 0; i < vbuf_count; i++) {
+        if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer,
+            RADEON_GEM_DOMAIN_GTT, 0)) {
+            r300->context.flush(&r300->context, 0, NULL);
             goto validate;
         }
     }
+
     if (!r300->winsys->validate(r300->winsys)) {
-        pipe->flush(pipe, 0, NULL);
+        r300->context.flush(&r300->context, 0, NULL);
         if (invalid) {
             /* Well, hell. */
             debug_printf("r300: Stuck in validation loop, gonna quit now.");
             exit(1);
         }
-        invalid = TRUE;
+        invalid = true;
         goto validate;
     }
 
-    r300_emit_dirty_state(r300);
+    return invalid;
+}
 
-    packet_size = (aos_count >> 1) * 3 + (aos_count & 1) * 2;
-
-    BEGIN_CS(3 + packet_size + (aos_count * 2));
-    OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
-    OUT_CS(aos_count);
-    for (i = 0; i < aos_count - 1; i += 2) {
-        OUT_CS(aos[i].stride |
-            (aos[i].stride << 8) |
-            (aos[i + 1].stride << 16) |
-            (aos[i + 1].stride << 24));
-        OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride);
-        OUT_CS(aos[i + 1].buffer_offset + start * 4 * aos[i + 1].stride);
-    }
-    if (aos_count & 1) {
-        OUT_CS(aos[i].stride | (aos[i].stride << 8));
-        OUT_CS(aos[i].buffer_offset + start * 4 * aos[i].stride);
-    }
-    for (i = 0; i < aos_count; i++) {
-        OUT_CS_RELOC(aos[i].buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
-    }
-    END_CS;
+/* This is the fast-path drawing & emission for HW TCL. */
+boolean r300_draw_range_elements(struct pipe_context* pipe,
+                                 struct pipe_buffer* indexBuffer,
+                                 unsigned indexSize,
+                                 unsigned minIndex,
+                                 unsigned maxIndex,
+                                 unsigned mode,
+                                 unsigned start,
+                                 unsigned count)
+{
+    struct r300_context* r300 = r300_context(pipe);
 
-    if (indexBuffer) {
-        indices = (short*)pipe_buffer_map(pipe->screen, indexBuffer,
-                                          PIPE_BUFFER_USAGE_CPU_READ);
+    r300_update_derived_state(r300);
 
-        /* Set the starting point. */
-        indices += start;
+    setup_vertex_buffers(r300);
 
-        BEGIN_CS(2 + (count+1)/2);
-        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (count + 1)/2);
-        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | prim);
-        for (i = 0; i < count - 1; i += 2) {
-            OUT_CS(indices[i + 1] << 16 | indices[i]);
-        }
-        if (count % 2) {
-            OUT_CS(indices[count - 1]);
-        }
-        END_CS;
-    } else {
-        BEGIN_CS(2);
-        OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
-               prim);
-        END_CS;
-    }
+    setup_vertex_attributes(r300);
+
+    setup_index_buffer(r300, indexBuffer, indexSize);
+
+    r300->hw_prim = r300_translate_primitive(mode);
+
+    r300_emit_dirty_state(r300);
+
+    r300_emit_aos(r300, 0);
+
+    r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
+                            start, count);
 
     return TRUE;
 }
@@ -178,7 +145,23 @@ boolean r300_draw_elements(struct pipe_context* pipe,
 boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
                          unsigned start, unsigned count)
 {
-    return pipe->draw_elements(pipe, NULL, 0, mode, start, count);
+    struct r300_context* r300 = r300_context(pipe);
+
+    r300_update_derived_state(r300);
+
+    setup_vertex_buffers(r300);
+
+    setup_vertex_attributes(r300);
+
+    r300->hw_prim = r300_translate_primitive(mode);
+
+    r300_emit_dirty_state(r300);
+
+    r300_emit_aos(r300, start);
+
+    r300_emit_draw_arrays(r300, count);
+
+    return TRUE;
 }
 
 /****************************************************************************
@@ -196,7 +179,9 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        unsigned start,
                                        unsigned count)
 {
+    assert(0);
     struct r300_context* r300 = r300_context(pipe);
+#if 0
     int i;
 
     if (!u_trim_pipe_prim(mode, &count)) {
@@ -236,7 +221,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
         draw_set_mapped_element_buffer_range(r300->draw, 0, start,
                                              start + count - 1, NULL);
     }
-
+#endif
     return TRUE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index bed886fad0..e0b85ab768 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -666,10 +666,9 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    memcpy(r300->vertex_buffers, buffers,
+    memcpy(r300->vertex_buffer, buffers,
         sizeof(struct pipe_vertex_buffer) * count);
-
-    r300->vertex_buffer_count = count;
+    r300->vbuf_count = count;
 
     if (r300->draw) {
         draw_flush(r300->draw);
@@ -683,10 +682,10 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    memcpy(r300->vertex_elements, elements,
-        sizeof(struct pipe_vertex_element) * count);
-
-    r300->vertex_element_count = count;
+    memcpy(r300->vertex_element,
+           elements,
+           sizeof(struct pipe_vertex_element) * count);
+    r300->aos_count = count;
 
     if (r300->draw) {
         draw_flush(r300->draw);
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 7d000e9e2d..14d7bb094c 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -63,7 +63,7 @@ int r300_shader_key_compare(void* key1, void* key2) {
 
 /* Set up the vs_tab and routes. */
 static void r300_vs_tab_routes(struct r300_context* r300,
-                               struct r300_vertex_format* vformat)
+                               struct r300_vertex_info* vformat)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct vertex_info* vinfo = &vformat->vinfo;
@@ -219,7 +219,7 @@ static void r300_vs_tab_routes(struct r300_context* r300,
 
 /* Update the PSC tables. */
 static void r300_vertex_psc(struct r300_context* r300,
-                            struct r300_vertex_format* vformat)
+                            struct r300_vertex_info* vformat)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct vertex_info* vinfo = &vformat->vinfo;
@@ -282,7 +282,7 @@ static void r300_vertex_psc(struct r300_context* r300,
 
 /* Set up the mappings from GB to US, for RS block. */
 static void r300_update_fs_tab(struct r300_context* r300,
-                               struct r300_vertex_format* vformat)
+                               struct r300_vertex_info* vformat)
 {
     struct tgsi_shader_info* info = &r300->fs->info;
     int i, cols = 0, texs = 0, cols_emitted = 0;
@@ -455,13 +455,13 @@ static void r300_update_rs_block(struct r300_context* r300,
 /* Update the vertex format. */
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
-    struct r300_shader_key* key;
-    struct r300_vertex_format* vformat;
+    struct r300_vertex_info* vformat;
     struct r300_rs_block* rs_block;
-    struct r300_shader_derived_value* value;
     int i;
 
     /*
+    struct r300_shader_key* key;
+    struct r300_shader_derived_value* value;
     key = CALLOC_STRUCT(r300_shader_key);
     key->vs = r300->vs;
     key->fs = r300->fs;
@@ -486,7 +486,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     } */
 
     /* XXX This will be refactored ASAP. */
-    vformat = CALLOC_STRUCT(r300_vertex_format);
+    vformat = CALLOC_STRUCT(r300_vertex_info);
     rs_block = CALLOC_STRUCT(r300_rs_block);
 
     for (i = 0; i < 16; i++) {
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
new file mode 100644
index 0000000000..e032641286
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "r300_vbo.h"
+
+#include "pipe/p_format.h"
+
+#include "r300_cs.h"
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_winsys.h"
+
+static void translate_vertex_format(enum pipe_format format,
+                                    unsigned nr_comps,
+                                    unsigned component_size,
+                                    unsigned dst_loc,
+                                    uint32_t *hw_fmt1,
+                                    uint32_t *hw_fmt2)
+{
+    uint32_t fmt1 = 0;
+
+    switch (pf_type(format))
+    {
+        case PIPE_FORMAT_TYPE_FLOAT:
+            assert(component_size == 4);
+            fmt1 = R300_DATA_TYPE_FLOAT_1 + nr_comps - 1;
+            break;
+        case PIPE_FORMAT_TYPE_UNORM:
+        case PIPE_FORMAT_TYPE_SNORM:
+        case PIPE_FORMAT_TYPE_USCALED:
+        case PIPE_FORMAT_TYPE_SSCALED:
+            if (component_size == 1)
+            {
+                assert(nr_comps == 4);
+                fmt1 = R300_DATA_TYPE_BYTE;
+            }
+            else if (component_size == 2)
+            {
+                if (nr_comps == 2)
+                    fmt1 = R300_DATA_TYPE_SHORT_2;
+                else if (nr_comps == 4)
+                    fmt1 = R300_DATA_TYPE_SHORT_4;
+                else
+                    assert(0);
+            }
+            else
+            {
+                assert(0);
+            }
+
+            if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM)
+            {
+                fmt1 |= R300_SIGNED;
+            }
+            else if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED)
+            {
+                fmt1 |= R300_SIGNED;
+                fmt1 |= R300_NORMALIZE;
+            }
+            else if (pf_type(format) == PIPE_FORMAT_TYPE_USCALED)
+            {
+                fmt1 |= R300_NORMALIZE;
+            }
+            break;
+        default:
+            assert(0);
+            break;
+    }
+
+    *hw_fmt1 = fmt1 | (dst_loc << R300_DST_VEC_LOC_SHIFT);
+    *hw_fmt2 = (pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) |
+               (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) |
+               (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) |
+               (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) |
+               (0xf << R300_WRITE_ENA_SHIFT);
+}
+
+static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo,
+                                          struct pipe_vertex_element *vert_elem,
+                                          unsigned attr_num)
+{
+    uint32_t hw_fmt1, hw_fmt2;
+    translate_vertex_format(vert_elem->src_format,
+                            vert_elem->nr_components,
+                            pf_size_x(vert_elem->src_format),
+                            attr_num,
+                            &hw_fmt1,
+                            &hw_fmt2);
+
+    if (attr_num % 2 == 0)
+    {
+        vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1;
+        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2;
+    }
+    else
+    {
+        vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16;
+        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16;
+    }
+}
+
+static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo,
+                                        unsigned attribs_num)
+{
+    uint32_t last_vec_bit = (attribs_num % 2 == 0) ? (R300_LAST_VEC << 16) : R300_LAST_VEC;
+
+    assert(attribs_num > 0 && attribs_num <= 16);
+    vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit;
+}
+
+void setup_vertex_attributes(struct r300_context *r300)
+{
+    for (int i=0; i<r300->aos_count; i++)
+    {
+        struct pipe_vertex_element *vert_elem = &r300->vertex_element[i];
+
+        setup_vertex_attribute(r300->vertex_info, vert_elem, i);
+    }
+
+    finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count);
+}
+
+static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element)
+{ /* Empty stub: no per-element array setup is performed here. */
+}
+
+static void finish_vertex_arrays_setup(struct r300_context *r300)
+{ /* Empty stub: nothing is finalized here. */
+}
+
+/* Can this vertex format be used as-is?  Accepted combinations (sizes as
+ * reported by pf_size_x): size 1 x4, size 2 x2/x4, size 4 with any count. */
+static bool format_is_supported(enum pipe_format format, int nr_components)
+{
+    const unsigned size = pf_size_x(format);
+
+    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS)
+        return false;
+
+    /* All four channels must share one size. */
+    if (size != pf_size_y(format) || size != pf_size_z(format) ||
+        size != pf_size_w(format))
+        return false;
+
+    /* Following should be supported as long as stride is 4 bytes aligned */
+    if (size == 1 && nr_components != 4) /* was "!= 1": rejected valid formats */
+        return false;
+    if (size == 2 && nr_components != 2 && nr_components != 4)
+        return false;
+
+    return size != 3 && size <= 4;
+}
+
+/* Offset of an element: its vertex buffer's buffer_offset plus elem_offset. */
+static INLINE int get_buffer_offset(struct r300_context *r300,
+                                    unsigned int buf_nr, unsigned int elem_offset)
+{
+    return elem_offset + r300->vertex_buffer[buf_nr].buffer_offset;
+}
+
+/** Check each vertex element (format supported, offset a multiple of 4),
+ *  asserting otherwise, then call the currently empty array-setup hooks. */
+static void setup_vertex_buffers(struct r300_context *r300)
+{
+    for (int i=0; i<r300->aos_count; i++)
+    {
+        struct pipe_vertex_element *vert_elem = &r300->vertex_element[i];
+        if (!format_is_supported(vert_elem->src_format, vert_elem->nr_components))
+        {
+            assert(0);
+            /* TODO: use translate module to convert the data */
+            /*
+            struct pipe_buffer *buf;
+            const unsigned int max_index = r300->vertex_buffer[vert_elem->vertex_buffer_index].max_index;
+            buf = pipe_buffer_create(r300->context.screen, 4, usage, vert_elem->nr_components * max_index * sizeof(float));
+            */
+        }
+
+        if (get_buffer_offset(r300, vert_elem->vertex_buffer_index, vert_elem->src_offset) % 4 != 0)
+        {
+            /* TODO: need to align buffer; unaligned offsets are not handled */
+            assert(0);
+        }
+        setup_vertex_array(r300, vert_elem);
+    }
+
+    finish_vertex_arrays_setup(r300);
+}
+
+/* Pass the index buffer to winsys add_buffer() and then validate(). */
+void setup_index_buffer(struct r300_context *r300,
+                        struct pipe_buffer* indexBuffer,
+                        unsigned indexSize)
+{
+    /* Was "indexSize = 2": an assignment, so the check could never fire. */
+    assert(indexSize == 2 || indexSize == 4);
+
+    if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
+                                  RADEON_GEM_DOMAIN_GTT, 0)) {
+        assert(0);
+    }
+    if (!r300->winsys->validate(r300->winsys)) {
+        assert(0);
+    }
+}
+
diff --git a/src/gallium/drivers/r300/r300_vbo.h b/src/gallium/drivers/r300/r300_vbo.h
new file mode 100644
index 0000000000..7afa75899c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vbo.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef R300_VBO_H
+#define R300_VBO_H
+
+struct r300_context;
+struct pipe_buffer;
+
+void setup_vertex_attributes(struct r300_context *r300);
+
+void setup_index_buffer(struct r300_context *r300,
+                        struct pipe_buffer* indexBuffer,
+                        unsigned indexSize);
+
+#endif
-- 
cgit v1.2.3


From 1ef0341ea7ee08284ebafe4f347643e1190d5777 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <m.cencora@gmail.com>
Date: Sun, 25 Oct 2009 13:51:45 +0100
Subject: r300g: don't hang GPU on misbehaving apps

---
 src/gallium/drivers/r300/r300_render.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 86aaf841dd..cbda30227d 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -112,6 +112,9 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
+    if (!u_trim_pipe_prim(mode, &count))
+        return false;
+
     r300_update_derived_state(r300);
 
     setup_vertex_buffers(r300);
@@ -147,6 +150,9 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 {
     struct r300_context* r300 = r300_context(pipe);
 
+    if (!u_trim_pipe_prim(mode, &count))
+        return false;
+
     r300_update_derived_state(r300);
 
     setup_vertex_buffers(r300);
-- 
cgit v1.2.3


From 24c6fdbd32a84314c81897d0d1567121ed1c6118 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 6 Nov 2009 20:21:38 -0800
Subject: r300g: Use common state funcs for translating vert formats.

---
 src/gallium/drivers/r300/r300_vbo.c | 78 +++----------------------------------
 1 file changed, 6 insertions(+), 72 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index e032641286..37b5c9224f 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -27,85 +27,19 @@
 
 #include "r300_cs.h"
 #include "r300_context.h"
+#include "r300_state_inlines.h"
 #include "r300_reg.h"
 #include "r300_winsys.h"
 
-static void translate_vertex_format(enum pipe_format format,
-                                    unsigned nr_comps,
-                                    unsigned component_size,
-                                    unsigned dst_loc,
-                                    uint32_t *hw_fmt1,
-                                    uint32_t *hw_fmt2)
-{
-    uint32_t fmt1 = 0;
-
-    switch (pf_type(format))
-    {
-        case PIPE_FORMAT_TYPE_FLOAT:
-            assert(component_size == 4);
-            fmt1 = R300_DATA_TYPE_FLOAT_1 + nr_comps - 1;
-            break;
-        case PIPE_FORMAT_TYPE_UNORM:
-        case PIPE_FORMAT_TYPE_SNORM:
-        case PIPE_FORMAT_TYPE_USCALED:
-        case PIPE_FORMAT_TYPE_SSCALED:
-            if (component_size == 1)
-            {
-                assert(nr_comps == 4);
-                fmt1 = R300_DATA_TYPE_BYTE;
-            }
-            else if (component_size == 2)
-            {
-                if (nr_comps == 2)
-                    fmt1 = R300_DATA_TYPE_SHORT_2;
-                else if (nr_comps == 4)
-                    fmt1 = R300_DATA_TYPE_SHORT_4;
-                else
-                    assert(0);
-            }
-            else
-            {
-                assert(0);
-            }
-
-            if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM)
-            {
-                fmt1 |= R300_SIGNED;
-            }
-            else if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED)
-            {
-                fmt1 |= R300_SIGNED;
-                fmt1 |= R300_NORMALIZE;
-            }
-            else if (pf_type(format) == PIPE_FORMAT_TYPE_USCALED)
-            {
-                fmt1 |= R300_NORMALIZE;
-            }
-            break;
-        default:
-            assert(0);
-            break;
-    }
-
-    *hw_fmt1 = fmt1 | (dst_loc << R300_DST_VEC_LOC_SHIFT);
-    *hw_fmt2 = (pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) |
-               (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) |
-               (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) |
-               (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) |
-               (0xf << R300_WRITE_ENA_SHIFT);
-}
-
 static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo,
                                           struct pipe_vertex_element *vert_elem,
                                           unsigned attr_num)
 {
-    uint32_t hw_fmt1, hw_fmt2;
-    translate_vertex_format(vert_elem->src_format,
-                            vert_elem->nr_components,
-                            pf_size_x(vert_elem->src_format),
-                            attr_num,
-                            &hw_fmt1,
-                            &hw_fmt2);
+    uint16_t hw_fmt1, hw_fmt2;
+
+    hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) |
+        (attr_num << R300_DST_VEC_LOC_SHIFT);
+    hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format);
 
     if (attr_num % 2 == 0)
     {
-- 
cgit v1.2.3


From 96b729f926fafeca6479eed0933bc4275fb7843b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 09:47:01 -0800
Subject: r300g: Don't pass hw_prim around in the context.

And some other fixes.
---
 src/gallium/drivers/r300/r300_context.h |  1 -
 src/gallium/drivers/r300/r300_emit.c    | 58 -------------------------
 src/gallium/drivers/r300/r300_emit.h    | 10 -----
 src/gallium/drivers/r300/r300_render.c  | 76 +++++++++++++++++++++++++++++----
 src/gallium/drivers/r300/r300_vbo.c     | 10 +++--
 5 files changed, 74 insertions(+), 81 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index a6748852d8..8d14c53f49 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -299,7 +299,6 @@ struct r300_context {
     /* Vertex elements for Gallium. */
     struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
     int aos_count;
-    unsigned hw_prim;
 
     /* Bitmask of dirty state objects. */
     uint32_t dirty_state;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 92e6ec606c..ec1d521800 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -647,64 +647,6 @@ void r300_emit_draw_packet(struct r300_context* r300)
     END_CS;
 }
 #endif
-void r300_emit_draw_arrays(struct r300_context *r300,
-                           unsigned count)
-{
-    CS_LOCALS(r300);
-    assert(count < 65536);
-
-    BEGIN_CS(4);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
-    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
-    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
-           r300->hw_prim);
-    END_CS;
-}
-
-void r300_emit_draw_elements(struct r300_context *r300,
-                             struct pipe_buffer* indexBuffer,
-                             unsigned indexSize,
-                             unsigned minIndex,
-                             unsigned maxIndex,
-                             unsigned start,
-                             unsigned count)
-{
-    CS_LOCALS(r300);
-    assert(indexSize == 4 || indexSize == 2);
-    assert(count < 65536);
-    assert((start * indexSize)  % 4 == 0);
-
-    uint32_t size_dwords;
-    uint32_t skip_dwords = indexSize * start / sizeof(uint32_t);
-    assert(skip_dwords == 0);
-
-    BEGIN_CS(10);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
-    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
-    if (indexSize == 4) {
-        size_dwords = count + start;
-        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
-               R300_VAP_VF_CNTL__INDEX_SIZE_32bit | r300->hw_prim);
-    } else {
-        size_dwords = (count + start + 1) / 2;
-        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
-               (count << 16) | r300->hw_prim);
-    }
-
-    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
-    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
-           (0 << R300_INDX_BUFFER_SKIP_SHIFT));
-    OUT_CS(skip_dwords);
-    OUT_CS(size_dwords);
-    cs_winsys->write_cs_reloc(cs_winsys,
-                              indexBuffer,
-                              RADEON_GEM_DOMAIN_GTT,
-                              0,
-                              0);
-    cs_count -= 2;
-
-    END_CS;
-}
 
 void r300_emit_vertex_format_state(struct r300_context* r300)
 {
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index b4fdfecde0..7c83c5166d 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -40,16 +40,6 @@ void r300_emit_blend_color_state(struct r300_context* r300,
 void r300_emit_clip_state(struct r300_context* r300,
                           struct pipe_clip_state* clip);
 
-void r300_emit_draw_arrays(struct r300_context *r300, unsigned count);
-
-void r300_emit_draw_elements(struct r300_context *r300,
-                             struct pipe_buffer* indexBuffer,
-                             unsigned indexSize,
-                             unsigned minIndex,
-                             unsigned maxIndex,
-                             unsigned start,
-                             unsigned count);
-
 void r300_emit_dsa_state(struct r300_context* r300,
                          struct r300_dsa_state* dsa);
 
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index cbda30227d..6f7c645334 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -31,13 +31,13 @@
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 
-#include "r300_vbo.h"
 #include "r300_cs.h"
 #include "r300_context.h"
 #include "r300_emit.h"
 #include "r300_reg.h"
 #include "r300_render.h"
 #include "r300_state_derived.h"
+#include "r300_vbo.h"
 
 /* r300_render: Vertex and index buffer primitive emission. */
 #define R300_MAX_VBO_SIZE  (1024 * 1024)
@@ -70,6 +70,70 @@ uint32_t r300_translate_primitive(unsigned prim)
     }
 }
 
+/* Emit a DRAW_VBUF_2 packet for `count` vertices of primitive type `mode`. */
+static void r300_emit_draw_arrays(struct r300_context *r300,
+                                  unsigned mode, unsigned count)
+{
+    CS_LOCALS(r300);
+    assert(count < 65536); /* count occupies bits 16+ of the control word */
+
+    BEGIN_CS(4);
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
+    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+    OUT_CS(r300_translate_primitive(mode) | (count << 16) |
+           R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST);
+    END_CS;
+}
+
+/* Emit DRAW_INDX_2 plus an INDX_BUFFER packet for indexBuffer; minIndex is unused. */
+static void r300_emit_draw_elements(struct r300_context *r300,
+                                    struct pipe_buffer* indexBuffer,
+                                    unsigned indexSize,
+                                    unsigned minIndex,
+                                    unsigned maxIndex,
+                                    unsigned mode,
+                                    unsigned start,
+                                    unsigned count)
+{
+    CS_LOCALS(r300);
+    assert(indexSize == 4 || indexSize == 2);
+    assert(count < 65536); /* count occupies bits 16+ of the control word */
+    assert((start * indexSize)  % 4 == 0);
+
+    uint32_t size_dwords;
+    uint32_t skip_dwords = indexSize * start / sizeof(uint32_t);
+    assert(skip_dwords == 0); /* with the check above, only start == 0 passes */
+
+    BEGIN_CS(10);
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
+    OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+    if (indexSize == 4) {
+        size_dwords = count + start; /* one index per dword */
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+               R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
+               r300_translate_primitive(mode));
+    } else {
+        size_dwords = (count + start + 1) / 2; /* two per dword, rounded up */
+        OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+               r300_translate_primitive(mode));
+    }
+
+    OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
+    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
+           (0 << R300_INDX_BUFFER_SKIP_SHIFT));
+    OUT_CS(skip_dwords);
+    OUT_CS(size_dwords);
+    /* XXX hax: reloc goes straight to the winsys; cs_count is fixed up by hand */
+    cs_winsys->write_cs_reloc(cs_winsys,
+                              indexBuffer,
+                              RADEON_GEM_DOMAIN_GTT,
+                              0,
+                              0);
+    cs_count -= 2;
+
+    END_CS;
+}
+
 static boolean setup_vertex_buffers(struct r300_context *r300)
 {
     unsigned vbuf_count = r300->aos_count;
@@ -123,14 +187,12 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 
     setup_index_buffer(r300, indexBuffer, indexSize);
 
-    r300->hw_prim = r300_translate_primitive(mode);
-
     r300_emit_dirty_state(r300);
 
     r300_emit_aos(r300, 0);
 
     r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
-                            start, count);
+                            mode, start, count);
 
     return TRUE;
 }
@@ -159,13 +221,11 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 
     setup_vertex_attributes(r300);
 
-    r300->hw_prim = r300_translate_primitive(mode);
-
     r300_emit_dirty_state(r300);
 
     r300_emit_aos(r300, start);
 
-    r300_emit_draw_arrays(r300, count);
+    r300_emit_draw_arrays(r300, mode, count);
 
     return TRUE;
 }
@@ -186,8 +246,8 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        unsigned count)
 {
     assert(0);
-    struct r300_context* r300 = r300_context(pipe);
 #if 0
+    struct r300_context* r300 = r300_context(pipe);
     int i;
 
     if (!u_trim_pipe_prim(mode, &count)) {
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index 37b5c9224f..ab6f5c5942 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -56,7 +56,8 @@ static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo,
 static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo,
                                         unsigned attribs_num)
 {
-    uint32_t last_vec_bit = (attribs_num % 2 == 0) ? (R300_LAST_VEC << 16) : R300_LAST_VEC;
+    uint32_t last_vec_bit = (attribs_num % 2 == 0) ?
+        (R300_LAST_VEC << 16) : R300_LAST_VEC;
 
     assert(attribs_num > 0 && attribs_num <= 16);
     vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit;
@@ -64,10 +65,11 @@ static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo,
 
 void setup_vertex_attributes(struct r300_context *r300)
 {
-    for (int i=0; i<r300->aos_count; i++)
-    {
-        struct pipe_vertex_element *vert_elem = &r300->vertex_element[i];
+    struct pipe_vertex_element *vert_elem;
+    int i;
 
+    for (i = 0; i < r300->aos_count; i++) {
+        vert_elem = &r300->vertex_element[i];
         setup_vertex_attribute(r300->vertex_info, vert_elem, i);
     }
 
-- 
cgit v1.2.3


From 7518d9b1b7369f6e5ca1fdaf6a34e39a4acace9a Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:05:31 -0800
Subject: r300g: Clean up r300_setup_vertex_buffers.

---
 src/gallium/drivers/r300/r300_render.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 6f7c645334..e28af7600a 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -134,16 +134,16 @@ static void r300_emit_draw_elements(struct r300_context *r300,
 }
 
 
-static boolean setup_vertex_buffers(struct r300_context *r300)
+static boolean r300_setup_vertex_buffers(struct r300_context *r300)
 {
     unsigned vbuf_count = r300->aos_count;
-    struct pipe_vertex_buffer *vbuf= r300->vertex_buffer;
-    struct pipe_vertex_element *velem= r300->vertex_element;
-    bool invalid = false;
+    struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+    struct pipe_vertex_element *velem = r300->vertex_element;
 
 validate:
     for (int i = 0; i < vbuf_count; i++) {
-        if (!r300->winsys->add_buffer(r300->winsys, vbuf[velem[i].vertex_buffer_index].buffer,
+        if (!r300->winsys->add_buffer(r300->winsys,
+                vbuf[velem[i].vertex_buffer_index].buffer,
             RADEON_GEM_DOMAIN_GTT, 0)) {
             r300->context.flush(&r300->context, 0, NULL);
             goto validate;
@@ -152,16 +152,10 @@ validate:
 
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
-        if (invalid) {
-            /* Well, hell. */
-            debug_printf("r300: Stuck in validation loop, gonna quit now.");
-            exit(1);
-        }
-        invalid = true;
-        goto validate;
+        return r300->winsys->validate(r300->winsys);
     }
 
-    return invalid;
+    return TRUE;
 }
 
 /* This is the fast-path drawing & emission for HW TCL. */
@@ -181,7 +175,9 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 
     r300_update_derived_state(r300);
 
-    setup_vertex_buffers(r300);
+    if (!r300_setup_vertex_buffers(r300)) {
+        return FALSE;
+    }
 
     setup_vertex_attributes(r300);
 
@@ -217,7 +213,9 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 
     r300_update_derived_state(r300);
 
-    setup_vertex_buffers(r300);
+    if (!r300_setup_vertex_buffers(r300)) {
+        return FALSE;
+    }
 
     setup_vertex_attributes(r300);
 
-- 
cgit v1.2.3


From 7da3cc4241b8550ccc1ec5ba3c93334094f5fb11 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:14:07 -0800
Subject: r300g: Clean up indexbuf render, switch to RELOC macro.

---
 src/gallium/drivers/r300/r300_render.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index e28af7600a..b4351d541d 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -94,41 +94,43 @@ static void r300_emit_draw_elements(struct r300_context *r300,
                                     unsigned start,
                                     unsigned count)
 {
+    uint32_t count_dwords;
+    uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
     CS_LOCALS(r300);
+
+    /* XXX most of these are stupid */
     assert(indexSize == 4 || indexSize == 2);
     assert(count < 65536);
     assert((start * indexSize)  % 4 == 0);
-
-    uint32_t size_dwords;
-    uint32_t skip_dwords = indexSize * start / sizeof(uint32_t);
-    assert(skip_dwords == 0);
+    assert(offset_dwords == 0);
 
     BEGIN_CS(10);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
-        size_dwords = count + start;
+        count_dwords = count + start;
         OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
                R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
                r300_translate_primitive(mode));
     } else {
-        size_dwords = (count + start + 1) / 2;
+        count_dwords = (count + start + 1) / 2;
         OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
                r300_translate_primitive(mode));
     }
 
+    /* INDX_BUFFER is a truly special packet3.
+     * Unlike most other packet3, where the offset is after the count,
+     * the order is reversed, so the relocation ends up carrying the
+     * size of the indexbuf instead of the offset.
+     *
+     * XXX Fix offset
+     */
     OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
     OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
            (0 << R300_INDX_BUFFER_SKIP_SHIFT));
-    OUT_CS(skip_dwords);
-    OUT_CS(size_dwords);
-    /* XXX hax */
-    cs_winsys->write_cs_reloc(cs_winsys,
-                              indexBuffer,
-                              RADEON_GEM_DOMAIN_GTT,
-                              0,
-                              0);
-    cs_count -= 2;
+    OUT_CS(offset_dwords);
+    OUT_CS_RELOC(indexBuffer, count_dwords,
+        RADEON_GEM_DOMAIN_GTT, 0, 0);
 
     END_CS;
 }
-- 
cgit v1.2.3


From b6c3954138ef70ea7d2cbd3ba9519f404ef616d7 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:26:57 -0800
Subject: r300g: s/false/FALSE/

Also s/true/TRUE/
---
 src/gallium/drivers/r300/r300_render.c |  4 ++--
 src/gallium/drivers/r300/r300_vbo.c    | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index b4351d541d..1532de367f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -173,7 +173,7 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     if (!u_trim_pipe_prim(mode, &count))
-        return false;
+        return FALSE;
 
     r300_update_derived_state(r300);
 
@@ -211,7 +211,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
     struct r300_context* r300 = r300_context(pipe);
 
     if (!u_trim_pipe_prim(mode, &count))
-        return false;
+        return FALSE;
 
     r300_update_derived_state(r300);
 
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index ab6f5c5942..be74a49eb8 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -84,27 +84,27 @@ static void finish_vertex_arrays_setup(struct r300_context *r300)
 {
 }
 
-static bool format_is_supported(enum pipe_format format, int nr_components)
+static boolean format_is_supported(enum pipe_format format, int nr_components)
 {
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS)
-        return false;
+        return FALSE;
 
     if ((pf_size_x(format) != pf_size_y(format)) ||
         (pf_size_x(format) != pf_size_z(format)) ||
         (pf_size_x(format) != pf_size_w(format)))
-        return false;
+        return FALSE;
 
     /* Following should be supported as long as stride is 4 bytes aligned */
     if (pf_size_x(format) != 1 && nr_components != 4)
-        return false;
+        return FALSE;
 
     if (pf_size_x(format) != 2 && !(nr_components == 2 || nr_components == 4))
-        return false;
+        return FALSE;
 
     if (pf_size_x(format) == 3 || pf_size_x(format) > 4)
-        return false;
+        return FALSE;
 
-    return true;
+    return TRUE;
 }
 
 static INLINE int get_buffer_offset(struct r300_context *r300,
-- 
cgit v1.2.3


From 746c01b3b2f77d8d8ba14fc517d04dbaf080d77d Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:34:00 -0800
Subject: r300g: Moar vbo cleanup.

---
 src/gallium/drivers/r300/r300_vbo.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index be74a49eb8..cec79ec97e 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -118,13 +118,16 @@ static INLINE int get_buffer_offset(struct r300_context *r300,
  */
 static void setup_vertex_buffers(struct r300_context *r300)
 {
-    for (int i=0; i<r300->aos_count; i++)
+    struct pipe_vertex_element *vert_elem;
+    int i;
+
+    for (i = 0; i < r300->aos_count; i++)
     {
-        struct pipe_vertex_element *vert_elem = &r300->vertex_element[i];
-        if (!format_is_supported(vert_elem->src_format, vert_elem->nr_components))
-        {
+        vert_elem = &r300->vertex_element[i];
+        if (!format_is_supported(vert_elem->src_format,
+                                 vert_elem->nr_components)) {
+            /* XXX use translate module to convert the data */
             assert(0);
-            /* use translate module to convert the data */
             /*
             struct pipe_buffer *buf;
             const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index;
@@ -132,9 +135,10 @@ static void setup_vertex_buffers(struct r300_context *r300)
             */
         }
 
-        if (get_buffer_offset(r300, vert_elem->vertex_buffer_index, vert_elem->src_offset) % 4 != 0)
-        {
-            /* need to align buffer */
+        if (get_buffer_offset(r300,
+                              vert_elem->vertex_buffer_index,
+                              vert_elem->src_offset) % 4) {
+            /* XXX need to align buffer */
             assert(0);
         }
         setup_vertex_array(r300, vert_elem);
-- 
cgit v1.2.3


From ef513776b5bdd11968d2ca03862e9d1ac48e099f Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:39:42 -0800
Subject: r300g: Don't assert on oversized VBOs, just return FALSE.

---
 src/gallium/drivers/r300/r300_render.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 1532de367f..89bf749b5f 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -75,7 +75,6 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
                                   unsigned count)
 {
     CS_LOCALS(r300);
-    assert(count < 65536);
 
     BEGIN_CS(4);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
@@ -100,7 +99,6 @@ static void r300_emit_draw_elements(struct r300_context *r300,
 
     /* XXX most of these are stupid */
     assert(indexSize == 4 || indexSize == 2);
-    assert(count < 65536);
     assert((start * indexSize)  % 4 == 0);
     assert(offset_dwords == 0);
 
@@ -172,8 +170,13 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    if (!u_trim_pipe_prim(mode, &count))
+    if (!u_trim_pipe_prim(mode, &count)) {
         return FALSE;
+    }
+
+    if (count > 65535) {
+        return FALSE;
+    }
 
     r300_update_derived_state(r300);
 
@@ -210,8 +213,13 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    if (!u_trim_pipe_prim(mode, &count))
+    if (!u_trim_pipe_prim(mode, &count)) {
         return FALSE;
+    }
+
+    if (count > 65535) {
+        return FALSE;
+    }
 
     r300_update_derived_state(r300);
 
-- 
cgit v1.2.3


From cd5b2a93d5c9c60dbe72ebc963dcddf0db0b665c Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 10:52:06 -0800
Subject: r300g: Comments.

---
 src/gallium/drivers/r300/r300_render.c |  3 ++-
 src/gallium/drivers/r300/r300_vbo.c    | 14 +++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 89bf749b5f..0df9a94610 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -243,7 +243,8 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
  * keep these functions separated so that they are easier to locate. ~C.    *
  ***************************************************************************/
 
-/* Draw-based drawing for SW TCL chipsets. */
+/* Draw-based drawing for SW TCL chipsets.
+ * XXX currently broken as fucking hell. */
 boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        struct pipe_buffer* indexBuffer,
                                        unsigned indexSize,
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index cec79ec97e..d8b356a061 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -21,6 +21,9 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+/* r300_vbo: Various helpers for emitting vertex buffers. Needs cleanup,
+ * refactoring, etc. */
+
 #include "r300_vbo.h"
 
 #include "pipe/p_format.h"
@@ -76,6 +79,7 @@ void setup_vertex_attributes(struct r300_context *r300)
     finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count);
 }
 
+/* XXX WTF are these doing? */
 static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element)
 {
 }
@@ -84,6 +88,7 @@ static void finish_vertex_arrays_setup(struct r300_context *r300)
 {
 }
 
+/* XXX move/integrate this with the checks in r300_state_inlines */
 static boolean format_is_supported(enum pipe_format format, int nr_components)
 {
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS)
@@ -151,16 +156,15 @@ void setup_index_buffer(struct r300_context *r300,
                         struct pipe_buffer* indexBuffer,
                         unsigned indexSize)
 {
+    /* XXX I call BS; why is this different from the assert in r300_render? */
     assert(indexSize = 2);
 
-    if (!r300->winsys->add_buffer(r300->winsys, indexBuffer, RADEON_GEM_DOMAIN_GTT, 0))
-    {
+    if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
+                                  RADEON_GEM_DOMAIN_GTT, 0)) {
         assert(0);
     }
 
-    if (!r300->winsys->validate(r300->winsys))
-    {
+    if (!r300->winsys->validate(r300->winsys)) {
         assert(0);
     }
 }
-
-- 
cgit v1.2.3


From 0fe5f0c09abce9d540d51942eab08b2248243943 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 11:49:39 -0800
Subject: r300g: Be more verbose in what's killing us WRT vert formats.

---
 src/gallium/drivers/r300/r300_state_inlines.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 52b9650fc1..e53db3d0b5 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -434,8 +434,8 @@ r300_translate_vertex_data_type(enum pipe_format format) {
     unsigned components = pf_component_count(format);
 
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
-        debug_printf("r300: Bad format %s in %s\n", pf_name(format),
-            __FUNCTION__);
+        debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
+            __FUNCTION__, __LINE__);
         return 0;
     }
 
@@ -447,6 +447,8 @@ r300_translate_vertex_data_type(enum pipe_format format) {
                     result = R300_DATA_TYPE_FLOAT_1 + (components - 1);
                     break;
                 default:
+                    debug_printf("r300: Bad format %s in %s:%d\n",
+                        pf_name(format), __FUNCTION__, __LINE__);
                     assert(0);
             }
             break;
@@ -470,10 +472,16 @@ r300_translate_vertex_data_type(enum pipe_format format) {
                     }
                     break;
                 default:
+                    debug_printf("r300: Bad format %s in %s:%d\n",
+                        pf_name(format), __FUNCTION__, __LINE__);
+                    debug_printf("r300: pf_size_x(format) == %d\n",
+                        pf_size_x(format));
                     assert(0);
             }
             break;
         default:
+            debug_printf("r300: Bad format %s in %s:%d\n",
+                pf_name(format), __FUNCTION__, __LINE__);
             assert(0);
     }
 
@@ -492,8 +500,8 @@ static INLINE uint16_t
 r300_translate_vertex_data_swizzle(enum pipe_format format) {
 
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
-        debug_printf("r300: Bad format %s in %s\n", pf_name(format),
-            __FUNCTION__);
+        debug_printf("r300: Bad format %s in %s:%d\n",
+            pf_name(format), __FUNCTION__, __LINE__);
         return 0;
     }
 
-- 
cgit v1.2.3


From c4fa0e4caa0aeb5cce9bd871f9156da25a9ec404 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 13:07:52 -0800
Subject: r300g: Remove faulty assert.

---
 src/gallium/drivers/r300/r300_vbo.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index d8b356a061..7e88bf3b7c 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -152,13 +152,11 @@ static void setup_vertex_buffers(struct r300_context *r300)
     finish_vertex_arrays_setup(r300);
 }
 
+/* XXX these shouldn't be asserts since we can work around bad indexbufs */
 void setup_index_buffer(struct r300_context *r300,
                         struct pipe_buffer* indexBuffer,
                         unsigned indexSize)
 {
-    /* XXX I call BS; why is this different from the assert in r300_render? */
-    assert(indexSize = 2);
-
     if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
                                   RADEON_GEM_DOMAIN_GTT, 0)) {
         assert(0);
-- 
cgit v1.2.3


From fa6916cfef6a75eacdbf927a02f64a5a37c3b0d9 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 13:12:15 -0800
Subject: r300g: Remove do-nothing functions.

---
 src/gallium/drivers/r300/r300_vbo.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index 7e88bf3b7c..d3f2ce799a 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -79,15 +79,6 @@ void setup_vertex_attributes(struct r300_context *r300)
     finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count);
 }
 
-/* XXX WTF are these doing? */
-static void setup_vertex_array(struct r300_context *r300, struct pipe_vertex_element *element)
-{
-}
-
-static void finish_vertex_arrays_setup(struct r300_context *r300)
-{
-}
-
 /* XXX move/integrate this with the checks in r300_state_inlines */
 static boolean format_is_supported(enum pipe_format format, int nr_components)
 {
@@ -146,10 +137,7 @@ static void setup_vertex_buffers(struct r300_context *r300)
             /* XXX need to align buffer */
             assert(0);
         }
-        setup_vertex_array(r300, vert_elem);
     }
-
-    finish_vertex_arrays_setup(r300);
 }
 
 /* XXX these shouldn't be asserts since we can work around bad indexbufs */
-- 
cgit v1.2.3


From 9f49db6f843885620a52a06721d5972afb29f21a Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 13:37:07 -0800
Subject: r300g: Minor code cleanup to avoid confusion.

---
 src/gallium/drivers/r300/r300_render.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 0df9a94610..fa057324f8 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -136,12 +136,11 @@ static void r300_emit_draw_elements(struct r300_context *r300,
 
 static boolean r300_setup_vertex_buffers(struct r300_context *r300)
 {
-    unsigned vbuf_count = r300->aos_count;
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
     struct pipe_vertex_element *velem = r300->vertex_element;
 
 validate:
-    for (int i = 0; i < vbuf_count; i++) {
+    for (int i = 0; i < r300->aos_count; i++) {
         if (!r300->winsys->add_buffer(r300->winsys,
                 vbuf[velem[i].vertex_buffer_index].buffer,
             RADEON_GEM_DOMAIN_GTT, 0)) {
-- 
cgit v1.2.3


From 547e939afb980c2fcc3edbbb07dba0f44be785c1 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 14:14:19 -0800
Subject: r300g: DCE.

This must never have been called before; it's completely wrong.
---
 src/gallium/drivers/r300/r300_state_inlines.h |  2 +-
 src/gallium/drivers/r300/r300_vbo.c           | 27 +--------------------------
 2 files changed, 2 insertions(+), 27 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index e53db3d0b5..b0f3386c62 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -436,7 +436,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
         debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
             __FUNCTION__, __LINE__);
-        return 0;
+        assert(0);
     }
 
     switch (pf_type(format)) {
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index d3f2ce799a..1d45fd590c 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -79,30 +79,6 @@ void setup_vertex_attributes(struct r300_context *r300)
     finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count);
 }
 
-/* XXX move/integrate this with the checks in r300_state_inlines */
-static boolean format_is_supported(enum pipe_format format, int nr_components)
-{
-    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS)
-        return FALSE;
-
-    if ((pf_size_x(format) != pf_size_y(format)) ||
-        (pf_size_x(format) != pf_size_z(format)) ||
-        (pf_size_x(format) != pf_size_w(format)))
-        return FALSE;
-
-    /* Following should be supported as long as stride is 4 bytes aligned */
-    if (pf_size_x(format) != 1 && nr_components != 4)
-        return FALSE;
-
-    if (pf_size_x(format) != 2 && !(nr_components == 2 || nr_components == 4))
-        return FALSE;
-
-    if (pf_size_x(format) == 3 || pf_size_x(format) > 4)
-        return FALSE;
-
-    return TRUE;
-}
-
 static INLINE int get_buffer_offset(struct r300_context *r300,
                                     unsigned int buf_nr,
                                     unsigned int elem_offset)
@@ -110,8 +86,7 @@ static INLINE int get_buffer_offset(struct r300_context *r300,
     return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset;
 }
 
-/**
- */
+/* XXX not called at all */
 static void setup_vertex_buffers(struct r300_context *r300)
 {
     struct pipe_vertex_element *vert_elem;
-- 
cgit v1.2.3


From a12fc1a9c4d544b015b40ff0266b8c8726d16f75 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 7 Nov 2009 14:32:31 -0800
Subject: r300g: Organize inlined state.

---
 src/gallium/drivers/r300/r300_state_inlines.h | 72 +++++++++++++--------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index b0f3386c62..e6c1cb54da 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -191,6 +191,42 @@ static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
     return 0;
 }
 
+static INLINE uint32_t
+r300_translate_polygon_mode_front(unsigned mode) {
+    switch (mode)
+    {
+        case PIPE_POLYGON_MODE_FILL:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+        case PIPE_POLYGON_MODE_LINE:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+        case PIPE_POLYGON_MODE_POINT:
+            return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+
+        default:
+            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+                __FUNCTION__);
+            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+    }
+}
+
+static INLINE uint32_t
+r300_translate_polygon_mode_back(unsigned mode) {
+    switch (mode)
+    {
+        case PIPE_POLYGON_MODE_FILL:
+            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+        case PIPE_POLYGON_MODE_LINE:
+            return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+        case PIPE_POLYGON_MODE_POINT:
+            return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+
+        default:
+            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
+                __FUNCTION__);
+            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+    }
+}
+
 /* Texture sampler state. */
 
 static INLINE uint32_t r300_translate_wrap(int wrap)
@@ -512,40 +548,4 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
         (0xf << R300_WRITE_ENA_SHIFT));
 }
 
-static INLINE uint32_t
-r300_translate_polygon_mode_front(unsigned mode) {
-    switch (mode)
-    {
-        case PIPE_POLYGON_MODE_FILL:
-            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
-        case PIPE_POLYGON_MODE_LINE:
-            return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
-        case PIPE_POLYGON_MODE_POINT:
-            return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
-
-        default:
-            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
-                __FUNCTION__);
-            return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
-    }
-}
-
-static INLINE uint32_t
-r300_translate_polygon_mode_back(unsigned mode) {
-    switch (mode)
-    {
-        case PIPE_POLYGON_MODE_FILL:
-            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
-        case PIPE_POLYGON_MODE_LINE:
-            return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
-        case PIPE_POLYGON_MODE_POINT:
-            return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
-
-        default:
-            debug_printf("r300: Bad polygon mode %i in %s\n", mode,
-                __FUNCTION__);
-            return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
-    }
-}
-
 #endif /* R300_STATE_INLINES_H */
-- 
cgit v1.2.3


From ee28a69188d5054f996d0f5fc12820b024ef96a6 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 09:35:07 -0800
Subject: r300g: Fix build error on old compilers.

This dead code was still getting compiled, causing a bad ref in the lib.
---
 src/gallium/drivers/r300/r300_vbo.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index 1d45fd590c..5ad6b9c215 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -85,7 +85,7 @@ static INLINE int get_buffer_offset(struct r300_context *r300,
 {
     return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset;
 }
-
+#if 0
 /* XXX not called at all */
 static void setup_vertex_buffers(struct r300_context *r300)
 {
@@ -95,9 +95,9 @@ static void setup_vertex_buffers(struct r300_context *r300)
     for (i = 0; i < r300->aos_count; i++)
     {
         vert_elem = &r300->vertex_element[i];
+            /* XXX use translate module to convert the data */
         if (!format_is_supported(vert_elem->src_format,
                                  vert_elem->nr_components)) {
-            /* XXX use translate module to convert the data */
             assert(0);
             /*
             struct pipe_buffer *buf;
@@ -114,7 +114,7 @@ static void setup_vertex_buffers(struct r300_context *r300)
         }
     }
 }
-
+#endif
 /* XXX these shouldn't be asserts since we can work around bad indexbufs */
 void setup_index_buffer(struct r300_context *r300,
                         struct pipe_buffer* indexBuffer,
-- 
cgit v1.2.3


From 0525cb1273a51343fba0a94d01d115e4256d1db2 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 09:56:02 -0800
Subject: r300g: Fix is_buffer_referenced.

---
 src/gallium/drivers/r300/r300_context.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index b520e5929e..43d7ff3ed3 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -89,8 +89,11 @@ static unsigned int
 r300_is_buffer_referenced(struct pipe_context *pipe,
                           struct pipe_buffer *buf)
 {
-    /* XXX */
-    return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+    /* This only checks to see whether actual hardware buffers are
+     * referenced. Since we use managed BOs and transfers, it's actually not
+     * possible for pipe_buffers to ever reference the actual hardware, so
+     * buffers are never referenced. */
+    return 0;
 }
 
 static void r300_flush_cb(void *data)
-- 
cgit v1.2.3


From b6f93e2607f1bbc5b2f478f0a57d7786dd7d73a5 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 11:32:32 -0800
Subject: r300g: Enable PSC/RS dump with new debugging flags.

---
 src/gallium/drivers/r300/r300_emit.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index ec1d521800..b3d9db676a 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -510,6 +510,8 @@ void r300_emit_rs_block_state(struct r300_context* r300,
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     CS_LOCALS(r300);
 
+    DBG(r300, DBG_DRAW, "r300: RS emit:\n");
+
     BEGIN_CS(21);
     if (r300screen->caps->is_r500) {
         OUT_CS_REG_SEQ(R500_RS_IP_0, 8);
@@ -518,7 +520,7 @@ void r300_emit_rs_block_state(struct r300_context* r300,
     }
     for (i = 0; i < 8; i++) {
         OUT_CS(rs->ip[i]);
-        /* debug_printf("ip %d: 0x%08x\n", i, rs->ip[i]); */
+        DBG(r300, DBG_DRAW, "    : ip %d: 0x%08x\n", i, rs->ip[i]);
     }
 
     OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
@@ -532,11 +534,11 @@ void r300_emit_rs_block_state(struct r300_context* r300,
     }
     for (i = 0; i < 8; i++) {
         OUT_CS(rs->inst[i]);
-        /* debug_printf("inst %d: 0x%08x\n", i, rs->inst[i]); */
+        DBG(r300, DBG_DRAW, "    : inst %d: 0x%08x\n", i, rs->inst[i]);
     }
 
-    /* debug_printf("count: 0x%08x inst_count: 0x%08x\n", rs->count,
-     *        rs->inst_count); */
+    DBG(r300, DBG_DRAW, "    : count: 0x%08x inst_count: 0x%08x\n",
+        rs->count, rs->inst_count);
 
     END_CS;
 }
@@ -653,6 +655,8 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     int i;
     CS_LOCALS(r300);
 
+    DBG(r300, DBG_DRAW, "r300: VAP/PSC emit:\n");
+
     BEGIN_CS(26);
     OUT_CS_REG(R300_VAP_VTX_SIZE, r300->vertex_info->vinfo.size);
 
@@ -662,22 +666,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
     OUT_CS(r300->vertex_info->vinfo.hwfmt[2]);
     OUT_CS(r300->vertex_info->vinfo.hwfmt[3]);
-    /* for (i = 0; i < 4; i++) {
-     *    debug_printf("hwfmt%d: 0x%08x\n", i,
-     *            r300->vertex_info->vinfo.hwfmt[i]);
-     * } */
+    for (i = 0; i < 4; i++) {
+       DBG(r300, DBG_DRAW, "    : hwfmt%d: 0x%08x\n", i,
+               r300->vertex_info->vinfo.hwfmt[i]);
+    }
 
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
     for (i = 0; i < 8; i++) {
         OUT_CS(r300->vertex_info->vap_prog_stream_cntl[i]);
-        /* debug_printf("prog_stream_cntl%d: 0x%08x\n", i,
-         *        r300->vertex_info->vap_prog_stream_cntl[i]); */
+        DBG(r300, DBG_DRAW, "    : prog_stream_cntl%d: 0x%08x\n", i,
+               r300->vertex_info->vap_prog_stream_cntl[i]);
     }
     OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
     for (i = 0; i < 8; i++) {
         OUT_CS(r300->vertex_info->vap_prog_stream_cntl_ext[i]);
-        /* debug_printf("prog_stream_cntl_ext%d: 0x%08x\n", i,
-         *        r300->vertex_info->vap_prog_stream_cntl_ext[i]); */
+        DBG(r300, DBG_DRAW, "    : prog_stream_cntl_ext%d: 0x%08x\n", i,
+               r300->vertex_info->vap_prog_stream_cntl_ext[i]);
     }
     END_CS;
 }
-- 
cgit v1.2.3


From 11d9edf4c9c75d5a41fb0a1757441ad315330bea Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 11:45:57 -0800
Subject: r300g: Unify context names for counts.

From the SW TCL fixups.
---
 src/gallium/drivers/r300/r300_context.c |  2 +-
 src/gallium/drivers/r300/r300_context.h |  4 ++--
 src/gallium/drivers/r300/r300_emit.c    | 16 ++++++++++------
 src/gallium/drivers/r300/r300_render.c  |  2 +-
 src/gallium/drivers/r300/r300_state.c   |  4 ++--
 src/gallium/drivers/r300/r300_vbo.c     |  5 +++--
 6 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 43d7ff3ed3..ae23329b83 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -158,6 +158,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     /* Open up the OQ BO. */
     r300->oqbo = screen->buffer_create(screen, 4096,
             PIPE_BUFFER_USAGE_VERTEX, 4096);
+    make_empty_list(&r300->query_list);
 
     r300_init_flush_functions(r300);
 
@@ -172,6 +173,5 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
     r300->dirty_state = R300_NEW_KITCHEN_SINK;
     r300->dirty_hw++;
-    make_empty_list(&r300->query_list);
     return &r300->context;
 }
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 8d14c53f49..f954ba7f9a 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -295,10 +295,10 @@ struct r300_context {
 
     /* Vertex buffers for Gallium. */
     struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
-    int vbuf_count;
+    int vertex_buffer_count;
     /* Vertex elements for Gallium. */
     struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
-    int aos_count;
+    int vertex_element_count;
 
     /* Bitmask of dirty state objects. */
     uint32_t dirty_state;
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index b3d9db676a..eeb97a2d37 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -584,17 +584,20 @@ void r300_emit_texture(struct r300_context* r300,
     END_CS;
 }
 
+/* XXX I can't read this and that's not good */
 void r300_emit_aos(struct r300_context* r300, unsigned offset)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
     struct pipe_vertex_element *velem = r300->vertex_element;
     CS_LOCALS(r300);
     int i;
-    unsigned packet_size = (r300->aos_count * 3 + 1) / 2;
-    BEGIN_CS(2 + packet_size + r300->aos_count * 2);
+    unsigned aos_count = r300->vertex_element_count;
+
+    unsigned packet_size = (aos_count * 3 + 1) / 2;
+    BEGIN_CS(2 + packet_size + aos_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
-    OUT_CS(r300->aos_count);
-    for (i = 0; i < r300->aos_count - 1; i += 2) {
+    OUT_CS(aos_count);
+    for (i = 0; i < aos_count - 1; i += 2) {
         int buf_num1 = velem[i].vertex_buffer_index;
         int buf_num2 = velem[i+1].vertex_buffer_index;
         assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
@@ -606,7 +609,7 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
         OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset +
                offset * vbuf[buf_num2].stride);
     }
-    if (r300->aos_count & 1) {
+    if (aos_count & 1) {
         int buf_num = velem[i].vertex_buffer_index;
         assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
         OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
@@ -614,7 +617,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
                offset * vbuf[buf_num].stride);
     }
 
-    for (i = 0; i < r300->aos_count; i++) {
+    /* XXX bare CS reloc */
+    for (i = 0; i < aos_count; i++) {
         cs_winsys->write_cs_reloc(cs_winsys,
                                   vbuf[velem[i].vertex_buffer_index].buffer,
                                   RADEON_GEM_DOMAIN_GTT,
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index fa057324f8..1ff3e64b44 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -140,7 +140,7 @@ static boolean r300_setup_vertex_buffers(struct r300_context *r300)
     struct pipe_vertex_element *velem = r300->vertex_element;
 
 validate:
-    for (int i = 0; i < r300->aos_count; i++) {
+    for (int i = 0; i < r300->vertex_element_count; i++) {
         if (!r300->winsys->add_buffer(r300->winsys,
                 vbuf[velem[i].vertex_buffer_index].buffer,
             RADEON_GEM_DOMAIN_GTT, 0)) {
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index e0b85ab768..d1eced61db 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -668,7 +668,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
 
     memcpy(r300->vertex_buffer, buffers,
         sizeof(struct pipe_vertex_buffer) * count);
-    r300->vbuf_count = count;
+    r300->vertex_buffer_count = count;
 
     if (r300->draw) {
         draw_flush(r300->draw);
@@ -685,7 +685,7 @@ static void r300_set_vertex_elements(struct pipe_context* pipe,
     memcpy(r300->vertex_element,
            elements,
            sizeof(struct pipe_vertex_element) * count);
-    r300->aos_count = count;
+    r300->vertex_element_count = count;
 
     if (r300->draw) {
         draw_flush(r300->draw);
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index 5ad6b9c215..a6a159667a 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -71,12 +71,13 @@ void setup_vertex_attributes(struct r300_context *r300)
     struct pipe_vertex_element *vert_elem;
     int i;
 
-    for (i = 0; i < r300->aos_count; i++) {
+    for (i = 0; i < r300->vertex_element_count; i++) {
         vert_elem = &r300->vertex_element[i];
         setup_vertex_attribute(r300->vertex_info, vert_elem, i);
     }
 
-    finish_vertex_attribs_setup(r300->vertex_info, r300->aos_count);
+    finish_vertex_attribs_setup(r300->vertex_info,
+        r300->vertex_element_count);
 }
 
 static INLINE int get_buffer_offset(struct r300_context *r300,
-- 
cgit v1.2.3


From fe898638086370ed86a9ce76b21fa8ebb88c4b08 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 14:07:01 -0800
Subject: r300g: Protect against possibly missing Draw pointer.

Part of the SW TCL revival.
---
 src/gallium/drivers/r300/r300_state_derived.c | 47 +++++++++++++++++----------
 1 file changed, 29 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 14d7bb094c..7166694edf 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -170,20 +170,30 @@ static void r300_vs_tab_routes(struct r300_context* r300,
         }
         tab[0] = 0;
     }
-    draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-        draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+
+    /* Position. */
+    if (r300->draw) {
+        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+    }
     vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
     vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
 
+    /* Point size. */
     if (psize) {
-        draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
+        if (r300->draw) {
+            draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
+                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
+        }
         vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
     }
 
+    /* Colors. */
     for (i = 0; i < cols; i++) {
-        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
+        if (r300->draw) {
+            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
+                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
+        }
         vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
         vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
     }
@@ -192,28 +202,27 @@ static void r300_vs_tab_routes(struct r300_context* r300,
      * This gets around a double-increment problem. */
     i = 0;
 
+    /* Fog. This is a special-cased texcoord. */
     if (fog) {
         i++;
-        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
+        if (r300->draw) {
+            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
+        }
         vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
         vinfo->hwfmt[3] |= (4 << (3 * i));
     }
 
+    /* Texcoords. */
     for (; i < texs; i++) {
-        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
+        if (r300->draw) {
+            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
+                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
+        }
         vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
         vinfo->hwfmt[3] |= (4 << (3 * i));
     }
 
-    /* Handle the case where the vertex shader will be generating some of
-     * the attribs based on its inputs. */
-    if (r300screen->caps->has_tcl &&
-            info->num_inputs < info->num_outputs) {
-        vinfo->num_attribs = info->num_inputs;
-    }
-
     draw_compute_vertex_size(vinfo);
 }
 
@@ -455,6 +464,7 @@ static void r300_update_rs_block(struct r300_context* r300,
 /* Update the vertex format. */
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
+    struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct r300_vertex_info* vformat;
     struct r300_rs_block* rs_block;
     int i;
@@ -543,7 +553,8 @@ static void r300_update_ztop(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    if (r300->dirty_state &
+    /* XXX */
+    if (TRUE || r300->dirty_state &
         (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
         r300_update_derived_shader_state(r300);
     }
-- 
cgit v1.2.3


From c9167d868cfb2ba821f01e0217e3880c5df4c97b Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 8 Nov 2009 14:51:52 -0800
Subject: r300g: Fix up SW TCL rendering functions.

They don't work, but at least they're clean now.
---
 src/gallium/drivers/r300/r300_render.c | 68 ++++++++++++++++++++++++----------
 src/gallium/drivers/r300/r300_render.h |  5 +++
 2 files changed, 53 insertions(+), 20 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 1ff3e64b44..62e1456ed3 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -242,8 +242,44 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
  * keep these functions separated so that they are easier to locate. ~C.    *
  ***************************************************************************/
 
-/* Draw-based drawing for SW TCL chipsets.
- * XXX currently broken as fucking hell. */
+/* SW TCL arrays, using Draw. */
+boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
+                               unsigned mode,
+                               unsigned start,
+                               unsigned count)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    int i;
+
+    if (!u_trim_pipe_prim(mode, &count)) {
+        return FALSE;
+    }
+
+    for (i = 0; i < r300->vertex_buffer_count; i++) {
+        void* buf = pipe_buffer_map(pipe->screen,
+                                    r300->vertex_buffer[i].buffer,
+                                    PIPE_BUFFER_USAGE_CPU_READ);
+        draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+    }
+
+    draw_set_mapped_element_buffer(r300->draw, 0, NULL);
+
+    draw_set_mapped_constant_buffer(r300->draw,
+            r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+            r300->shader_constants[PIPE_SHADER_VERTEX].count *
+                (sizeof(float) * 4));
+
+    draw_arrays(r300->draw, mode, start, count);
+
+    for (i = 0; i < r300->vertex_buffer_count; i++) {
+        pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
+        draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+    }
+
+    return TRUE;
+}
+
+/* SW TCL elements, using Draw. */
 boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        struct pipe_buffer* indexBuffer,
                                        unsigned indexSize,
@@ -253,8 +289,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        unsigned start,
                                        unsigned count)
 {
-    assert(0);
-#if 0
     struct r300_context* r300 = r300_context(pipe);
     int i;
 
@@ -264,19 +298,15 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
 
     for (i = 0; i < r300->vertex_buffer_count; i++) {
         void* buf = pipe_buffer_map(pipe->screen,
-                                    r300->vertex_buffers[i].buffer,
+                                    r300->vertex_buffer[i].buffer,
                                     PIPE_BUFFER_USAGE_CPU_READ);
         draw_set_mapped_vertex_buffer(r300->draw, i, buf);
     }
 
-    if (indexBuffer) {
-        void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
-                                        PIPE_BUFFER_USAGE_CPU_READ);
-        draw_set_mapped_element_buffer_range(r300->draw, indexSize,
-                                             minIndex, maxIndex, indices);
-    } else {
-        draw_set_mapped_element_buffer(r300->draw, 0, NULL);
-    }
+    void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
+                                    PIPE_BUFFER_USAGE_CPU_READ);
+    draw_set_mapped_element_buffer_range(r300->draw, indexSize,
+                                         minIndex, maxIndex, indices);
 
     draw_set_mapped_constant_buffer(r300->draw,
             r300->shader_constants[PIPE_SHADER_VERTEX].constants,
@@ -286,16 +316,14 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
     draw_arrays(r300->draw, mode, start, count);
 
     for (i = 0; i < r300->vertex_buffer_count; i++) {
-        pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer);
+        pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
         draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
     }
 
-    if (indexBuffer) {
-        pipe_buffer_unmap(pipe->screen, indexBuffer);
-        draw_set_mapped_element_buffer_range(r300->draw, 0, start,
-                                             start + count - 1, NULL);
-    }
-#endif
+    pipe_buffer_unmap(pipe->screen, indexBuffer);
+    draw_set_mapped_element_buffer_range(r300->draw, 0, start,
+                                         start + count - 1, NULL);
+
     return TRUE;
 }
 
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index 3f8ac1fb7a..da83069083 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -42,6 +42,11 @@ boolean r300_draw_elements(struct pipe_context* pipe,
 boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
                          unsigned start, unsigned count);
 
+boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
+                               unsigned mode,
+                               unsigned start,
+                               unsigned count);
+
 boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        struct pipe_buffer* indexBuffer,
                                        unsigned indexSize,
-- 
cgit v1.2.3


From 7204b92101ecf4e2fbc78cf91f387996396deec8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 9 Nov 2009 14:29:00 +0100
Subject: nv50: clarify data for method 0x121c

---
 src/gallium/drivers/nv50/nv50_state_validate.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index a13d64b7fa..799d2758fe 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -37,13 +37,14 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
 	unsigned i, w, h, gw = 0;
 
-	/* Set nr of active RTs. Don't know what 0xfac6880 does, but
-	 * at least 0x880 was required to draw to more than 1 RT.
-	 * In some special cases, 0xfac6880 is not used, we probably
-	 * don't hit any of these though.
+	/* Set nr of active RTs and select RT for each colour output.
+	 * FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
+	 * Ambiguous assignment results in no rendering (no DATA_ERROR).
 	 */
 	so_method(so, tesla, 0x121c, 1);
-	so_data  (so, 0x0fac6880 | fb->nr_cbufs);
+	so_data  (so, fb->nr_cbufs |
+		  (0 <<  4) | (1 <<  7) | (2 << 10) | (3 << 13) |
+		  (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
 
 	for (i = 0; i < fb->nr_cbufs; i++) {
 		struct pipe_texture *pt = fb->cbufs[i]->texture;
-- 
cgit v1.2.3


From bc9d51bb0eab90c47e7b07756e9eba9575f80ffc Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 9 Nov 2009 06:59:03 -0800
Subject: llvmpipe: Ensure stack variables in unit tests are properly aligned.

---
 src/gallium/drivers/llvmpipe/lp_test_blend.c  | 21 +++++++++++----------
 src/gallium/drivers/llvmpipe/lp_test_conv.c   |  5 +++--
 src/gallium/drivers/llvmpipe/lp_test_format.c |  1 +
 3 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index 149fec1d54..29fff91981 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -462,6 +462,7 @@ compute_blend_ref(const struct pipe_blend_state *blend,
 }
 
 
+ALIGN_STACK
 static boolean
 test_one(unsigned verbose,
          FILE *fp,
@@ -530,11 +531,11 @@ test_one(unsigned verbose,
    success = TRUE;
    for(i = 0; i < n && success; ++i) {
       if(mode == AoS) {
-         uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t src[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t dst[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t con[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t res[LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t ref[LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
 
@@ -595,11 +596,11 @@ test_one(unsigned verbose,
 
       if(mode == SoA) {
          const unsigned stride = type.length*type.width/8;
-         uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
-         uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t src[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t dst[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t con[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t res[4*LP_NATIVE_VECTOR_WIDTH/8];
+         ALIGN16_ATTRIB uint8_t ref[4*LP_NATIVE_VECTOR_WIDTH/8];
          int64_t start_counter = 0;
          int64_t end_counter = 0;
          boolean mismatch;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index ac2a6d05e3..968c7a2d4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -142,6 +142,7 @@ add_conv_test(LLVMModuleRef module,
 }
 
 
+ALIGN_STACK
 static boolean
 test_one(unsigned verbose,
          FILE *fp,
@@ -229,8 +230,8 @@ test_one(unsigned verbose,
    for(i = 0; i < n && success; ++i) {
       unsigned src_stride = src_type.length*src_type.width/8;
       unsigned dst_stride = dst_type.length*dst_type.width/8;
-      uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
-      uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      ALIGN16_ATTRIB uint8_t src[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
+      ALIGN16_ATTRIB uint8_t dst[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       double fref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       uint8_t ref[LP_MAX_VECTOR_LENGTH*LP_MAX_VECTOR_LENGTH];
       int64_t start_counter = 0;
diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c
index b2403ad521..23ea9ebbe7 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_format.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_format.c
@@ -199,6 +199,7 @@ add_store_rgba_test(LLVMModuleRef module,
 }
 
 
+ALIGN_STACK
 static boolean
 test_format(unsigned verbose, FILE *fp, const struct pixel_test_case *test)
 {
-- 
cgit v1.2.3


From eef5a0b3a3e03abd1c69649763efc79575df650f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 10 Nov 2009 05:22:15 -0800
Subject: llvmpipe: Fix derived blend color state.

---
 src/gallium/drivers/llvmpipe/lp_state_blend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index 3f03bd0057..b2e75d3b14 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -76,7 +76,7 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
    for (i = 0; i < 4; ++i) {
       uint8_t c = float_to_ubyte(blend_color->color[i]);
       for (j = 0; j < 16; ++j)
-         llvmpipe->jit_context.blend_color[i*4 + j] = c;
+         llvmpipe->jit_context.blend_color[i*16 + j] = c;
    }
 }
 
-- 
cgit v1.2.3


From 520b6abdecdaba856e5ca04938e18eb83b33dfaa Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 6 Nov 2009 12:00:14 -0800
Subject: i915g: Fix memory leak when pci id is unknown.

---
 src/gallium/drivers/i915/i915_screen.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
index c66558c320..d4ee8f5339 100644
--- a/src/gallium/drivers/i915/i915_screen.c
+++ b/src/gallium/drivers/i915/i915_screen.c
@@ -271,6 +271,7 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id)
    default:
       debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", 
                    __FUNCTION__, pci_id);
+      FREE(is);
       return NULL;
    }
 
-- 
cgit v1.2.3


From cbee31a1f84a4d28d126356aaca317e2cdd003dc Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 11 Nov 2009 03:05:16 -0800
Subject: r300, r300g: Add missing registers.

---
 src/gallium/drivers/r300/r300_reg.h  | 8 +++++---
 src/mesa/drivers/dri/r300/r300_reg.h | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 1e4d3f5d70..8ca785cb58 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1884,6 +1884,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_RGB_ADDR0(x)                ((x) << 0)
 #       define R300_RGB_ADDR1(x)                ((x) << 6)
 #       define R300_RGB_ADDR2(x)                ((x) << 12)
+#       define R300_RGB_TARGET(x)               ((x) << 29)
 
 #define R300_US_ALU_ALPHA_ADDR_0                 0x47C0
 #       define R300_ALU_SRC0A_SHIFT             0
@@ -1901,9 +1902,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_DSTA_REG                (1 << 23)
 #       define R300_ALU_DSTA_OUTPUT             (1 << 24)
 #		define R300_ALU_DSTA_DEPTH              (1 << 27)
-#       define R300_ALPHA_ADDR0(x)                ((x) << 0)
-#       define R300_ALPHA_ADDR1(x)                ((x) << 6)
-#       define R300_ALPHA_ADDR2(x)                ((x) << 12)
+#       define R300_ALPHA_ADDR0(x)              ((x) << 0)
+#       define R300_ALPHA_ADDR1(x)              ((x) << 6)
+#       define R300_ALPHA_ADDR2(x)              ((x) << 12)
+#       define R300_ALPHA_TARGET(x)             ((x) << 25)
 
 #define R300_US_ALU_RGB_INST_0                   0x48C0
 #       define R300_ALU_ARGC_SRC0C_XYZ          0
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 623da60333..ea684e7df1 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1789,6 +1789,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_DSTC_OUTPUT_X           (1 << 26)
 #       define R300_ALU_DSTC_OUTPUT_Y           (1 << 27)
 #       define R300_ALU_DSTC_OUTPUT_Z           (1 << 28)
+#       define R300_RGB_TARGET(x)               ((x) << 29)
 
 #define R300_US_ALU_ALPHA_ADDR_0                 0x47C0
 #       define R300_ALU_SRC0A_SHIFT             0
@@ -1806,6 +1807,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_DSTA_REG                (1 << 23)
 #       define R300_ALU_DSTA_OUTPUT             (1 << 24)
 #		define R300_ALU_DSTA_DEPTH              (1 << 27)
+#		define R300_ALPHA_TARGET(x)             ((x) << 25)
 
 #define R300_US_ALU_RGB_INST_0                   0x48C0
 #       define R300_ALU_ARGC_SRC0C_XYZ          0
-- 
cgit v1.2.3


From e243279a48d68c0a14fbf2b78d99b6a9e72c87b6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 21:17:08 +0000
Subject: i965g: avoid use of internally generated immediates

Currently not working, so don't generate more of them.
---
 src/gallium/drivers/i965/brw_wm_fp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 2a207958eb..0df84f8546 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -906,8 +906,11 @@ find_output_by_semantic( struct brw_wm_compile *c,
 	 return src_reg( TGSI_FILE_OUTPUT, i );
 
    /* If not found, return some arbitrary immediate value:
+    *
+    * XXX: this is a good idea but immediates end up generating extra
+    * curbe entries atm, as they would have in the original driver.
     */
-   return src_imm1f(c, 1.0);
+   return src_reg( TGSI_FILE_OUTPUT, 0 ); /* src_imm1f(c, 1.0); */
 }
 
 
-- 
cgit v1.2.3


From d86e9079a8eb2e84cb231fdbca4b74d744198afb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 23:29:47 +0000
Subject: i965g: fix off-by-one on curbe upload

---
 src/gallium/drivers/i965/brw_curbe.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 0a5cfcc7cf..79ebac9d15 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -224,9 +224,13 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
-      GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT];
+      GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
       struct pipe_screen *screen = brw->base.screen;
 
+      /* XXX: note that constant buffers are currently *already* in
+       * buffer objects.  If we want to keep on putting them into the
+       * curbe, it makes sense to treat constbufs specially with malloc.
+       */
       const GLfloat *value = screen->buffer_map( screen,
 						 brw->curr.vertex_constants,
 						 PIPE_BUFFER_USAGE_CPU_READ);
@@ -272,8 +276,10 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
       }
 
       if (brw->curbe.curbe_bo == NULL) {
-	 /* Allocate a single page for CURBE entries for this batchbuffer.
-	  * They're generally around 64b.
+	 /* Allocate a single page for CURBE entries for this
+	  * batchbuffer.  They're generally around 64b.  We will
+	  * discard the curbe buffer after the batch is flushed to
+	  * avoid synchronous updates.
 	  */
 	 ret = brw->sws->bo_alloc(brw->sws, 
                                   BRW_BUFFER_TYPE_CURBE,
@@ -292,8 +298,8 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
       /* Copy data to the buffer:
        */
       brw->sws->bo_subdata(brw->curbe.curbe_bo,
+                           BRW_DATA_CONSTANT_BUFFER,
 			   brw->curbe.curbe_offset,
-                           BRW_DATA_OTHER,
 			   bufsz,
 			   buf,
                            NULL, 0);
-- 
cgit v1.2.3


From e4d174ea1b36976f3dd255ee6b86207ad5b25c31 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 23:31:49 +0000
Subject: i965g: smaller upload buffer for index data

---
 src/gallium/drivers/i965/brw_draw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 84803e43be..45d5ade1fc 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -268,7 +268,7 @@ boolean brw_draw_init( struct brw_context *brw )
       return FALSE;
 
    brw->vb.upload_index = u_upload_create( brw->base.screen,
-					   128 * 1024,
+					   32 * 1024,
 					   64,
 					   PIPE_BUFFER_USAGE_INDEX );
    if (brw->vb.upload_index == NULL)
-- 
cgit v1.2.3


From a010307e0adc2c0d4ef586c81507d99ef15c2142 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 6 Nov 2009 23:34:33 +0000
Subject: i965g: some more cases where file_max[] is used without obligatory +1

---
 src/gallium/drivers/i965/brw_vs_emit.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index e0fadc8dce..26f0ec5a11 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -82,15 +82,15 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     * works if everything fits in the GRF.
     * XXX this heuristic/check may need some fine tuning...
     */
-   if (c->vp->info.file_max[TGSI_FILE_CONSTANT] +
-       c->vp->info.file_max[TGSI_FILE_IMMEDIATE] +
-       c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 > BRW_MAX_GRF)
+   if (c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1 +
+       c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+       c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 > BRW_MAX_GRF)
       c->vp->use_const_buffer = GL_TRUE;
    else {
       /* XXX: immediates can go elsewhere if necessary:
        */
-      assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] +
-	     c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 21 <= BRW_MAX_GRF);
+      assert(c->vp->info.file_max[TGSI_FILE_IMMEDIATE] + 1 +
+	     c->vp->info.file_max[TGSI_FILE_TEMPORARY] + 1 + 21 <= BRW_MAX_GRF);
 
       c->vp->use_const_buffer = GL_FALSE;
    }
-- 
cgit v1.2.3


From 0c547d63c497f06c38f7a3c000e478bdcf2594b6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 9 Nov 2009 18:04:58 -0800
Subject: i965g: skip over vertex position output when preallocating FS inputs

---
 src/gallium/drivers/i965/brw_wm_pass2.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index a5574bd1a3..2a879863ab 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -83,6 +83,10 @@ static void init_registers( struct brw_wm_compile *c )
    for (j = 0; j < c->nr_creg; j++) 
       prealloc_reg(c, &c->creg[j], reg++);
 
+   reg++;                       /* XXX: skip over position output */
+
+   /* XXX: currently just hope the VS outputs line up with FS inputs:
+    */
    for (j = 0; j < c->key.vp_nr_outputs; j++)
       prealloc_reg(c, &c->payload.input_interp[j], reg++);
 
-- 
cgit v1.2.3


From 2f54d02d205468a840b35a3554f2ad8ffc31ec9c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 10 Nov 2009 18:07:11 -0800
Subject: i965g: consult fs inputs when laying out vs output regs

Vertex shader now emits just the FS inputs, in the positions and order
expected by the fragment shader.

This means potentially regenerating the vertex shader to match
different fragment shaders' input layouts.
---
 src/gallium/drivers/i965/brw_context.h     |  13 ++++
 src/gallium/drivers/i965/brw_pipe_shader.c |   6 ++
 src/gallium/drivers/i965/brw_vs.c          |  14 ++--
 src/gallium/drivers/i965/brw_vs.h          |   7 +-
 src/gallium/drivers/i965/brw_vs_emit.c     | 116 ++++++++++++++++++++---------
 5 files changed, 113 insertions(+), 43 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 4a975ecd7e..31f3cf3685 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -161,11 +161,24 @@ struct brw_vertex_shader {
    GLboolean use_const_buffer;
 };
 
+struct brw_fs_signature {
+   GLuint nr_inputs;
+   struct {
+      GLuint semantic:5;
+      GLuint semantic_index:27;
+   } input[PIPE_MAX_SHADER_INPUTS];
+};
+
+#define brw_fs_signature_size(s) (offsetof(struct brw_fs_signature, input) + \
+                                  ((s)->nr_inputs * sizeof (s)->input[0])) 
+
 
 struct brw_fragment_shader {
    const struct tgsi_token *tokens;
    struct tgsi_shader_info info;
 
+   struct brw_fs_signature signature;
+
    unsigned iz_lookup;
    //unsigned wm_lookup;
    
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 44f9ad6f9c..7febf9e0c2 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -96,6 +96,12 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
 
    tgsi_scan_shader(fs->tokens, &fs->info);
 
+   fs->signature.nr_inputs = fs->info.num_inputs;
+   for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
+      fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
+   }
+
    for (i = 0; i < fs->info.num_inputs; i++)
       if (fs->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION)
 	 fs->uses_depth = 1;
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 966940ceac..05a62ed974 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -90,22 +90,24 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
 {
    struct brw_vs_prog_key key;
    struct brw_vertex_shader *vp = brw->curr.vertex_shader;
+   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
    enum pipe_error ret;
 
    memset(&key, 0, sizeof(key));
 
-   /* Just upload the program verbatim for now.  Always send it all
-    * the inputs it asks for, whether they are varying or not.
-    */
    key.program_string_id = vp->id;
    key.nr_userclip = brw->curr.ucp.nr;
    key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
 			brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
 
+   memcpy(&key.fs_signature, &fs->signature,
+          brw_fs_signature_size(&fs->signature));
+
+
    /* Make an early check for the key.
     */
    if (brw_search_cache(&brw->cache, BRW_VS_PROG,
-                        &key, sizeof(key),
+                        &key, brw_vs_prog_key_size(&key),
                         NULL, 0,
                         &brw->vs.prog_data,
                         &brw->vs.prog_bo))
@@ -123,7 +125,9 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
  */
 const struct brw_tracked_state brw_vs_prog = {
    .dirty = {
-      .mesa  = PIPE_NEW_CLIP | PIPE_NEW_RAST,
+      .mesa  = (PIPE_NEW_CLIP | 
+                PIPE_NEW_RAST |
+                PIPE_NEW_FRAGMENT_SHADER),
       .brw   = BRW_NEW_VERTEX_PROGRAM,
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index b4e450d89b..3d1598d02b 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -43,8 +43,11 @@ struct brw_vs_prog_key {
    GLuint nr_userclip:4;
    GLuint copy_edgeflag:1;
    GLuint pad:26;
+   struct brw_fs_signature fs_signature;
 };
 
+#define brw_vs_prog_key_size(s) (offsetof(struct brw_vs_prog_key, fs_signature) + \
+                                 brw_fs_signature_size(&(s)->fs_signature))
 
 
 #define MAX_IF_DEPTH 32
@@ -65,8 +68,8 @@ struct brw_vs_compile {
 
    GLboolean copy_edgeflag;
 
-   GLuint first_output;
-   GLuint first_overflow_output; /**< VERT_ATTRIB_x */
+   GLuint overflow_grf_start;
+   GLuint overflow_count;
 
    GLuint first_tmp;
    GLuint last_tmp;
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 26f0ec5a11..933c9c4d63 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -66,6 +66,38 @@ static void release_tmps( struct brw_vs_compile *c )
 }
 
 
+static boolean is_position_output( struct brw_vs_compile *c,
+                                   unsigned vs_output )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+
+   return (semantic == TGSI_SEMANTIC_POSITION &&
+           index == 0);
+}
+
+
+static boolean find_output_slot( struct brw_vs_compile *c,
+                                  unsigned vs_output,
+                                  unsigned *fs_input_slot )
+{
+   struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+   unsigned i;
+
+   for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+      if (c->key.fs_signature.input[i].semantic == semantic &&
+          c->key.fs_signature.input[i].semantic_index == index) {
+         *fs_input_slot = i;
+         return TRUE;
+      }
+   }
+
+   return FALSE;
+}
+
 
 /**
  * Preallocate GRF register before code emit.
@@ -172,42 +204,50 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* Allocate outputs.  The non-position outputs go straight into message regs.
     */
    c->nr_outputs = c->prog_data.nr_outputs;
-   c->first_output = reg;
-   c->first_overflow_output = 0;
 
    if (c->chipset.is_igdng)
       mrf = 8;
    else
       mrf = 4;
 
+   
+   if (c->key.fs_signature.nr_inputs > BRW_MAX_MRF) {
+      c->overflow_grf_start = reg;
+      c->overflow_count = c->key.fs_signature.nr_inputs - BRW_MAX_MRF;
+      reg += c->overflow_count;
+   }
+
    /* XXX: need to access vertex output semantics here:
     */
    for (i = 0; i < c->prog_data.nr_outputs; i++) {
-      assert(i < Elements(c->regs[TGSI_FILE_OUTPUT]));
+      unsigned slot;
 
-      /* XXX: Hardwire position to zero:
-       */
-      if (i == 0) {
-	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-	 reg++;
-      }
-      /* XXX: disable psiz:
+      /* XXX: Put output position in slot zero always.  Clipper, etc,
+       * need access to this reg.
        */
-      else if (0) {
-	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
+      if (is_position_output(c, i)) {
+	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); /* copy to mrf 0 */
 	 reg++;
-	 mrf++;		/* just a placeholder?  XXX fix later stages & remove this */
       }
-      else if (mrf < 16) {
-	 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
-	 mrf++;
+      else if (find_output_slot(c, i, &slot)) {
+         
+         if (0 /* is_psize_output(c, i) */ ) {
+            /* c->psize_out.grf = reg; */
+            /* c->psize_out.mrf = i; */
+         }
+         
+         /* The first (16-4) outputs can go straight into the message regs.
+          */
+         if (slot + mrf < BRW_MAX_MRF) {
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(slot + mrf);
+         }
+         else {
+            int grf = c->overflow_grf_start + slot - BRW_MAX_MRF;
+            c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(grf, 0);
+         }
       }
       else {
-	 /* too many vertex results to fit in MRF, use GRF for overflow */
-	 if (!c->first_overflow_output)
-	    c->first_overflow_output = i;
-	 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
-	 reg++;
+         c->regs[TGSI_FILE_OUTPUT][i] = brw_null_reg();
       }
    }     
 
@@ -1072,6 +1112,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][VERT_RESULT_HPOS];
    struct brw_reg ndc;
    int eot;
+   int i;
    GLuint len_vertext_header = 2;
 
    if (c->key.copy_edgeflag) {
@@ -1167,7 +1208,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        len_vertext_header = 2;
    }
 
-   eot = (c->first_overflow_output == 0);
+   eot = (c->overflow_count == 0);
 
    brw_urb_WRITE(p, 
 		 brw_null_reg(), /* dest */
@@ -1182,19 +1223,22 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 		 0, 		/* urb destination offset */
 		 BRW_URB_SWIZZLE_INTERLEAVE);
 
-   if (c->first_overflow_output > 0) {
-      /* Not all of the vertex outputs/results fit into the MRF.
-       * Move the overflowed attributes from the GRF to the MRF and
-       * issue another brw_urb_WRITE().
-       */
+   /* Not all of the vertex outputs/results fit into the MRF.
+    * Move the overflowed attributes from the GRF to the MRF and
+    * issue another brw_urb_WRITE().
+    */
+   for (i = 0; i < c->overflow_count; i += BRW_MAX_MRF) {
+      unsigned nr = MIN2(c->overflow_count - i, BRW_MAX_MRF);
+      GLuint j;
+
+      eot = (i + nr >= c->overflow_count);
+
       /* XXX I'm not 100% sure about which MRF regs to use here.  Starting
        * at mrf[4] atm...
        */
-      GLuint i, mrf = 0;
-      for (i = c->first_overflow_output; i < c->prog_data.nr_outputs; i++) {
-	 /* move from GRF to MRF */
-	 brw_MOV(p, brw_message_reg(4+mrf), c->regs[TGSI_FILE_OUTPUT][i]);
-	 mrf++;
+      for (j = 0; j < nr; j++) {
+	 brw_MOV(p, brw_message_reg(4+j), 
+                 brw_vec8_grf(c->overflow_grf_start + i + j, 0));
       }
 
       brw_urb_WRITE(p,
@@ -1203,11 +1247,11 @@ static void emit_vertex_write( struct brw_vs_compile *c)
                     c->r0,          /* src */
                     0,              /* allocate */
                     1,              /* used */
-                    mrf+1,          /* msg len */
+                    nr+1,          /* msg len */
                     0,              /* response len */
-                    1,              /* eot */
-                    1,              /* writes complete */
-                    BRW_MAX_MRF-1,  /* urb destination offset */
+                    eot,            /* eot */
+                    eot,            /* writes complete */
+                    i-1,            /* urb destination offset */
                     BRW_URB_SWIZZLE_INTERLEAVE);
    }
 }
-- 
cgit v1.2.3


From d03a1c2216635a1475172e6603a243348675fd6f Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 14 Nov 2009 16:06:57 -0800
Subject: i965g: restore check on line smooth state

---
 src/gallium/drivers/i965/brw_wm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 8589aa22a8..3c5a2dab7a 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -225,7 +225,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
       line_aa = AA_NEVER;
       break;
    case PIPE_PRIM_LINES:
-      line_aa = AA_ALWAYS;
+      line_aa = (brw->curr.rast->templ.line_smooth ? 
+                 AA_ALWAYS : AA_NEVER);
       break;
    default:
       line_aa = brw->curr.rast->unfilled_aa_line;
-- 
cgit v1.2.3


From ed9c4933af6fb58269f1efc7c826cb6a5fd81d38 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Thu, 5 Nov 2009 19:07:19 +0100
Subject: nv10: Fix build for the last nouveau_class.h changes.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Pekka Paalanen <pq@iki.fi>
---
 src/gallium/drivers/nv10/nv10_context.c   | 2 +-
 src/gallium/drivers/nv10/nv10_prim_vbuf.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index 933176fc32..65a22b175e 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -243,7 +243,7 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
 	OUT_RING  (0.0);
 	OUT_RINGf  (16777216.0);
 
-	BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4);
+	BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
 	OUT_RINGf  (-2048.0);
 	OUT_RINGf  (-2048.0);
 	OUT_RINGf  (16777215.0 * 0.5);
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
index 1806d5f8cc..7ba9777a22 100644
--- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -69,9 +69,9 @@ void nv10_vtxbuf_bind( struct nv10_context* nv10 )
 {
 	int i;
 	for(i = 0; i < 8; i++) {
-		BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1);
+		BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
 		OUT_RING(0/*nv10->vtxbuf*/);
-		BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1);
+		BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1);
 		OUT_RING(0/*XXX*/);
 	}
 }
-- 
cgit v1.2.3


From abefd7dcdf28c90454b59faaf9401fa6e6c6f526 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sun, 15 Nov 2009 14:49:02 +0100
Subject: nv20: Fix build for the last nouveau_class.h changes.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Pekka Paalanen <pq@iki.fi>
---
 src/gallium/drivers/nv20/nv20_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 9a48739661..276db8b57b 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -360,13 +360,13 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
 	OUT_RINGf (0.0);
 	OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4);
+	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
 	OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */
 	OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */
 	OUT_RINGf (0.0);
 	OUT_RINGf (16777215.0);
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4);
+	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
 	OUT_RINGf (0.0); /* no effect?, w/2 */
 	OUT_RINGf (0.0); /* no effect?, h/2 */
 	OUT_RINGf (16777215.0 * 0.5);
-- 
cgit v1.2.3


From 3192633d4abe262d413e41feb871fe8deed409d8 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Mon, 16 Nov 2009 19:56:18 +0100
Subject: svga: Add svga driver

---
 src/gallium/drivers/svga/Makefile                  |   63 +
 src/gallium/drivers/svga/SConscript                |   75 +
 src/gallium/drivers/svga/include/README            |    3 +
 src/gallium/drivers/svga/include/svga3d_caps.h     |  139 +
 src/gallium/drivers/svga/include/svga3d_reg.h      | 1793 +++++++++++++
 .../drivers/svga/include/svga3d_shaderdefs.h       |  519 ++++
 src/gallium/drivers/svga/include/svga_reg.h        | 1346 ++++++++++
 src/gallium/drivers/svga/include/svga_types.h      |   46 +
 src/gallium/drivers/svga/svga_cmd.c                | 1427 ++++++++++
 src/gallium/drivers/svga/svga_cmd.h                |  235 ++
 src/gallium/drivers/svga/svga_context.c            |  269 ++
 src/gallium/drivers/svga/svga_context.h            |  443 ++++
 src/gallium/drivers/svga/svga_debug.h              |   74 +
 src/gallium/drivers/svga/svga_draw.c               |  370 +++
 src/gallium/drivers/svga/svga_draw.h               |   83 +
 src/gallium/drivers/svga/svga_draw_arrays.c        |  297 +++
 src/gallium/drivers/svga/svga_draw_elements.c      |  255 ++
 src/gallium/drivers/svga/svga_draw_private.h       |  158 ++
 src/gallium/drivers/svga/svga_hw_reg.h             |   42 +
 src/gallium/drivers/svga/svga_pipe_blend.c         |  246 ++
 src/gallium/drivers/svga/svga_pipe_blit.c          |   84 +
 src/gallium/drivers/svga/svga_pipe_clear.c         |  119 +
 src/gallium/drivers/svga/svga_pipe_constants.c     |   74 +
 src/gallium/drivers/svga/svga_pipe_depthstencil.c  |  153 ++
 src/gallium/drivers/svga/svga_pipe_draw.c          |  261 ++
 src/gallium/drivers/svga/svga_pipe_flush.c         |   68 +
 src/gallium/drivers/svga/svga_pipe_fs.c            |  124 +
 src/gallium/drivers/svga/svga_pipe_misc.c          |  187 ++
 src/gallium/drivers/svga/svga_pipe_query.c         |  267 ++
 src/gallium/drivers/svga/svga_pipe_rasterizer.c    |  250 ++
 src/gallium/drivers/svga/svga_pipe_sampler.c       |  243 ++
 src/gallium/drivers/svga/svga_pipe_vertex.c        |  115 +
 src/gallium/drivers/svga/svga_pipe_vs.c            |  189 ++
 src/gallium/drivers/svga/svga_screen.c             |  435 ++++
 src/gallium/drivers/svga/svga_screen.h             |   95 +
 src/gallium/drivers/svga/svga_screen_buffer.c      |  820 ++++++
 src/gallium/drivers/svga/svga_screen_buffer.h      |  190 ++
 src/gallium/drivers/svga/svga_screen_cache.c       |  307 +++
 src/gallium/drivers/svga/svga_screen_cache.h       |  135 +
 src/gallium/drivers/svga/svga_screen_texture.c     | 1065 ++++++++
 src/gallium/drivers/svga/svga_screen_texture.h     |  177 ++
 src/gallium/drivers/svga/svga_state.c              |  278 ++
 src/gallium/drivers/svga/svga_state.h              |   95 +
 src/gallium/drivers/svga/svga_state_constants.c    |  239 ++
 src/gallium/drivers/svga/svga_state_framebuffer.c  |  455 ++++
 src/gallium/drivers/svga/svga_state_fs.c           |  282 ++
 src/gallium/drivers/svga/svga_state_need_swtnl.c   |  200 ++
 src/gallium/drivers/svga/svga_state_rss.c          |  268 ++
 src/gallium/drivers/svga/svga_state_tss.c          |  279 ++
 src/gallium/drivers/svga/svga_state_vdecl.c        |  182 ++
 src/gallium/drivers/svga/svga_state_vs.c           |  239 ++
 src/gallium/drivers/svga/svga_swtnl.h              |   52 +
 src/gallium/drivers/svga/svga_swtnl_backend.c      |  349 +++
 src/gallium/drivers/svga/svga_swtnl_draw.c         |  170 ++
 src/gallium/drivers/svga/svga_swtnl_private.h      |   93 +
 src/gallium/drivers/svga/svga_swtnl_state.c        |  242 ++
 src/gallium/drivers/svga/svga_tgsi.c               |  266 ++
 src/gallium/drivers/svga/svga_tgsi.h               |  139 +
 src/gallium/drivers/svga/svga_tgsi_decl_sm20.c     |  280 ++
 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c     |  385 +++
 src/gallium/drivers/svga/svga_tgsi_emit.h          |  345 +++
 src/gallium/drivers/svga/svga_tgsi_insn.c          | 2716 ++++++++++++++++++++
 src/gallium/drivers/svga/svga_winsys.h             |  299 +++
 src/gallium/drivers/svga/svgadump/st_shader.h      |  214 ++
 src/gallium/drivers/svga/svgadump/st_shader_dump.c |  649 +++++
 src/gallium/drivers/svga/svgadump/st_shader_dump.h |   42 +
 src/gallium/drivers/svga/svgadump/st_shader_op.c   |  168 ++
 src/gallium/drivers/svga/svgadump/st_shader_op.h   |   46 +
 src/gallium/drivers/svga/svgadump/svga_dump.c      | 1736 +++++++++++++
 src/gallium/drivers/svga/svgadump/svga_dump.h      |   34 +
 src/gallium/drivers/svga/svgadump/svga_dump.py     |  329 +++
 src/gallium/winsys/drm/vmware/Makefile             |   12 +
 src/gallium/winsys/drm/vmware/SConscript           |   11 +
 src/gallium/winsys/drm/vmware/core/Makefile        |   47 +
 src/gallium/winsys/drm/vmware/core/SConscript      |   39 +
 src/gallium/winsys/drm/vmware/core/vmw_buffer.c    |  274 ++
 src/gallium/winsys/drm/vmware/core/vmw_buffer.h    |   65 +
 src/gallium/winsys/drm/vmware/core/vmw_context.c   |  297 +++
 src/gallium/winsys/drm/vmware/core/vmw_context.h   |   59 +
 src/gallium/winsys/drm/vmware/core/vmw_fence.c     |  108 +
 src/gallium/winsys/drm/vmware/core/vmw_fence.h     |   59 +
 src/gallium/winsys/drm/vmware/core/vmw_screen.c    |   74 +
 src/gallium/winsys/drm/vmware/core/vmw_screen.h    |  134 +
 .../winsys/drm/vmware/core/vmw_screen_dri.c        |  371 +++
 .../winsys/drm/vmware/core/vmw_screen_ioctl.c      |  503 ++++
 .../winsys/drm/vmware/core/vmw_screen_pools.c      |   79 +
 .../winsys/drm/vmware/core/vmw_screen_svga.c       |  295 +++
 src/gallium/winsys/drm/vmware/core/vmw_surface.c   |   59 +
 src/gallium/winsys/drm/vmware/core/vmw_surface.h   |   79 +
 src/gallium/winsys/drm/vmware/dri/Makefile         |   18 +
 src/gallium/winsys/drm/vmware/dri/SConscript       |   63 +
 src/gallium/winsys/drm/vmware/egl/Makefile         |   18 +
 src/gallium/winsys/drm/vmware/xorg/Makefile        |   54 +
 src/gallium/winsys/drm/vmware/xorg/SConscript      |   55 +
 src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c      |  150 ++
 95 files changed, 27235 insertions(+)
 create mode 100644 src/gallium/drivers/svga/Makefile
 create mode 100644 src/gallium/drivers/svga/SConscript
 create mode 100644 src/gallium/drivers/svga/include/README
 create mode 100644 src/gallium/drivers/svga/include/svga3d_caps.h
 create mode 100644 src/gallium/drivers/svga/include/svga3d_reg.h
 create mode 100644 src/gallium/drivers/svga/include/svga3d_shaderdefs.h
 create mode 100644 src/gallium/drivers/svga/include/svga_reg.h
 create mode 100644 src/gallium/drivers/svga/include/svga_types.h
 create mode 100644 src/gallium/drivers/svga/svga_cmd.c
 create mode 100644 src/gallium/drivers/svga/svga_cmd.h
 create mode 100644 src/gallium/drivers/svga/svga_context.c
 create mode 100644 src/gallium/drivers/svga/svga_context.h
 create mode 100644 src/gallium/drivers/svga/svga_debug.h
 create mode 100644 src/gallium/drivers/svga/svga_draw.c
 create mode 100644 src/gallium/drivers/svga/svga_draw.h
 create mode 100644 src/gallium/drivers/svga/svga_draw_arrays.c
 create mode 100644 src/gallium/drivers/svga/svga_draw_elements.c
 create mode 100644 src/gallium/drivers/svga/svga_draw_private.h
 create mode 100644 src/gallium/drivers/svga/svga_hw_reg.h
 create mode 100644 src/gallium/drivers/svga/svga_pipe_blend.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_blit.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_clear.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_constants.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_depthstencil.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_draw.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_flush.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_fs.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_misc.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_query.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_rasterizer.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_sampler.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_vertex.c
 create mode 100644 src/gallium/drivers/svga/svga_pipe_vs.c
 create mode 100644 src/gallium/drivers/svga/svga_screen.c
 create mode 100644 src/gallium/drivers/svga/svga_screen.h
 create mode 100644 src/gallium/drivers/svga/svga_screen_buffer.c
 create mode 100644 src/gallium/drivers/svga/svga_screen_buffer.h
 create mode 100644 src/gallium/drivers/svga/svga_screen_cache.c
 create mode 100644 src/gallium/drivers/svga/svga_screen_cache.h
 create mode 100644 src/gallium/drivers/svga/svga_screen_texture.c
 create mode 100644 src/gallium/drivers/svga/svga_screen_texture.h
 create mode 100644 src/gallium/drivers/svga/svga_state.c
 create mode 100644 src/gallium/drivers/svga/svga_state.h
 create mode 100644 src/gallium/drivers/svga/svga_state_constants.c
 create mode 100644 src/gallium/drivers/svga/svga_state_framebuffer.c
 create mode 100644 src/gallium/drivers/svga/svga_state_fs.c
 create mode 100644 src/gallium/drivers/svga/svga_state_need_swtnl.c
 create mode 100644 src/gallium/drivers/svga/svga_state_rss.c
 create mode 100644 src/gallium/drivers/svga/svga_state_tss.c
 create mode 100644 src/gallium/drivers/svga/svga_state_vdecl.c
 create mode 100644 src/gallium/drivers/svga/svga_state_vs.c
 create mode 100644 src/gallium/drivers/svga/svga_swtnl.h
 create mode 100644 src/gallium/drivers/svga/svga_swtnl_backend.c
 create mode 100644 src/gallium/drivers/svga/svga_swtnl_draw.c
 create mode 100644 src/gallium/drivers/svga/svga_swtnl_private.h
 create mode 100644 src/gallium/drivers/svga/svga_swtnl_state.c
 create mode 100644 src/gallium/drivers/svga/svga_tgsi.c
 create mode 100644 src/gallium/drivers/svga/svga_tgsi.h
 create mode 100644 src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
 create mode 100644 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
 create mode 100644 src/gallium/drivers/svga/svga_tgsi_emit.h
 create mode 100644 src/gallium/drivers/svga/svga_tgsi_insn.c
 create mode 100644 src/gallium/drivers/svga/svga_winsys.h
 create mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h
 create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c
 create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h
 create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c
 create mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_dump.c
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_dump.h
 create mode 100755 src/gallium/drivers/svga/svgadump/svga_dump.py
 create mode 100644 src/gallium/winsys/drm/vmware/Makefile
 create mode 100644 src/gallium/winsys/drm/vmware/SConscript
 create mode 100644 src/gallium/winsys/drm/vmware/core/Makefile
 create mode 100644 src/gallium/winsys/drm/vmware/core/SConscript
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_buffer.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_buffer.h
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_context.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_context.h
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_fence.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_fence.h
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen.h
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_surface.c
 create mode 100644 src/gallium/winsys/drm/vmware/core/vmw_surface.h
 create mode 100644 src/gallium/winsys/drm/vmware/dri/Makefile
 create mode 100644 src/gallium/winsys/drm/vmware/dri/SConscript
 create mode 100644 src/gallium/winsys/drm/vmware/egl/Makefile
 create mode 100644 src/gallium/winsys/drm/vmware/xorg/Makefile
 create mode 100644 src/gallium/winsys/drm/vmware/xorg/SConscript
 create mode 100644 src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
new file mode 100644
index 0000000000..05ab4ab9b3
--- /dev/null
+++ b/src/gallium/drivers/svga/Makefile
@@ -0,0 +1,63 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = svga
+
+C_SOURCES = \
+	svgadump/st_shader_dump.c \
+	svgadump/st_shader_op.c \
+	svgadump/svga_dump.c \
+	svga_cmd.c \
+	svga_context.c \
+	svga_draw.c \
+	svga_draw_arrays.c \
+	svga_draw_elements.c \
+	svga_pipe_blend.c \
+	svga_pipe_blit.c \
+	svga_pipe_clear.c \
+	svga_pipe_constants.c \
+	svga_pipe_depthstencil.c \
+	svga_pipe_draw.c \
+	svga_pipe_flush.c \
+	svga_pipe_fs.c \
+	svga_pipe_misc.c \
+	svga_pipe_query.c \
+	svga_pipe_rasterizer.c \
+	svga_pipe_sampler.c \
+	svga_pipe_vertex.c \
+	svga_pipe_vs.c \
+	svga_screen.c \
+	svga_screen_buffer.c \
+	svga_screen_texture.c \
+	svga_screen_cache.c \
+	svga_state.c \
+	svga_state_need_swtnl.c \
+	svga_state_constants.c \
+	svga_state_framebuffer.c \
+	svga_state_rss.c \
+	svga_state_tss.c \
+	svga_state_vdecl.c \
+	svga_state_fs.c \
+	svga_state_vs.c \
+	svga_swtnl_backend.c \
+	svga_swtnl_draw.c \
+	svga_swtnl_state.c \
+	svga_tgsi.c \
+	svga_tgsi_decl_sm20.c \
+	svga_tgsi_decl_sm30.c \
+	svga_tgsi_insn.c
+
+LIBRARY_INCLUDES = \
+	-I$(TOP)/src/gallium/drivers/svga/include
+
+LIBRARY_DEFINES = \
+	-DHAVE_STDINT_H -DHAVE_SYS_TYPES_H
+
+CC = gcc -fvisibility=hidden -msse -msse2
+
+# Set the gnu99 standard to enable anonymous structs in vmware headers.
+#
+CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \
+	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS)
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
new file mode 100644
index 0000000000..0fa745c9b8
--- /dev/null
+++ b/src/gallium/drivers/svga/SConscript
@@ -0,0 +1,75 @@
+Import('*')
+
+env = env.Clone()
+
+if env['platform'] in ['linux']:
+	env.Append(CCFLAGS = ['-fvisibility=hidden'])
+
+if env['gcc']:
+	env.Append(CPPDEFINES = [
+		'HAVE_STDINT_H', 
+		'HAVE_SYS_TYPES_H',
+	])
+	if env['platform'] not in ['windows']:
+		# The Windows headers cause many gcc warnings
+		env.Append(CCFLAGS = ['-Werror'])
+	
+env.Prepend(CPPPATH = [
+	'include',
+])
+
+env.Append(CPPDEFINES = [
+])
+
+sources = [
+    'svga_cmd.c',
+    'svga_context.c',
+    'svga_draw.c',
+    'svga_draw_arrays.c',
+    'svga_draw_elements.c',
+    'svga_pipe_blend.c',
+    'svga_pipe_blit.c',
+    'svga_pipe_clear.c',
+    'svga_pipe_constants.c',
+    'svga_pipe_depthstencil.c',
+    'svga_pipe_draw.c',
+    'svga_pipe_flush.c',
+    'svga_pipe_fs.c',
+    'svga_pipe_misc.c',
+    'svga_pipe_query.c',
+    'svga_pipe_rasterizer.c',
+    'svga_pipe_sampler.c',
+    'svga_pipe_vertex.c',
+    'svga_pipe_vs.c',
+    'svga_screen.c',
+    'svga_screen_buffer.c',
+    'svga_screen_cache.c',
+    'svga_screen_texture.c',
+    'svga_state.c',
+    'svga_state_constants.c',
+    'svga_state_framebuffer.c',
+    'svga_state_need_swtnl.c',
+    'svga_state_rss.c',
+    'svga_state_tss.c',
+    'svga_state_vdecl.c',
+    'svga_state_fs.c',
+    'svga_state_vs.c',
+    'svga_swtnl_backend.c',
+    'svga_swtnl_draw.c',
+    'svga_swtnl_state.c',
+    'svga_tgsi.c',
+    'svga_tgsi_decl_sm20.c',
+    'svga_tgsi_decl_sm30.c',
+    'svga_tgsi_insn.c',
+    
+    'svgadump/svga_dump.c',
+    'svgadump/st_shader_dump.c',
+    'svgadump/st_shader_op.c',
+]
+
+svga = env.ConvenienceLibrary(
+	target = 'svga',
+	source = sources,
+)
+
+Export('svga')
diff --git a/src/gallium/drivers/svga/include/README b/src/gallium/drivers/svga/include/README
new file mode 100644
index 0000000000..a0b8916104
--- /dev/null
+++ b/src/gallium/drivers/svga/include/README
@@ -0,0 +1,3 @@
+This directory contains the headers from the VMware SVGA Device Developer Kit:
+
+   https://vmware-svga.svn.sourceforge.net/svnroot/vmware-svga/trunk/lib/vmware/
diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h
new file mode 100644
index 0000000000..714ce9f45f
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_caps.h
@@ -0,0 +1,139 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_caps.h --
+ *
+ *       Definitions for SVGA3D hardware capabilities.  Capabilities
+ *       are used to query for optional rendering features during
+ *       driver initialization. The capability data is stored as very
+ *       basic key/value dictionary within the "FIFO register" memory
+ *       area at the beginning of BAR2.
+ *
+ *       Note that these definitions are only for 3D capabilities.
+ *       The SVGA device also has "device capabilities" and "FIFO
+ *       capabilities", which are non-3D-specific and are stored as
+ *       bitfields rather than key/value pairs.
+ */
+
+#ifndef _SVGA3D_CAPS_H_
+#define _SVGA3D_CAPS_H_
+
+#define SVGA_FIFO_3D_CAPS_SIZE   (SVGA_FIFO_3D_CAPS_LAST - \
+                                  SVGA_FIFO_3D_CAPS + 1)
+
+
+/*
+ * SVGA3dCapsRecordType
+ *
+ *    Record types that can be found in the caps block.
+ *    Related record types are grouped together numerically so that
+ *    SVGA3dCaps_FindRecord() can be applied on a range of record
+ *    types.
+ */
+
+typedef enum {
+   SVGA3DCAPS_RECORD_UNKNOWN        = 0,
+   SVGA3DCAPS_RECORD_DEVCAPS_MIN    = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS        = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS_MAX    = 0x1ff,
+} SVGA3dCapsRecordType;
+
+
+/*
+ * SVGA3dCapsRecordHeader
+ *
+ *    Header field leading each caps block record. Contains the offset (in
+ *    register words, NOT bytes) to the next caps block record (or the end
+ *    of caps block records which will be a zero word) and the record type
+ *    as defined above.
+ */
+
+typedef
+struct SVGA3dCapsRecordHeader {
+   uint32 length;              /* offset to the next record, in register words (not bytes) */
+   SVGA3dCapsRecordType type;  /* record type, one of SVGA3DCAPS_RECORD_* */
+}
+SVGA3dCapsRecordHeader;
+
+
+/*
+ * SVGA3dCapsRecord
+ *
+ *    Caps block record; "data" is a placeholder for the actual data structure
+ *    contained within the record; for example a record containing a FOOBAR
+ *    structure would be of size "sizeof(SVGA3dCapsRecordHeader) +
+ *    sizeof(FOOBAR)".
+ */
+
+typedef
+struct SVGA3dCapsRecord {
+   SVGA3dCapsRecordHeader header;
+   uint32 data[1];  /* placeholder for the record's actual payload structure */
+}
+SVGA3dCapsRecord;
+
+
+typedef uint32 SVGA3dCapPair[2];
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dCaps_FindRecord
+ *
+ *    Finds the record with the highest-valued type within the inclusive
+ *    range [recordTypeMin, recordTypeMax] in the caps block. If several
+ *    records share that type, the first one encountered is returned.
+ *    Result: pointer to found record, or NULL if not found.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dCapsRecord *
+SVGA3dCaps_FindRecord(const uint32 *capsBlock,
+                      SVGA3dCapsRecordType recordTypeMin,
+                      SVGA3dCapsRecordType recordTypeMax)
+{
+   SVGA3dCapsRecord *record, *found = NULL;
+   uint32 offset;
+
+   /*
+    * Walk the records: word 0 of each is its length in words; a 0 word ends.
+    */
+   for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
+      record = (SVGA3dCapsRecord *) (capsBlock + offset); /* cast drops const */
+      if ((record->header.type >= recordTypeMin) &&
+          (record->header.type <= recordTypeMax) &&
+          (!found || (record->header.type > found->header.type))) {
+         found = record;
+      }
+   }
+
+   return found;
+}
+
+
+#endif // _SVGA3D_CAPS_H_
diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h
new file mode 100644
index 0000000000..77cb453310
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_reg.h
@@ -0,0 +1,1793 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_reg.h --
+ *
+ *       SVGA 3D hardware definitions
+ */
+
+#ifndef _SVGA3D_REG_H_
+#define _SVGA3D_REG_H_
+
+#include "svga_reg.h"
+
+
+/*
+ * 3D Hardware Version
+ *
+ *   The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo
+ *   register.   Is set by the host and read by the guest.  This lets
+ *   us make new guest drivers which are backwards-compatible with old
+ *   SVGA hardware revisions.  It does not let us support old guest
+ *   drivers.  Good enough for now.
+ *
+ */
+
+#define SVGA3D_MAKE_HWVERSION(major, minor)      (((major) << 16) | ((minor) & 0xFF)) /* major: bits 16 and up, minor: low 8 bits */
+#define SVGA3D_MAJOR_HWVERSION(version)          ((version) >> 16)   /* extracts the major part */
+#define SVGA3D_MINOR_HWVERSION(version)          ((version) & 0xFF)  /* extracts the minor part (low 8 bits) */
+
+typedef enum {
+   SVGA3D_HWVERSION_WS5_RC1   = SVGA3D_MAKE_HWVERSION(0, 1),
+   SVGA3D_HWVERSION_WS5_RC2   = SVGA3D_MAKE_HWVERSION(0, 2),
+   SVGA3D_HWVERSION_WS51_RC1  = SVGA3D_MAKE_HWVERSION(0, 3),
+   SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
+   SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
+   SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS65_B1,
+} SVGA3dHardwareVersion;
+
+/*
+ * Generic Types
+ */
+
+typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
+#define SVGA3D_NUM_CLIPPLANES                   6
+#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  8
+
+
+/*
+ * Surface formats.
+ *
+ * If you modify this list, be sure to keep GLUtil.c in sync. It
+ * includes the internal format definition of each surface in
+ * GLUtil_ConvertSurfaceFormat, and it contains a table of
+ * human-readable names in GLUtil_GetFormatName.
+ */
+
+typedef enum SVGA3dSurfaceFormat {
+   SVGA3D_FORMAT_INVALID = 0,
+
+   SVGA3D_X8R8G8B8       = 1,
+   SVGA3D_A8R8G8B8       = 2,
+
+   SVGA3D_R5G6B5         = 3,
+   SVGA3D_X1R5G5B5       = 4,
+   SVGA3D_A1R5G5B5       = 5,
+   SVGA3D_A4R4G4B4       = 6,
+
+   SVGA3D_Z_D32          = 7,
+   SVGA3D_Z_D16          = 8,
+   SVGA3D_Z_D24S8        = 9,
+   SVGA3D_Z_D15S1        = 10,
+
+   SVGA3D_LUMINANCE8            = 11,
+   SVGA3D_LUMINANCE4_ALPHA4     = 12,
+   SVGA3D_LUMINANCE16           = 13,
+   SVGA3D_LUMINANCE8_ALPHA8     = 14,
+
+   SVGA3D_DXT1           = 15,
+   SVGA3D_DXT2           = 16,
+   SVGA3D_DXT3           = 17,
+   SVGA3D_DXT4           = 18,
+   SVGA3D_DXT5           = 19,
+
+   SVGA3D_BUMPU8V8       = 20,
+   SVGA3D_BUMPL6V5U5     = 21,
+   SVGA3D_BUMPX8L8V8U8   = 22,
+   SVGA3D_BUMPL8V8U8     = 23,
+
+   SVGA3D_ARGB_S10E5     = 24,   /* 16-bit floating-point ARGB */
+   SVGA3D_ARGB_S23E8     = 25,   /* 32-bit floating-point ARGB */
+
+   SVGA3D_A2R10G10B10    = 26,
+
+   /* signed formats */
+   SVGA3D_V8U8           = 27,
+   SVGA3D_Q8W8V8U8       = 28,
+   SVGA3D_CxV8U8         = 29,
+
+   /* mixed formats */
+   SVGA3D_X8L8V8U8       = 30,
+   SVGA3D_A2W10V10U10    = 31,
+
+   SVGA3D_ALPHA8         = 32,
+
+   /* Single- and dual-component floating point formats */
+   SVGA3D_R_S10E5        = 33,
+   SVGA3D_R_S23E8        = 34,
+   SVGA3D_RG_S10E5       = 35,
+   SVGA3D_RG_S23E8       = 36,
+
+   /*
+    * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
+    * the most efficient format to use when creating new surfaces
+    * expressly for index or vertex data.
+    */
+   SVGA3D_BUFFER         = 37,
+
+   SVGA3D_Z_D24X8        = 38,
+
+   SVGA3D_V16U16         = 39,
+
+   SVGA3D_G16R16         = 40,
+   SVGA3D_A16B16G16R16   = 41,
+
+   /* Packed Video formats */
+   SVGA3D_UYVY           = 42,
+   SVGA3D_YUY2           = 43,
+
+   SVGA3D_FORMAT_MAX
+} SVGA3dSurfaceFormat;
+
+typedef uint32 SVGA3dColor; /* a, r, g, b */
+
+/*
+ * These match the D3DFORMAT_OP definitions used by Direct3D. We need
+ * them so that we can query the host for what the supported surface
+ * operations are (when we're using the D3D backend, in particular),
+ * and so we can send those operations to the guest.
+ */
+typedef enum {
+   SVGA3DFORMAT_OP_TEXTURE                               = 0x00000001,
+   SVGA3DFORMAT_OP_VOLUMETEXTURE                         = 0x00000002,
+   SVGA3DFORMAT_OP_CUBETEXTURE                           = 0x00000004,
+   SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET                = 0x00000008,
+   SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET              = 0x00000010,
+   SVGA3DFORMAT_OP_ZSTENCIL                              = 0x00000040,
+   SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH   = 0x00000080,
+
+/*
+ * This format can be used as a render target if the current display mode
+ * is the same depth if the alpha channel is ignored. e.g. if the device
+ * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the
+ * format op list entry for A8R8G8B8 should have this cap.
+ */
+   SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET  = 0x00000100,
+
+/*
+ * This format contains DirectDraw support (including Flip).  This flag
+ * should not be set on alpha formats.
+ */
+   SVGA3DFORMAT_OP_DISPLAYMODE                           = 0x00000400,
+
+/*
+ * The rasterizer can support some level of Direct3D support in this format
+ * and implies that the driver can create a Context in this mode (for some
+ * render target format).  When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE
+ * flag must also be set.
+ */
+   SVGA3DFORMAT_OP_3DACCELERATION                        = 0x00000800,
+
+/*
+ * This is set for a private format when the driver has put the bpp in
+ * the structure.
+ */
+   SVGA3DFORMAT_OP_PIXELSIZE                             = 0x00001000,
+
+/*
+ * Indicates that this format can be converted to any RGB format for which
+ * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified
+ */
+   SVGA3DFORMAT_OP_CONVERT_TO_ARGB                       = 0x00002000,
+
+/*
+ * Indicates that this format can be used to create offscreen plain surfaces.
+ */
+   SVGA3DFORMAT_OP_OFFSCREENPLAIN                        = 0x00004000,
+
+/*
+ * Indicates that this format can be read as an SRGB texture (meaning that the
+ * sampler will linearize the looked up data)
+ */
+   SVGA3DFORMAT_OP_SRGBREAD                              = 0x00008000,
+
+/*
+ * Indicates that this format can be used in the bumpmap instructions
+ */
+   SVGA3DFORMAT_OP_BUMPMAP                               = 0x00010000,
+
+/*
+ * Indicates that this format can be sampled by the displacement map sampler
+ */
+   SVGA3DFORMAT_OP_DMAP                                  = 0x00020000,
+
+/*
+ * Indicates that this format cannot be used with texture filtering
+ */
+   SVGA3DFORMAT_OP_NOFILTER                              = 0x00040000,
+
+/*
+ * Indicates that format conversions are supported to this RGB format if
+ * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format.
+ */
+   SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB                    = 0x00080000,
+
+/*
+ * Indicates that this format can be written as an SRGB target (meaning that the
+ * pixel pipe will DE-linearize data on output to format)
+ */
+   SVGA3DFORMAT_OP_SRGBWRITE                             = 0x00100000,
+
+/*
+ * Indicates that this format cannot be used with alpha blending
+ */
+   SVGA3DFORMAT_OP_NOALPHABLEND                          = 0x00200000,
+
+/*
+ * Indicates that the device can auto-generate sublevels for resources
+ * of this format
+ */
+   SVGA3DFORMAT_OP_AUTOGENMIPMAP                         = 0x00400000,
+
+/*
+ * Indicates that this format can be used by vertex texture sampler
+ */
+   SVGA3DFORMAT_OP_VERTEXTEXTURE                         = 0x00800000,
+
+/*
+ * Indicates that this format supports neither texture coordinate wrap
+ * modes, nor mipmapping
+ */
+   SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP                  = 0x01000000
+} SVGA3dFormatOp;
+
+/*
+ * This structure is a conversion of SVGA3DFORMAT_OP_*.
+ * Entries must be located at the same position.
+ */
+typedef union {
+   uint32 value;  /* all SVGA3DFORMAT_OP_* capability bits as a single word */
+   struct {
+      uint32 texture : 1;
+      uint32 volumeTexture : 1;
+      uint32 cubeTexture : 1;
+      uint32 offscreenRenderTarget : 1;
+      uint32 sameFormatRenderTarget : 1;
+      uint32 unknown1 : 1;  /* placeholder for 0x00000020, which has no SVGA3DFORMAT_OP_* flag */
+      uint32 zStencil : 1;
+      uint32 zStencilArbitraryDepth : 1;
+      uint32 sameFormatUpToAlpha : 1;
+      uint32 unknown2 : 1;  /* placeholder for 0x00000200, which has no SVGA3DFORMAT_OP_* flag */
+      uint32 displayMode : 1;
+      uint32 acceleration3d : 1;
+      uint32 pixelSize : 1;
+      uint32 convertToARGB : 1;
+      uint32 offscreenPlain : 1;
+      uint32 sRGBRead : 1;
+      uint32 bumpMap : 1;
+      uint32 dmap : 1;
+      uint32 noFilter : 1;
+      uint32 memberOfGroupARGB : 1;
+      uint32 sRGBWrite : 1;
+      uint32 noAlphaBlend : 1;
+      uint32 autoGenMipMap : 1;
+      uint32 vertexTexture : 1;
+      uint32 noTexCoordWrapNorMip : 1;
+   };
+} SVGA3dSurfaceFormatCaps;
+
+/*
+ * SVGA_3D_CMD_SETRENDERSTATE Types.  All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {
+   SVGA3D_RS_INVALID                   = 0,
+   SVGA3D_RS_ZENABLE                   = 1,     /* SVGA3dBool */
+   SVGA3D_RS_ZWRITEENABLE              = 2,     /* SVGA3dBool */
+   SVGA3D_RS_ALPHATESTENABLE           = 3,     /* SVGA3dBool */
+   SVGA3D_RS_DITHERENABLE              = 4,     /* SVGA3dBool */
+   SVGA3D_RS_BLENDENABLE               = 5,     /* SVGA3dBool */
+   SVGA3D_RS_FOGENABLE                 = 6,     /* SVGA3dBool */
+   SVGA3D_RS_SPECULARENABLE            = 7,     /* SVGA3dBool */
+   SVGA3D_RS_STENCILENABLE             = 8,     /* SVGA3dBool */
+   SVGA3D_RS_LIGHTINGENABLE            = 9,     /* SVGA3dBool */
+   SVGA3D_RS_NORMALIZENORMALS          = 10,    /* SVGA3dBool */
+   SVGA3D_RS_POINTSPRITEENABLE         = 11,    /* SVGA3dBool */
+   SVGA3D_RS_POINTSCALEENABLE          = 12,    /* SVGA3dBool */
+   SVGA3D_RS_STENCILREF                = 13,    /* uint32 */
+   SVGA3D_RS_STENCILMASK               = 14,    /* uint32 */
+   SVGA3D_RS_STENCILWRITEMASK          = 15,    /* uint32 */
+   SVGA3D_RS_FOGSTART                  = 16,    /* float */
+   SVGA3D_RS_FOGEND                    = 17,    /* float */
+   SVGA3D_RS_FOGDENSITY                = 18,    /* float */
+   SVGA3D_RS_POINTSIZE                 = 19,    /* float */
+   SVGA3D_RS_POINTSIZEMIN              = 20,    /* float */
+   SVGA3D_RS_POINTSIZEMAX              = 21,    /* float */
+   SVGA3D_RS_POINTSCALE_A              = 22,    /* float */
+   SVGA3D_RS_POINTSCALE_B              = 23,    /* float */
+   SVGA3D_RS_POINTSCALE_C              = 24,    /* float */
+   SVGA3D_RS_FOGCOLOR                  = 25,    /* SVGA3dColor */
+   SVGA3D_RS_AMBIENT                   = 26,    /* SVGA3dColor */
+   SVGA3D_RS_CLIPPLANEENABLE           = 27,    /* SVGA3dClipPlanes */
+   SVGA3D_RS_FOGMODE                   = 28,    /* SVGA3dFogMode */
+   SVGA3D_RS_FILLMODE                  = 29,    /* SVGA3dFillMode */
+   SVGA3D_RS_SHADEMODE                 = 30,    /* SVGA3dShadeMode */
+   SVGA3D_RS_LINEPATTERN               = 31,    /* SVGA3dLinePattern */
+   SVGA3D_RS_SRCBLEND                  = 32,    /* SVGA3dBlendOp */
+   SVGA3D_RS_DSTBLEND                  = 33,    /* SVGA3dBlendOp */
+   SVGA3D_RS_BLENDEQUATION             = 34,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_CULLMODE                  = 35,    /* SVGA3dFace */
+   SVGA3D_RS_ZFUNC                     = 36,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_ALPHAFUNC                 = 37,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_STENCILFUNC               = 38,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_STENCILFAIL               = 39,    /* SVGA3dStencilOp */
+   SVGA3D_RS_STENCILZFAIL              = 40,    /* SVGA3dStencilOp */
+   SVGA3D_RS_STENCILPASS               = 41,    /* SVGA3dStencilOp */
+   SVGA3D_RS_ALPHAREF                  = 42,    /* float (0.0 .. 1.0) */
+   SVGA3D_RS_FRONTWINDING              = 43,    /* SVGA3dFrontWinding */
+   SVGA3D_RS_COORDINATETYPE            = 44,    /* SVGA3dCoordinateType */
+   SVGA3D_RS_ZBIAS                     = 45,    /* float */
+   SVGA3D_RS_RANGEFOGENABLE            = 46,    /* SVGA3dBool */
+   SVGA3D_RS_COLORWRITEENABLE          = 47,    /* SVGA3dColorMask */
+   SVGA3D_RS_VERTEXMATERIALENABLE      = 48,    /* SVGA3dBool */
+   SVGA3D_RS_DIFFUSEMATERIALSOURCE     = 49,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_SPECULARMATERIALSOURCE    = 50,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_AMBIENTMATERIALSOURCE     = 51,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_EMISSIVEMATERIALSOURCE    = 52,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_TEXTUREFACTOR             = 53,    /* SVGA3dColor */
+   SVGA3D_RS_LOCALVIEWER               = 54,    /* SVGA3dBool */
+   SVGA3D_RS_SCISSORTESTENABLE         = 55,    /* SVGA3dBool */
+   SVGA3D_RS_BLENDCOLOR                = 56,    /* SVGA3dColor */
+   SVGA3D_RS_STENCILENABLE2SIDED       = 57,    /* SVGA3dBool */
+   SVGA3D_RS_CCWSTENCILFUNC            = 58,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_CCWSTENCILFAIL            = 59,    /* SVGA3dStencilOp */
+   SVGA3D_RS_CCWSTENCILZFAIL           = 60,    /* SVGA3dStencilOp */
+   SVGA3D_RS_CCWSTENCILPASS            = 61,    /* SVGA3dStencilOp */
+   SVGA3D_RS_VERTEXBLEND               = 62,    /* SVGA3dVertexBlendFlags */
+   SVGA3D_RS_SLOPESCALEDEPTHBIAS       = 63,    /* float */
+   SVGA3D_RS_DEPTHBIAS                 = 64,    /* float */
+
+
+   /*
+    * Output Gamma Level
+    *
+    * Output gamma affects the gamma curve of colors that are output from the
+    * rendering pipeline.  A value of 1.0 specifies a linear color space. If the
+    * value is <= 0.0, gamma correction is ignored and linear color space is
+    * used.
+    */
+
+   SVGA3D_RS_OUTPUTGAMMA               = 65,    /* float */
+   SVGA3D_RS_ZVISIBLE                  = 66,    /* SVGA3dBool */
+   SVGA3D_RS_LASTPIXEL                 = 67,    /* SVGA3dBool */
+   SVGA3D_RS_CLIPPING                  = 68,    /* SVGA3dBool */
+   SVGA3D_RS_WRAP0                     = 69,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP1                     = 70,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP2                     = 71,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP3                     = 72,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP4                     = 73,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP5                     = 74,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP6                     = 75,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP7                     = 76,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP8                     = 77,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP9                     = 78,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP10                    = 79,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP11                    = 80,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP12                    = 81,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP13                    = 82,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP14                    = 83,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP15                    = 84,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_MULTISAMPLEANTIALIAS      = 85,    /* SVGA3dBool */
+   SVGA3D_RS_MULTISAMPLEMASK           = 86,    /* uint32 */
+   SVGA3D_RS_INDEXEDVERTEXBLENDENABLE  = 87,    /* SVGA3dBool */
+   SVGA3D_RS_TWEENFACTOR               = 88,    /* float */
+   SVGA3D_RS_ANTIALIASEDLINEENABLE     = 89,    /* SVGA3dBool */
+   SVGA3D_RS_COLORWRITEENABLE1         = 90,    /* SVGA3dColorMask */
+   SVGA3D_RS_COLORWRITEENABLE2         = 91,    /* SVGA3dColorMask */
+   SVGA3D_RS_COLORWRITEENABLE3         = 92,    /* SVGA3dColorMask */
+   SVGA3D_RS_SEPARATEALPHABLENDENABLE  = 93,    /* SVGA3dBool */
+   SVGA3D_RS_SRCBLENDALPHA             = 94,    /* SVGA3dBlendOp */
+   SVGA3D_RS_DSTBLENDALPHA             = 95,    /* SVGA3dBlendOp */
+   SVGA3D_RS_BLENDEQUATIONALPHA        = 96,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_MAX
+} SVGA3dRenderStateName;
+
+typedef enum {
+   SVGA3D_VERTEXMATERIAL_NONE     = 0,    /* Use the value in the current material */
+   SVGA3D_VERTEXMATERIAL_DIFFUSE  = 1,    /* Use the value in the diffuse component */
+   SVGA3D_VERTEXMATERIAL_SPECULAR = 2,    /* Use the value in the specular component */
+} SVGA3dVertexMaterial;
+
+typedef enum {
+   SVGA3D_FILLMODE_INVALID = 0,
+   SVGA3D_FILLMODE_POINT   = 1,
+   SVGA3D_FILLMODE_LINE    = 2,
+   SVGA3D_FILLMODE_FILL    = 3,
+   SVGA3D_FILLMODE_MAX
+} SVGA3dFillModeType;
+
+
+typedef
+union {
+   struct {
+      uint16   mode;       /* SVGA3dFillModeType */
+      uint16   face;       /* SVGA3dFace */
+   };
+   uint32 uintValue;
+} SVGA3dFillMode;
+
+typedef enum {
+   SVGA3D_SHADEMODE_INVALID = 0,
+   SVGA3D_SHADEMODE_FLAT    = 1,
+   SVGA3D_SHADEMODE_SMOOTH  = 2,
+   SVGA3D_SHADEMODE_PHONG   = 3,     /* Not supported */
+   SVGA3D_SHADEMODE_MAX
+} SVGA3dShadeMode;
+
+typedef
+union {
+   struct {
+      uint16 repeat;
+      uint16 pattern;
+   };
+   uint32 uintValue;
+} SVGA3dLinePattern;
+
+typedef enum {
+   SVGA3D_BLENDOP_INVALID            = 0,
+   SVGA3D_BLENDOP_ZERO               = 1,
+   SVGA3D_BLENDOP_ONE                = 2,
+   SVGA3D_BLENDOP_SRCCOLOR           = 3,
+   SVGA3D_BLENDOP_INVSRCCOLOR        = 4,
+   SVGA3D_BLENDOP_SRCALPHA           = 5,
+   SVGA3D_BLENDOP_INVSRCALPHA        = 6,
+   SVGA3D_BLENDOP_DESTALPHA          = 7,
+   SVGA3D_BLENDOP_INVDESTALPHA       = 8,
+   SVGA3D_BLENDOP_DESTCOLOR          = 9,
+   SVGA3D_BLENDOP_INVDESTCOLOR       = 10,
+   SVGA3D_BLENDOP_SRCALPHASAT        = 11,
+   SVGA3D_BLENDOP_BLENDFACTOR        = 12,
+   SVGA3D_BLENDOP_INVBLENDFACTOR     = 13,
+   SVGA3D_BLENDOP_MAX
+} SVGA3dBlendOp;
+
+typedef enum {
+   SVGA3D_BLENDEQ_INVALID            = 0,
+   SVGA3D_BLENDEQ_ADD                = 1,
+   SVGA3D_BLENDEQ_SUBTRACT           = 2,
+   SVGA3D_BLENDEQ_REVSUBTRACT        = 3,
+   SVGA3D_BLENDEQ_MINIMUM            = 4,
+   SVGA3D_BLENDEQ_MAXIMUM            = 5,
+   SVGA3D_BLENDEQ_MAX
+} SVGA3dBlendEquation;
+
+typedef enum {
+   SVGA3D_FRONTWINDING_INVALID = 0,
+   SVGA3D_FRONTWINDING_CW      = 1,
+   SVGA3D_FRONTWINDING_CCW     = 2,
+   SVGA3D_FRONTWINDING_MAX
+} SVGA3dFrontWinding;
+
+typedef enum {
+   SVGA3D_FACE_INVALID  = 0,
+   SVGA3D_FACE_NONE     = 1,
+   SVGA3D_FACE_FRONT    = 2,
+   SVGA3D_FACE_BACK     = 3,
+   SVGA3D_FACE_FRONT_BACK = 4,
+   SVGA3D_FACE_MAX
+} SVGA3dFace;
+
+/*
+ * The order and the values should not be changed
+ */
+
+typedef enum {
+   SVGA3D_CMP_INVALID              = 0,
+   SVGA3D_CMP_NEVER                = 1,
+   SVGA3D_CMP_LESS                 = 2,
+   SVGA3D_CMP_EQUAL                = 3,
+   SVGA3D_CMP_LESSEQUAL            = 4,
+   SVGA3D_CMP_GREATER              = 5,
+   SVGA3D_CMP_NOTEQUAL             = 6,
+   SVGA3D_CMP_GREATEREQUAL         = 7,
+   SVGA3D_CMP_ALWAYS               = 8,
+   SVGA3D_CMP_MAX
+} SVGA3dCmpFunc;
+
+/*
+ * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows
+ * the fog factor to be specified in the alpha component of the specular
+ * (a.k.a. secondary) vertex color.
+ */
+typedef enum {
+   SVGA3D_FOGFUNC_INVALID          = 0,
+   SVGA3D_FOGFUNC_EXP              = 1,
+   SVGA3D_FOGFUNC_EXP2             = 2,
+   SVGA3D_FOGFUNC_LINEAR           = 3,
+   SVGA3D_FOGFUNC_PER_VERTEX       = 4
+} SVGA3dFogFunction;
+
+/*
+ * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex
+ * or per-pixel basis.
+ */
+typedef enum {
+   SVGA3D_FOGTYPE_INVALID          = 0,
+   SVGA3D_FOGTYPE_VERTEX           = 1,
+   SVGA3D_FOGTYPE_PIXEL            = 2,
+   SVGA3D_FOGTYPE_MAX              = 3
+} SVGA3dFogType;
+
+/*
+ * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is
+ * computed using the eye Z value of each pixel (or vertex), whereas range-
+ * based fog is computed using the actual distance (range) to the eye.
+ */
+typedef enum {
+   SVGA3D_FOGBASE_INVALID          = 0,
+   SVGA3D_FOGBASE_DEPTHBASED       = 1,
+   SVGA3D_FOGBASE_RANGEBASED       = 2,
+   SVGA3D_FOGBASE_MAX              = 3
+} SVGA3dFogBase;
+
+typedef enum {
+   SVGA3D_STENCILOP_INVALID        = 0,
+   SVGA3D_STENCILOP_KEEP           = 1,
+   SVGA3D_STENCILOP_ZERO           = 2,
+   SVGA3D_STENCILOP_REPLACE        = 3,
+   SVGA3D_STENCILOP_INCRSAT        = 4,
+   SVGA3D_STENCILOP_DECRSAT        = 5,
+   SVGA3D_STENCILOP_INVERT         = 6,
+   SVGA3D_STENCILOP_INCR           = 7,
+   SVGA3D_STENCILOP_DECR           = 8,
+   SVGA3D_STENCILOP_MAX
+} SVGA3dStencilOp;
+
+typedef enum {
+   SVGA3D_CLIPPLANE_0              = (1 << 0),
+   SVGA3D_CLIPPLANE_1              = (1 << 1),
+   SVGA3D_CLIPPLANE_2              = (1 << 2),
+   SVGA3D_CLIPPLANE_3              = (1 << 3),
+   SVGA3D_CLIPPLANE_4              = (1 << 4),
+   SVGA3D_CLIPPLANE_5              = (1 << 5),
+} SVGA3dClipPlanes;
+
+typedef enum {
+   SVGA3D_CLEAR_COLOR              = 0x1,
+   SVGA3D_CLEAR_DEPTH              = 0x2,
+   SVGA3D_CLEAR_STENCIL            = 0x4
+} SVGA3dClearFlag;
+
+typedef enum {
+   SVGA3D_RT_DEPTH                 = 0,
+   SVGA3D_RT_STENCIL               = 1,
+   SVGA3D_RT_COLOR0                = 2,
+   SVGA3D_RT_COLOR1                = 3,
+   SVGA3D_RT_COLOR2                = 4,
+   SVGA3D_RT_COLOR3                = 5,
+   SVGA3D_RT_COLOR4                = 6,
+   SVGA3D_RT_COLOR5                = 7,
+   SVGA3D_RT_COLOR6                = 8,
+   SVGA3D_RT_COLOR7                = 9,
+   SVGA3D_RT_MAX,                  /* implicitly 10: one past SVGA3D_RT_COLOR7 */
+   SVGA3D_RT_INVALID               = ((uint32)-1), /* NOTE(review): exceeds INT_MAX if uint32 is 32-bit; relies on a compiler extension - confirm */
+} SVGA3dRenderTargetType;
+
+#define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1)
+
+typedef
+union {
+   struct {
+      uint32  red   : 1;
+      uint32  green : 1;
+      uint32  blue  : 1;
+      uint32  alpha : 1;
+   };
+   uint32 uintValue;
+} SVGA3dColorMask;
+
+typedef enum {
+   SVGA3D_VBLEND_DISABLE            = 0,
+   SVGA3D_VBLEND_1WEIGHT            = 1,
+   SVGA3D_VBLEND_2WEIGHT            = 2,
+   SVGA3D_VBLEND_3WEIGHT            = 3,
+} SVGA3dVertexBlendFlags;
+
+typedef enum {   /* One bit per coordinate 0..3 */
+   SVGA3D_WRAPCOORD_0   = 1 << 0,
+   SVGA3D_WRAPCOORD_1   = 1 << 1,
+   SVGA3D_WRAPCOORD_2   = 1 << 2,
+   SVGA3D_WRAPCOORD_3   = 1 << 3,
+   SVGA3D_WRAPCOORD_ALL = 0xF,   /* OR of the four bits above */
+} SVGA3dWrapFlags;
+
+/*
+ * SVGA_3D_CMD_SETTEXTURESTATE types.  All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {   /* Type of SVGA3dTextureState.name; trailing comments give the value type */
+   SVGA3D_TS_INVALID                    = 0,
+   SVGA3D_TS_BIND_TEXTURE               = 1,    /* SVGA3dSurfaceId */
+   SVGA3D_TS_COLOROP                    = 2,    /* SVGA3dTextureCombiner */
+   SVGA3D_TS_COLORARG1                  = 3,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_COLORARG2                  = 4,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAOP                    = 5,    /* SVGA3dTextureCombiner */
+   SVGA3D_TS_ALPHAARG1                  = 6,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAARG2                  = 7,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ADDRESSU                   = 8,    /* SVGA3dTextureAddress */
+   SVGA3D_TS_ADDRESSV                   = 9,    /* SVGA3dTextureAddress */
+   SVGA3D_TS_MIPFILTER                  = 10,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_MAGFILTER                  = 11,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_MINFILTER                  = 12,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_BORDERCOLOR                = 13,   /* SVGA3dColor */
+   SVGA3D_TS_TEXCOORDINDEX              = 14,   /* uint32 */
+   SVGA3D_TS_TEXTURETRANSFORMFLAGS      = 15,   /* SVGA3dTexTransformFlags */
+   SVGA3D_TS_TEXCOORDGEN                = 16,   /* SVGA3dTextureCoordGen */
+   SVGA3D_TS_BUMPENVMAT00               = 17,   /* float */
+   SVGA3D_TS_BUMPENVMAT01               = 18,   /* float */
+   SVGA3D_TS_BUMPENVMAT10               = 19,   /* float */
+   SVGA3D_TS_BUMPENVMAT11               = 20,   /* float */
+   SVGA3D_TS_TEXTURE_MIPMAP_LEVEL       = 21,   /* uint32 */
+   SVGA3D_TS_TEXTURE_LOD_BIAS           = 22,   /* float */
+   SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL  = 23,   /* uint32 */
+   SVGA3D_TS_ADDRESSW                   = 24,   /* SVGA3dTextureAddress */
+
+
+   /*
+    * Sampler Gamma Level
+    *
+    * Sampler gamma affects the color of samples taken from the sampler.  A
+    * value of 1.0 will produce linear samples.  If the value is <= 0.0 the
+    * gamma value is ignored and a linear space is used.
+    */
+
+   SVGA3D_TS_GAMMA                      = 25,   /* float */
+   SVGA3D_TS_BUMPENVLSCALE              = 26,   /* float */
+   SVGA3D_TS_BUMPENVLOFFSET             = 27,   /* float */
+   SVGA3D_TS_COLORARG0                  = 28,   /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAARG0                  = 29,   /* SVGA3dTextureArgData */
+   SVGA3D_TS_MAX
+} SVGA3dTextureStateName;
+
+typedef enum {   /* Value type of SVGA3D_TS_COLOROP and SVGA3D_TS_ALPHAOP */
+   SVGA3D_TC_INVALID                   = 0,
+   SVGA3D_TC_DISABLE                   = 1,
+   SVGA3D_TC_SELECTARG1                = 2,
+   SVGA3D_TC_SELECTARG2                = 3,
+   SVGA3D_TC_MODULATE                  = 4,
+   SVGA3D_TC_ADD                       = 5,
+   SVGA3D_TC_ADDSIGNED                 = 6,
+   SVGA3D_TC_SUBTRACT                  = 7,
+   SVGA3D_TC_BLENDTEXTUREALPHA         = 8,
+   SVGA3D_TC_BLENDDIFFUSEALPHA         = 9,
+   SVGA3D_TC_BLENDCURRENTALPHA         = 10,
+   SVGA3D_TC_BLENDFACTORALPHA          = 11,
+   SVGA3D_TC_MODULATE2X                = 12,
+   SVGA3D_TC_MODULATE4X                = 13,
+   SVGA3D_TC_DSDT                      = 14,
+   SVGA3D_TC_DOTPRODUCT3               = 15,
+   SVGA3D_TC_BLENDTEXTUREALPHAPM       = 16,
+   SVGA3D_TC_ADDSIGNED2X               = 17,
+   SVGA3D_TC_ADDSMOOTH                 = 18,
+   SVGA3D_TC_PREMODULATE               = 19,
+   SVGA3D_TC_MODULATEALPHA_ADDCOLOR    = 20,
+   SVGA3D_TC_MODULATECOLOR_ADDALPHA    = 21,
+   SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22,
+   SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23,
+   SVGA3D_TC_BUMPENVMAPLUMINANCE       = 24,
+   SVGA3D_TC_MULTIPLYADD               = 25,
+   SVGA3D_TC_LERP                      = 26,
+   SVGA3D_TC_MAX                       /* 27: one past SVGA3D_TC_LERP */
+} SVGA3dTextureCombiner;
+
+#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) ((svga3d_tc_op) ? (1 << ((svga3d_tc_op) - 1)) : 0)   /* op 0 -> 0, op N -> bit N-1 */
+
+typedef enum {   /* Value type of SVGA3D_TS_ADDRESSU / ADDRESSV / ADDRESSW */
+   SVGA3D_TEX_ADDRESS_INVALID    = 0,
+   SVGA3D_TEX_ADDRESS_WRAP       = 1,
+   SVGA3D_TEX_ADDRESS_MIRROR     = 2,
+   SVGA3D_TEX_ADDRESS_CLAMP      = 3,
+   SVGA3D_TEX_ADDRESS_BORDER     = 4,
+   SVGA3D_TEX_ADDRESS_MIRRORONCE = 5,
+   SVGA3D_TEX_ADDRESS_EDGE       = 6,
+   SVGA3D_TEX_ADDRESS_MAX
+} SVGA3dTextureAddress;
+
+/*
+ * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is
+ * disabled, and the rasterizer should use the magnification filter instead.
+ */
+typedef enum {   /* Value type of SVGA3D_TS_MIPFILTER / MAGFILTER / MINFILTER */
+   SVGA3D_TEX_FILTER_NONE           = 0,
+   SVGA3D_TEX_FILTER_NEAREST        = 1,
+   SVGA3D_TEX_FILTER_LINEAR         = 2,
+   SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_MAX
+} SVGA3dTextureFilter;
+
+typedef enum {   /* Value type of SVGA3D_TS_TEXTURETRANSFORMFLAGS */
+   SVGA3D_TEX_TRANSFORM_OFF    = 0,
+   SVGA3D_TEX_TRANSFORM_S      = (1 << 0),
+   SVGA3D_TEX_TRANSFORM_T      = (1 << 1),
+   SVGA3D_TEX_TRANSFORM_R      = (1 << 2),
+   SVGA3D_TEX_TRANSFORM_Q      = (1 << 3),
+   SVGA3D_TEX_PROJECTED        = (1 << 15),   /* bit 15, not contiguous with S/T/R/Q */
+} SVGA3dTexTransformFlags;
+
+typedef enum {   /* Value type of SVGA3D_TS_TEXCOORDGEN */
+   SVGA3D_TEXCOORD_GEN_OFF              = 0,
+   SVGA3D_TEXCOORD_GEN_EYE_POSITION     = 1,
+   SVGA3D_TEXCOORD_GEN_EYE_NORMAL       = 2,
+   SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3,
+   SVGA3D_TEXCOORD_GEN_SPHERE           = 4,
+   SVGA3D_TEXCOORD_GEN_MAX
+} SVGA3dTextureCoordGen;
+
+/*
+ * Texture argument constants for texture combiner
+ */
+typedef enum {   /* Value type of SVGA3D_TS_COLORARG0-2 and SVGA3D_TS_ALPHAARG0-2 */
+   SVGA3D_TA_INVALID    = 0,
+   SVGA3D_TA_CONSTANT   = 1,
+   SVGA3D_TA_PREVIOUS   = 2,
+   SVGA3D_TA_DIFFUSE    = 3,
+   SVGA3D_TA_TEXTURE    = 4,
+   SVGA3D_TA_SPECULAR   = 5,
+   SVGA3D_TA_MAX
+} SVGA3dTextureArgData;
+
+#define SVGA3D_TM_MASK_LEN 4   /* modifier values below are shifted left by this many bits */
+
+/* Modifiers for texture argument constants defined above. */
+typedef enum {
+   SVGA3D_TM_NONE       = 0,
+   SVGA3D_TM_ALPHA      = (1 << SVGA3D_TM_MASK_LEN),   /* 0x10 */
+   SVGA3D_TM_ONE_MINUS  = (2 << SVGA3D_TM_MASK_LEN),   /* 0x20 */
+} SVGA3dTextureArgModifier;
+
+#define SVGA3D_INVALID_ID         ((uint32)-1)   /* all bits set in a uint32 */
+#define SVGA3D_MAX_CLIP_PLANES    6              /* matches SVGA3D_CLIPPLANE_0..5 */
+
+/*
+ * This is the limit to the number of fixed-function texture
+ * transforms and texture coordinates we can support. It does *not*
+ * correspond to the number of texture image units (samplers) we
+ * support!
+ */
+#define SVGA3D_MAX_TEXTURE_COORDS 8
+
+/*
+ * Vertex declarations
+ *
+ * Notes:
+ *
+ * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you
+ * draw with any POSITIONT vertex arrays, the programmable vertex
+ * pipeline will be implicitly disabled. Drawing will take place as if
+ * no vertex shader was bound.
+ */
+
+typedef enum {   /* Type of SVGA3dVertexArrayIdentity.usage */
+   SVGA3D_DECLUSAGE_POSITION     = 0,
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
+   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
+   SVGA3D_DECLUSAGE_NORMAL,            //  3
+   SVGA3D_DECLUSAGE_PSIZE,             //  4
+   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
+   SVGA3D_DECLUSAGE_TANGENT,           //  6
+   SVGA3D_DECLUSAGE_BINORMAL,          //  7
+   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
+   SVGA3D_DECLUSAGE_POSITIONT,         //  9
+   SVGA3D_DECLUSAGE_COLOR,             // 10
+   SVGA3D_DECLUSAGE_FOG,               // 11
+   SVGA3D_DECLUSAGE_DEPTH,             // 12
+   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_MAX                // 14
+} SVGA3dDeclUsage;
+
+typedef enum {   /* Type of SVGA3dVertexArrayIdentity.method; values run 0..6 in order */
+   SVGA3D_DECLMETHOD_DEFAULT     = 0,
+   SVGA3D_DECLMETHOD_PARTIALU,         //  1
+   SVGA3D_DECLMETHOD_PARTIALV,         //  2
+   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_UV,               //  4
+   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+} SVGA3dDeclMethod;
+
+typedef enum {   /* Type of SVGA3dVertexArrayIdentity.type */
+   SVGA3D_DECLTYPE_FLOAT1        =  0,
+   SVGA3D_DECLTYPE_FLOAT2        =  1,
+   SVGA3D_DECLTYPE_FLOAT3        =  2,
+   SVGA3D_DECLTYPE_FLOAT4        =  3,
+   SVGA3D_DECLTYPE_D3DCOLOR      =  4,
+   SVGA3D_DECLTYPE_UBYTE4        =  5,
+   SVGA3D_DECLTYPE_SHORT2        =  6,
+   SVGA3D_DECLTYPE_SHORT4        =  7,
+   SVGA3D_DECLTYPE_UBYTE4N       =  8,
+   SVGA3D_DECLTYPE_SHORT2N       =  9,
+   SVGA3D_DECLTYPE_SHORT4N       = 10,
+   SVGA3D_DECLTYPE_USHORT2N      = 11,
+   SVGA3D_DECLTYPE_USHORT4N      = 12,
+   SVGA3D_DECLTYPE_UDEC3         = 13,
+   SVGA3D_DECLTYPE_DEC3N         = 14,
+   SVGA3D_DECLTYPE_FLOAT16_2     = 15,
+   SVGA3D_DECLTYPE_FLOAT16_4     = 16,
+   SVGA3D_DECLTYPE_MAX,          /* 17 */
+} SVGA3dDeclType;
+
+/*
+ * This structure is used for the divisor for geometry instancing;
+ * it's a direct translation of the Direct3D equivalent.
+ */
+typedef union {
+   struct {
+      /*
+       * For index data, this number represents the number of instances to draw.
+       * For instance data, this number represents the number of
+       * instances/vertex in this stream
+       */
+      uint32 count : 30;
+
+      /*
+       * This is 1 if this is supposed to be the data that is repeated for
+       * every instance.
+       */
+      uint32 indexedData : 1;
+
+      /*
+       * This is 1 if this is supposed to be the per-instance data.
+       */
+      uint32 instanceData : 1;
+   };
+
+   uint32 value;   /* the three bit-fields (30 + 1 + 1 bits) viewed as one word */
+} SVGA3dVertexDivisor;
+
+typedef enum {   /* Type of SVGA3dPrimitiveRange.primType */
+   SVGA3D_PRIMITIVE_INVALID                     = 0,
+   SVGA3D_PRIMITIVE_TRIANGLELIST                = 1,
+   SVGA3D_PRIMITIVE_POINTLIST                   = 2,
+   SVGA3D_PRIMITIVE_LINELIST                    = 3,
+   SVGA3D_PRIMITIVE_LINESTRIP                   = 4,
+   SVGA3D_PRIMITIVE_TRIANGLESTRIP               = 5,
+   SVGA3D_PRIMITIVE_TRIANGLEFAN                 = 6,
+   SVGA3D_PRIMITIVE_MAX
+} SVGA3dPrimitiveType;
+
+typedef enum {   /* 0 is the INVALID value; MAX (3) is one past the last named value */
+   SVGA3D_COORDINATE_INVALID                   = 0,
+   SVGA3D_COORDINATE_LEFTHANDED                = 1,
+   SVGA3D_COORDINATE_RIGHTHANDED               = 2,
+   SVGA3D_COORDINATE_MAX
+} SVGA3dCoordinateType;
+
+typedef enum {   /* Type of SVGA3dCmdSetTransform.type */
+   SVGA3D_TRANSFORM_INVALID                     = 0,
+   SVGA3D_TRANSFORM_WORLD                       = 1,
+   SVGA3D_TRANSFORM_VIEW                        = 2,
+   SVGA3D_TRANSFORM_PROJECTION                  = 3,
+   SVGA3D_TRANSFORM_TEXTURE0                    = 4,
+   SVGA3D_TRANSFORM_TEXTURE1                    = 5,
+   SVGA3D_TRANSFORM_TEXTURE2                    = 6,
+   SVGA3D_TRANSFORM_TEXTURE3                    = 7,
+   SVGA3D_TRANSFORM_TEXTURE4                    = 8,
+   SVGA3D_TRANSFORM_TEXTURE5                    = 9,
+   SVGA3D_TRANSFORM_TEXTURE6                    = 10,
+   SVGA3D_TRANSFORM_TEXTURE7                    = 11,
+   SVGA3D_TRANSFORM_WORLD1                      = 12,
+   SVGA3D_TRANSFORM_WORLD2                      = 13,
+   SVGA3D_TRANSFORM_WORLD3                      = 14,
+   SVGA3D_TRANSFORM_MAX
+} SVGA3dTransformType;
+
+typedef enum {   /* Type of SVGA3dLightData.type */
+   SVGA3D_LIGHTTYPE_INVALID                     = 0,
+   SVGA3D_LIGHTTYPE_POINT                       = 1,
+   SVGA3D_LIGHTTYPE_SPOT1                       = 2, /* 1-cone, in degrees */
+   SVGA3D_LIGHTTYPE_SPOT2                       = 3, /* 2-cone, in radians */
+   SVGA3D_LIGHTTYPE_DIRECTIONAL                 = 4,
+   SVGA3D_LIGHTTYPE_MAX
+} SVGA3dLightType;
+
+typedef enum {   /* Six values, 0..5: +X, -X, +Y, -Y, +Z, -Z */
+   SVGA3D_CUBEFACE_POSX                         = 0,
+   SVGA3D_CUBEFACE_NEGX                         = 1,
+   SVGA3D_CUBEFACE_POSY                         = 2,
+   SVGA3D_CUBEFACE_NEGY                         = 3,
+   SVGA3D_CUBEFACE_POSZ                         = 4,
+   SVGA3D_CUBEFACE_NEGZ                         = 5,
+} SVGA3dCubeFace;
+
+typedef enum {   /* 'type' field of the shader define/destroy/set/set-const commands */
+   SVGA3D_SHADERTYPE_COMPILED_DX8               = 0,
+   SVGA3D_SHADERTYPE_VS                         = 1,
+   SVGA3D_SHADERTYPE_PS                         = 2,
+   SVGA3D_SHADERTYPE_MAX
+} SVGA3dShaderType;
+
+typedef enum {   /* Type of SVGA3dCmdSetShaderConst.ctype */
+   SVGA3D_CONST_TYPE_FLOAT                      = 0,
+   SVGA3D_CONST_TYPE_INT                        = 1,
+   SVGA3D_CONST_TYPE_BOOL                       = 2,
+} SVGA3dShaderConstType;
+
+#define SVGA3D_MAX_SURFACE_FACES                6   /* length of SVGA3dCmdDefineSurface.face[] */
+
+typedef enum {   /* Type of SVGA3dCmdSurfaceStretchBlt.mode */
+   SVGA3D_STRETCH_BLT_POINT                     = 0,
+   SVGA3D_STRETCH_BLT_LINEAR                    = 1,
+   SVGA3D_STRETCH_BLT_MAX
+} SVGA3dStretchBltMode;
+
+typedef enum {   /* 'type' field of the begin/end/wait-for query commands */
+   SVGA3D_QUERYTYPE_OCCLUSION                   = 0,
+   SVGA3D_QUERYTYPE_MAX
+} SVGA3dQueryType;
+
+typedef enum {   /* Type of SVGA3dQueryResult.state */
+   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Waiting on the host (set by guest) */
+   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully (set by host) */
+   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully (set by host) */
+   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (For guest use only) */
+} SVGA3dQueryState;
+
+typedef enum {   /* Type of SVGA3dCmdSurfaceDMA.transfer; note there is no 0 value */
+   SVGA3D_WRITE_HOST_VRAM        = 1,
+   SVGA3D_READ_HOST_VRAM         = 2,
+} SVGA3dTransferType;
+
+/*
+ * The maximum number of vertex arrays we're guaranteed to support in
+ * SVGA_3D_CMD_DRAW_PRIMITIVES.
+ */
+#define SVGA3D_MAX_VERTEX_ARRAYS   32
+
+/*
+ * Identifiers for commands in the command FIFO.
+ *
+ * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
+ * the SVGA3D protocol and remain reserved; they should not be used in the
+ * future.
+ *
+ * IDs between 1040 and 1999 (inclusive) are available for use by the
+ * current SVGA3D protocol.
+ *
+ * FIFO clients other than SVGA3D should stay below 1000, or at 2000
+ * and up.
+ */
+
+#define SVGA_3D_CMD_LEGACY_BASE            1000   /* 1000..1039: obsolete, reserved */
+#define SVGA_3D_CMD_BASE                   1040   /* 1040..1999: current protocol */
+/* Each ID below is parenthesized so it expands safely inside larger expressions. */
+#define SVGA_3D_CMD_SURFACE_DEFINE         (SVGA_3D_CMD_BASE + 0)
+#define SVGA_3D_CMD_SURFACE_DESTROY        (SVGA_3D_CMD_BASE + 1)
+#define SVGA_3D_CMD_SURFACE_COPY           (SVGA_3D_CMD_BASE + 2)
+#define SVGA_3D_CMD_SURFACE_STRETCHBLT     (SVGA_3D_CMD_BASE + 3)
+#define SVGA_3D_CMD_SURFACE_DMA            (SVGA_3D_CMD_BASE + 4)
+#define SVGA_3D_CMD_CONTEXT_DEFINE         (SVGA_3D_CMD_BASE + 5)
+#define SVGA_3D_CMD_CONTEXT_DESTROY        (SVGA_3D_CMD_BASE + 6)
+#define SVGA_3D_CMD_SETTRANSFORM           (SVGA_3D_CMD_BASE + 7)
+#define SVGA_3D_CMD_SETZRANGE              (SVGA_3D_CMD_BASE + 8)
+#define SVGA_3D_CMD_SETRENDERSTATE         (SVGA_3D_CMD_BASE + 9)
+#define SVGA_3D_CMD_SETRENDERTARGET        (SVGA_3D_CMD_BASE + 10)
+#define SVGA_3D_CMD_SETTEXTURESTATE        (SVGA_3D_CMD_BASE + 11)
+#define SVGA_3D_CMD_SETMATERIAL            (SVGA_3D_CMD_BASE + 12)
+#define SVGA_3D_CMD_SETLIGHTDATA           (SVGA_3D_CMD_BASE + 13)
+#define SVGA_3D_CMD_SETLIGHTENABLED        (SVGA_3D_CMD_BASE + 14)
+#define SVGA_3D_CMD_SETVIEWPORT            (SVGA_3D_CMD_BASE + 15)
+#define SVGA_3D_CMD_SETCLIPPLANE           (SVGA_3D_CMD_BASE + 16)
+#define SVGA_3D_CMD_CLEAR                  (SVGA_3D_CMD_BASE + 17)
+#define SVGA_3D_CMD_PRESENT                (SVGA_3D_CMD_BASE + 18)  // Deprecated
+#define SVGA_3D_CMD_SHADER_DEFINE          (SVGA_3D_CMD_BASE + 19)
+#define SVGA_3D_CMD_SHADER_DESTROY         (SVGA_3D_CMD_BASE + 20)
+#define SVGA_3D_CMD_SET_SHADER             (SVGA_3D_CMD_BASE + 21)
+#define SVGA_3D_CMD_SET_SHADER_CONST       (SVGA_3D_CMD_BASE + 22)
+#define SVGA_3D_CMD_DRAW_PRIMITIVES        (SVGA_3D_CMD_BASE + 23)
+#define SVGA_3D_CMD_SETSCISSORRECT         (SVGA_3D_CMD_BASE + 24)
+#define SVGA_3D_CMD_BEGIN_QUERY            (SVGA_3D_CMD_BASE + 25)
+#define SVGA_3D_CMD_END_QUERY              (SVGA_3D_CMD_BASE + 26)
+#define SVGA_3D_CMD_WAIT_FOR_QUERY         (SVGA_3D_CMD_BASE + 27)
+#define SVGA_3D_CMD_PRESENT_READBACK       (SVGA_3D_CMD_BASE + 28)  // Deprecated
+#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN (SVGA_3D_CMD_BASE + 29)
+#define SVGA_3D_CMD_MAX                    (SVGA_3D_CMD_BASE + 30)  /* one past the last defined ID */
+
+#define SVGA_3D_CMD_FUTURE_MAX             2000
+
+/*
+ * Common substructures used in multiple FIFO commands:
+ */
+
+typedef struct {   /* Three fog settings overlaid on one uint32 (uint16 + uint8 + uint8) */
+   union {
+      struct {
+         uint16  function;       // SVGA3dFogFunction
+         uint8   type;           // SVGA3dFogType
+         uint8   base;           // SVGA3dFogBase
+      };
+      uint32     uintValue;
+   };
+} SVGA3dFogMode;
+
+/*
+ * Uniquely identify one image (a 1D/2D/3D array) from a surface. This
+ * is a surface ID as well as face/mipmap indices.
+ */
+
+typedef
+struct SVGA3dSurfaceImageId {
+   uint32               sid;      /* surface ID */
+   uint32               face;     /* face index within the surface */
+   uint32               mipmap;   /* mipmap index within the face */
+} SVGA3dSurfaceImageId;
+
+typedef
+struct SVGA3dGuestImage {   /* Type of SVGA3dCmdSurfaceDMA.guest */
+   SVGAGuestPtr         ptr;   /* SVGAGuestPtr is declared elsewhere; presumably locates the image in guest memory */
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGA3dGuestImage;
+
+
+/*
+ * FIFO command format definitions:
+ */
+
+/*
+ * The data size header following cmdNum for every 3d command
+ */
+typedef
+struct {
+   uint32               id;     /* NOTE(review): presumably an SVGA_3D_CMD_* value -- confirm */
+   uint32               size;   /* NOTE(review): presumably the size of the command data that follows -- confirm units */
+} SVGA3dCmdHeader;
+
+/*
+ * A surface is a hierarchy of host VRAM surfaces: 1D, 2D, or 3D, with
+ * optional mipmaps and cube faces.
+ */
+
+typedef
+struct {   /* One of these follows SVGA3dCmdDefineSurface per mip level per face */
+   uint32               width;
+   uint32               height;
+   uint32               depth;
+} SVGA3dSize;
+
+typedef enum {   /* Type of SVGA3dCmdDefineSurface.surfaceFlags; each value is a distinct bit */
+   SVGA3D_SURFACE_CUBEMAP              = (1 << 0),
+   SVGA3D_SURFACE_HINT_STATIC          = (1 << 1),
+   SVGA3D_SURFACE_HINT_DYNAMIC         = (1 << 2),
+   SVGA3D_SURFACE_HINT_INDEXBUFFER     = (1 << 3),
+   SVGA3D_SURFACE_HINT_VERTEXBUFFER    = (1 << 4),
+   SVGA3D_SURFACE_HINT_TEXTURE         = (1 << 5),
+   SVGA3D_SURFACE_HINT_RENDERTARGET    = (1 << 6),
+   SVGA3D_SURFACE_HINT_DEPTHSTENCIL    = (1 << 7),
+   SVGA3D_SURFACE_HINT_WRITEONLY       = (1 << 8),
+} SVGA3dSurfaceFlags;
+
+typedef
+struct {   /* Element type of SVGA3dCmdDefineSurface.face[] */
+   uint32               numMipLevels;
+} SVGA3dSurfaceFace;
+
+typedef
+struct {
+   uint32                      sid;
+   SVGA3dSurfaceFlags          surfaceFlags;   /* OR of SVGA3D_SURFACE_* bits */
+   SVGA3dSurfaceFormat         format;
+   SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];   /* mip level count per face */
+   /*
+    * Followed by an SVGA3dSize structure for each mip level in each face.
+    *
+    * A note on surface sizes: Sizes are always specified in pixels,
+    * even if the true surface size is not a multiple of the minimum
+    * block size of the surface's format. For example, a 3x3x1 DXT1
+    * compressed texture would actually be stored as a 4x4x1 image in
+    * memory.
+    */
+} SVGA3dCmdDefineSurface;       /* SVGA_3D_CMD_SURFACE_DEFINE */
+
+typedef
+struct {
+   uint32               sid;   /* ID of the surface to destroy */
+} SVGA3dCmdDestroySurface;      /* SVGA_3D_CMD_SURFACE_DESTROY */
+
+typedef
+struct {
+   uint32               cid;   /* ID of the context to define */
+} SVGA3dCmdDefineContext;       /* SVGA_3D_CMD_CONTEXT_DEFINE */
+
+typedef
+struct {
+   uint32               cid;   /* ID of the context to destroy */
+} SVGA3dCmdDestroyContext;      /* SVGA_3D_CMD_CONTEXT_DESTROY */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dClearFlag      clearFlag;   /* SVGA3D_CLEAR_* bits (0x1 color, 0x2 depth, 0x4 stencil) */
+   uint32               color;
+   float                depth;
+   uint32               stencil;
+   /* Followed by variable number of SVGA3dRect structures */
+} SVGA3dCmdClear;               /* SVGA_3D_CMD_CLEAR */
+
+typedef
+struct SVGA3dCopyRect {   /* Element type of the list that follows SVGA3dCmdPresent */
+   uint32               x;
+   uint32               y;
+   uint32               w;
+   uint32               h;
+   uint32               srcx;
+   uint32               srcy;
+} SVGA3dCopyRect;
+
+typedef
+struct SVGA3dCopyBox {   /* Element type of the list that follows SVGA3dCmdSurfaceCopy / SVGA3dCmdSurfaceDMA */
+   uint32               x;
+   uint32               y;
+   uint32               z;
+   uint32               w;
+   uint32               h;
+   uint32               d;
+   uint32               srcx;
+   uint32               srcy;
+   uint32               srcz;
+} SVGA3dCopyBox;
+
+typedef
+struct {   /* Used by the viewport and scissor commands and in the list after SVGA3dCmdClear */
+   uint32               x;
+   uint32               y;
+   uint32               w;
+   uint32               h;
+} SVGA3dRect;
+
+typedef
+struct {   /* Type of SVGA3dCmdSurfaceStretchBlt.boxSrc / boxDest */
+   uint32               x;
+   uint32               y;
+   uint32               z;
+   uint32               w;
+   uint32               h;
+   uint32               d;
+} SVGA3dBox;
+
+typedef
+struct {   /* Three unsigned 32-bit coordinates */
+   uint32               x;
+   uint32               y;
+   uint32               z;
+} SVGA3dPoint;
+
+typedef
+struct {   /* Type of SVGA3dCmdSetLightData.data */
+   SVGA3dLightType      type;
+   SVGA3dBool           inWorldSpace;
+   float                diffuse[4];
+   float                specular[4];
+   float                ambient[4];
+   float                position[4];
+   float                direction[4];
+   float                range;
+   float                falloff;
+   float                attenuation0;
+   float                attenuation1;
+   float                attenuation2;
+   float                theta;
+   float                phi;
+} SVGA3dLightData;
+
+typedef
+struct {   /* The SVGA_3D_CMD_PRESENT define is marked Deprecated */
+   uint32               sid;
+   /* Followed by variable number of SVGA3dCopyRect structures */
+} SVGA3dCmdPresent;             /* SVGA_3D_CMD_PRESENT */
+
+typedef
+struct {   /* Element type of the list that follows SVGA3dCmdSetRenderState */
+   SVGA3dRenderStateName   state;
+   union {   /* NOTE(review): presumably 'state' selects which member is meaningful -- confirm */
+      uint32               uintValue;
+      float                floatValue;
+   };
+} SVGA3dRenderState;
+
+typedef
+struct {   /* Fixed part of the command; the state list follows it */
+   uint32               cid;
+   /* Followed by variable number of SVGA3dRenderState structures */
+} SVGA3dCmdSetRenderState;      /* SVGA_3D_CMD_SETRENDERSTATE */
+
+typedef
+struct {
+   uint32                 cid;
+   SVGA3dRenderTargetType type;     /* SVGA3D_RT_* slot */
+   SVGA3dSurfaceImageId   target;   /* surface ID + face + mipmap */
+} SVGA3dCmdSetRenderTarget;     /* SVGA_3D_CMD_SETRENDERTARGET */
+
+typedef
+struct {   /* Fixed part of the command; the copy-box list follows it */
+   SVGA3dSurfaceImageId  src;
+   SVGA3dSurfaceImageId  dest;
+   /* Followed by variable number of SVGA3dCopyBox structures */
+} SVGA3dCmdSurfaceCopy;               /* SVGA_3D_CMD_SURFACE_COPY */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId  src;
+   SVGA3dSurfaceImageId  dest;
+   SVGA3dBox             boxSrc;
+   SVGA3dBox             boxDest;
+   SVGA3dStretchBltMode  mode;   /* SVGA3D_STRETCH_BLT_POINT or SVGA3D_STRETCH_BLT_LINEAR */
+} SVGA3dCmdSurfaceStretchBlt;         /* SVGA_3D_CMD_SURFACE_STRETCHBLT */
+
+typedef
+struct {   /* Type of SVGA3dCmdSurfaceDMASuffix.flags; bit-fields total 32 bits */
+   /*
+    * If the discard flag is present in a surface DMA operation, the host may
+    * discard the contents of the current mipmap level and face of the target
+    * surface before applying the surface DMA contents.
+    */
+   uint32 discard : 1;
+
+   /*
+    * If the unsynchronized flag is present, the host may perform this upload
+    * without syncing to pending reads on this surface.
+    */
+   uint32 unsynchronized : 1;
+
+   /*
+    * Guests *MUST* set the reserved bits to 0 before submitting the command
+    * suffix as future flags may occupy these bits.
+    */
+   uint32 reserved : 30;
+} SVGA3dSurfaceDMAFlags;
+
+typedef
+struct {
+   SVGA3dGuestImage      guest;
+   SVGA3dSurfaceImageId  host;
+   SVGA3dTransferType    transfer;   /* SVGA3D_WRITE_HOST_VRAM or SVGA3D_READ_HOST_VRAM */
+   /*
+    * Followed by variable number of SVGA3dCopyBox structures. For consistency
+    * in all clipping logic and coordinate translation, we define the
+    * "source" in each copyBox as the guest image and the
+    * "destination" as the host image, regardless of transfer
+    * direction.
+    *
+    * For efficiency, the SVGA3D device is free to copy more data than
+    * specified. For example, it may round copy boxes outwards such
+    * that they lie on particular alignment boundaries.
+    */
+} SVGA3dCmdSurfaceDMA;                /* SVGA_3D_CMD_SURFACE_DMA */
+
+/*
+ * SVGA3dCmdSurfaceDMASuffix --
+ *
+ *    This is a command suffix that will appear after a SurfaceDMA command in
+ *    the FIFO.  It contains some extra information that hosts may use to
+ *    optimize performance or protect the guest.  This suffix exists to preserve
+ *    backwards compatibility while also allowing for new functionality to be
+ *    implemented.
+ */
+
+typedef
+struct {
+   uint32 suffixSize;   /* NOTE(review): presumably the byte size of this suffix -- confirm */
+
+   /*
+    * The maximum offset is used to determine the maximum offset from the
+    * guestPtr base address that will be accessed or written to during this
+    * surfaceDMA.  If the suffix is supported, the host will respect this
+    * boundary while performing surface DMAs.
+    *
+    * Defaults to MAX_UINT32
+    */
+   uint32 maximumOffset;
+
+   /*
+    * A set of flags that describes optimizations that the host may perform
+    * while performing this surface DMA operation.  The guest should never rely
+    * on behaviour that is different when these flags are set for correctness.
+    *
+    * Defaults to 0
+    */
+   SVGA3dSurfaceDMAFlags flags;
+} SVGA3dCmdSurfaceDMASuffix;
+
+/*
+ * SVGA_3D_CMD_DRAW_PRIMITIVES --
+ *
+ *   This command is the SVGA3D device's generic drawing entry point.
+ *   It can draw multiple ranges of primitives, optionally using an
+ *   index buffer, using an arbitrary collection of vertex buffers.
+ *
+ *   Each SVGA3dVertexDecl defines a distinct vertex array to bind
+ *   during this draw call. The declarations specify which surface
+ *   the vertex data lives in, what that vertex data is used for,
+ *   and how to interpret it.
+ *
+ *   Each SVGA3dPrimitiveRange defines a collection of primitives
+ *   to render using the same vertex arrays. An index buffer is
+ *   optional.
+ */
+
+typedef
+struct {   /* Type of SVGA3dVertexDecl.rangeHint */
+   /*
+    * A range hint is an optional specification for the range of indices
+    * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed
+    * that the entire array will be used.
+    *
+    * These are only hints. The SVGA3D device may use them for
+    * performance optimization if possible, but it's also allowed to
+    * ignore these values.
+    */
+   uint32               first;
+   uint32               last;
+} SVGA3dArrayRangeHint;
+
+typedef
+struct {   /* Type of SVGA3dVertexDecl.array and SVGA3dPrimitiveRange.indexArray */
+   /*
+    * Define the origin and shape of a vertex or index array. Both
+    * 'offset' and 'stride' are in bytes. The provided surface will be
+    * reinterpreted as a flat array of bytes in the same format used
+    * by surface DMA operations. To avoid unnecessary conversions, the
+    * surface should be created with the SVGA3D_BUFFER format.
+    *
+    * Index 0 in the array starts 'offset' bytes into the surface.
+    * Index 1 begins at byte 'offset + stride', etc. Array indices may
+    * not be negative.
+    */
+   uint32               surfaceId;
+   uint32               offset;
+   uint32               stride;
+} SVGA3dArray;
+
+typedef
+struct {   /* Type of SVGA3dVertexDecl.identity */
+   /*
+    * Describe a vertex array's data type, and define how it is to be
+    * used by the fixed function pipeline or the vertex shader. It
+    * isn't useful to have two VertexDecls with the same
+    * VertexArrayIdentity in one draw call.
+    */
+   SVGA3dDeclType       type;
+   SVGA3dDeclMethod     method;
+   SVGA3dDeclUsage      usage;
+   uint32               usageIndex;
+} SVGA3dVertexArrayIdentity;
+
+typedef
+struct {   /* One per vertex array; 'numVertexDecls' of these follow SVGA3dCmdDrawPrimitives */
+   SVGA3dVertexArrayIdentity  identity;
+   SVGA3dArray                array;
+   SVGA3dArrayRangeHint       rangeHint;
+} SVGA3dVertexDecl;
+
+typedef
+struct {
+   /*
+    * Define a group of primitives to render, from sequential indices.
+    *
+    * The value of 'primType' and 'primitiveCount' imply the
+    * total number of vertices that will be rendered.
+    */
+   SVGA3dPrimitiveType  primType;
+   uint32               primitiveCount;
+
+   /*
+    * Optional index buffer. If indexArray.surfaceId is
+    * SVGA3D_INVALID_ID, we render without an index buffer. Rendering
+    * without an index buffer is identical to rendering with an index
+    * buffer containing the sequence [0, 1, 2, 3, ...].
+    *
+    * If an index buffer is in use, indexWidth specifies the width in
+    * bytes of each index value. It must be less than or equal to
+    * indexArray.stride.
+    *
+    * (Currently, the SVGA3D device requires index buffers to be tightly
+    * packed. In other words, indexWidth == indexArray.stride)
+    */
+   SVGA3dArray          indexArray;
+   uint32               indexWidth;
+
+   /*
+    * Optional index bias. This number is added to all indices from
+    * indexArray before they are used as vertex array indices. This
+    * can be used in multiple ways:
+    *
+    *  - When not using an indexArray, this bias can be used to
+    *    specify where in the vertex arrays to begin rendering.
+    *
+    *  - A positive number here is equivalent to increasing the
+    *    offset in each vertex array.
+    *
+    *  - A negative number can be used to render using a small
+    *    vertex array and an index buffer that contains large
+    *    values. This may be used by some applications that
+    *    crop a vertex buffer without modifying their index
+    *    buffer.
+    *
+    * Note that rendering with a negative bias value may be slower and
+    * use more memory than rendering with a positive or zero bias.
+    */
+   int32                indexBias;
+} SVGA3dPrimitiveRange;
+
+typedef
+struct {
+   uint32               cid;
+   uint32               numVertexDecls;
+   uint32               numRanges;
+
+   /*
+    * There are two or three variable size arrays after the
+    * SVGA3dCmdDrawPrimitives structure. In order,
+    * they are:
+    *
+    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
+    * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+    * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
+    *    the frequency divisor for the corresponding vertex decl)
+    */
+} SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAW_PRIMITIVES */
+
+typedef
+struct {   /* Element type of the list that follows SVGA3dCmdSetTextureState */
+   uint32                   stage;
+   SVGA3dTextureStateName   name;
+   union {   /* float for the SVGA3D_TS_* names annotated "float", otherwise a uint32-sized value */
+      uint32                value;
+      float                 floatValue;
+   };
+} SVGA3dTextureState;
+
+typedef
+struct {   /* Fixed part of the command; the state list follows it */
+   uint32               cid;
+   /* Followed by variable number of SVGA3dTextureState structures */
+} SVGA3dCmdSetTextureState;      /* SVGA_3D_CMD_SETTEXTURESTATE */
+
+typedef
+struct {
+   uint32                   cid;
+   SVGA3dTransformType      type;         /* SVGA3D_TRANSFORM_* selector */
+   float                    matrix[16];   /* NOTE(review): presumably a 4x4 matrix; element order not shown here */
+} SVGA3dCmdSetTransform;          /* SVGA_3D_CMD_SETTRANSFORM */
+
+typedef
+struct {   /* Type of SVGA3dCmdSetZRange.zRange */
+   float                min;
+   float                max;
+} SVGA3dZRange;
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dZRange         zRange;   /* min/max pair */
+} SVGA3dCmdSetZRange;             /* SVGA_3D_CMD_SETZRANGE */
+
+typedef
+struct {   /* Type of SVGA3dCmdSetMaterial.material */
+   float                diffuse[4];
+   float                ambient[4];
+   float                specular[4];
+   float                emissive[4];
+   float                shininess;
+} SVGA3dMaterial;
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dFace           face;       /* SVGA3dFace is declared outside this section */
+   SVGA3dMaterial       material;
+} SVGA3dCmdSetMaterial;           /* SVGA_3D_CMD_SETMATERIAL */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;   /* NOTE(review): presumably selects which light to set -- confirm */
+   SVGA3dLightData      data;
+} SVGA3dCmdSetLightData;           /* SVGA_3D_CMD_SETLIGHTDATA */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;     /* NOTE(review): presumably the same light index as SVGA3dCmdSetLightData -- confirm */
+   uint32               enabled;   /* stored as a full uint32, not SVGA3dBool */
+} SVGA3dCmdSetLightEnabled;      /* SVGA_3D_CMD_SETLIGHTENABLED */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dRect           rect;   /* x, y, w, h */
+} SVGA3dCmdSetViewport;           /* SVGA_3D_CMD_SETVIEWPORT */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dRect           rect;   /* x, y, w, h */
+} SVGA3dCmdSetScissorRect;         /* SVGA_3D_CMD_SETSCISSORRECT */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;      /* NOTE(review): presumably 0..SVGA3D_MAX_CLIP_PLANES-1 -- confirm */
+   float                plane[4];
+} SVGA3dCmdSetClipPlane;           /* SVGA_3D_CMD_SETCLIPPLANE */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               shid;
+   SVGA3dShaderType     type;
+   /* Followed by variable number of DWORDs for shader bytecode */
+} SVGA3dCmdDefineShader;           /* SVGA_3D_CMD_SHADER_DEFINE */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               shid;
+   SVGA3dShaderType     type;
+} SVGA3dCmdDestroyShader;         /* SVGA_3D_CMD_SHADER_DESTROY */
+
+typedef
+struct {
+   uint32                  cid;
+   uint32                  reg;     /* register number */
+   SVGA3dShaderType        type;
+   SVGA3dShaderConstType   ctype;
+   uint32                  values[4];
+} SVGA3dCmdSetShaderConst;        /* SVGA_3D_CMD_SET_SHADER_CONST */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dShaderType     type;
+   uint32               shid;
+} SVGA3dCmdSetShader;             /* SVGA_3D_CMD_SET_SHADER */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dQueryType      type;
+} SVGA3dCmdBeginQuery;           /* SVGA_3D_CMD_BEGIN_QUERY */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dQueryType      type;
+   SVGAGuestPtr         guestResult;  /* Points to an SVGA3dQueryResult structure */
+} SVGA3dCmdEndQuery;                  /* SVGA_3D_CMD_END_QUERY */
+
+typedef
+struct {
+   uint32               cid;          /* Same parameters passed to END_QUERY */
+   SVGA3dQueryType      type;
+   SVGAGuestPtr         guestResult;
+} SVGA3dCmdWaitForQuery;              /* SVGA_3D_CMD_WAIT_FOR_QUERY */
+
+typedef
+struct {
+   uint32               totalSize;    /* Set by guest before query is ended. */
+   SVGA3dQueryState     state;        /* Set by host or guest. See SVGA3dQueryState. */
+   union {                            /* Set by host on exit from PENDING state */
+      uint32            result32;
+   };
+} SVGA3dQueryResult;
+
+/*
+ * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN --
+ *
+ *    This is a blit from an SVGA3D surface to a Screen Object. Just
+ *    like GMR-to-screen blits, this blit may be directed at a
+ *    specific screen or to the virtual coordinate space.
+ *
+ *    The blit copies from a rectangular region of an SVGA3D surface
+ *    image to a rectangular region of a screen or screens.
+ *
+ *    This command takes an optional variable-length list of clipping
+ *    rectangles after the body of the command. If no rectangles are
+ *    specified, there is no clipping region. The entire destRect is
+ *    drawn to. If one or more rectangles are included, they describe
+ *    a clipping region. The clip rectangle coordinates are measured
+ *    relative to the top-left corner of destRect.
+ *
+ *    This clipping region serves multiple purposes:
+ *
+ *      - It can be used to perform an irregularly shaped blit more
+ *        efficiently than by issuing many separate blit commands.
+ *
+ *      - It is equivalent to allowing blits with non-integer
+ *        source coordinates. You could blit just one half-pixel
+ *        of a source, for example, by specifying a larger
+ *        destination rectangle than you need, then removing
+ *        part of it using a clip rectangle.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *
+ * Limitations:
+ *
+ *    - Currently, no backend supports blits from a mipmap or face
+ *      other than the first one.
+ */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId srcImage;
+   SVGASignedRect       srcRect;
+   uint32               destScreenId; /* Screen ID or SVGA_ID_INVALID for virt. coords */
+   SVGASignedRect       destRect;     /* Supports scaling if src/dest are different sizes */
+   /* Clipping: zero or more SVGASignedRects follow */
+} SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
+
+
+/*
+ * Capability query index.
+ *
+ * Notes:
+ *
+ *   1. SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of
+ *      fixed-function texture units available. Each of these units
+ *      works in both FFP and Shader modes, and they support texture
+ *      transforms and texture coordinates. The host may have additional
+ *      texture image units that are only usable with shaders.
+ *
+ *   2. The BUFFER_FORMAT capabilities are deprecated, and they always
+ *      return TRUE. Even on physical hardware that does not support
+ *      these formats natively, the SVGA3D device will provide an emulation
+ *      which should be invisible to the guest OS.
+ *
+ *      In general, the SVGA3D device should support any operation on
+ *      any surface format, it just may perform some of these
+ *      operations in software depending on the capabilities of the
+ *      available physical hardware.
+ *
+ *      XXX: In the future, we will add capabilities that describe in
+ *      detail what formats are supported in hardware for what kinds
+ *      of operations.
+ */
+
+typedef enum {
+   SVGA3D_DEVCAP_3D                                = 0,
+   SVGA3D_DEVCAP_MAX_LIGHTS                        = 1,
+   SVGA3D_DEVCAP_MAX_TEXTURES                      = 2,  /* See note (1) */
+   SVGA3D_DEVCAP_MAX_CLIP_PLANES                   = 3,
+   SVGA3D_DEVCAP_VERTEX_SHADER_VERSION             = 4,
+   SVGA3D_DEVCAP_VERTEX_SHADER                     = 5,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION           = 6,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER                   = 7,
+   SVGA3D_DEVCAP_MAX_RENDER_TARGETS                = 8,
+   SVGA3D_DEVCAP_S23E8_TEXTURES                    = 9,
+   SVGA3D_DEVCAP_S10E5_TEXTURES                    = 10,
+   SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND             = 11,
+   SVGA3D_DEVCAP_D16_BUFFER_FORMAT                 = 12, /* See note (2) */
+   SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT               = 13, /* See note (2) */
+   SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT               = 14, /* See note (2) */
+   SVGA3D_DEVCAP_QUERY_TYPES                       = 15,
+   SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING         = 16,
+   SVGA3D_DEVCAP_MAX_POINT_SIZE                    = 17,
+   SVGA3D_DEVCAP_MAX_SHADER_TEXTURES               = 18,
+   SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH                 = 19,
+   SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT                = 20,
+   SVGA3D_DEVCAP_MAX_VOLUME_EXTENT                 = 21,
+   SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT                = 22,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO          = 23,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY            = 24,
+   SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT               = 25,
+   SVGA3D_DEVCAP_MAX_VERTEX_INDEX                  = 26,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS    = 27,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS  = 28,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS           = 29,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS         = 30,
+   SVGA3D_DEVCAP_TEXTURE_OPS                       = 31,
+   SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8               = 32,
+   SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8               = 33,
+   SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10            = 34,
+   SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5               = 35,
+   SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5               = 36,
+   SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4               = 37,
+   SVGA3D_DEVCAP_SURFACEFMT_R5G6B5                 = 38,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16            = 39,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8      = 40,
+   SVGA3D_DEVCAP_SURFACEFMT_ALPHA8                 = 41,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8             = 42,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D16                  = 43,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8                = 44,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8                = 45,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT1                   = 46,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT2                   = 47,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT3                   = 48,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT4                   = 49,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT5                   = 50,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8           = 51,
+   SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10            = 52,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8               = 53,
+   SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8               = 54,
+   SVGA3D_DEVCAP_SURFACEFMT_CxV8U8                 = 55,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S10E5                = 56,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S23E8                = 57,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5               = 58,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8               = 59,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5             = 60,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8             = 61,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES        = 63, /* Index 62 is not defined in this enum */
+
+   /*
+    * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color
+    * render targets.  This does not include the depth or stencil targets.
+    */
+   SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS   = 64,
+
+   SVGA3D_DEVCAP_SURFACEFMT_V16U16                 = 65,
+   SVGA3D_DEVCAP_SURFACEFMT_G16R16                 = 66,
+   SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
+   SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
+   SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+
+   /*
+    * Don't add new caps into the previous section; the values in this
+    * enumeration must not change. You can put new values right before
+    * SVGA3D_DEVCAP_MAX.
+    */
+   SVGA3D_DEVCAP_MAX                                  /* This must be the last index. */
+} SVGA3dDevCapIndex;
+
+typedef union {
+   Bool   b;
+   uint32 u;
+   int32  i;
+   float  f;
+} SVGA3dDevCapResult;
+
+#endif /* _SVGA3D_REG_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
new file mode 100644
index 0000000000..2078c4a8a4
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -0,0 +1,519 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_shaderdefs.h --
+ *
+ * SVGA3D byte code format and limit definitions.
+ *
+ * The format of the byte code directly corresponds to that defined
+ * by Microsoft DirectX SDK 9.0c (file d3d9types.h). The format can
+ * also be extended so that different shader formats can be supported
+ * for example GLSL, ARB vp/fp, NV/ATI shader formats, etc.
+ *
+ */
+
+#ifndef __SVGA3D_SHADER_DEFS__
+#define __SVGA3D_SHADER_DEFS__
+
+/* SVGA3D shader hardware limits. */
+
+#define SVGA3D_INPUTREG_MAX            16
+#define SVGA3D_OUTPUTREG_MAX           12
+#define SVGA3D_VERTEX_SAMPLERREG_MAX   4
+#define SVGA3D_PIXEL_SAMPLERREG_MAX    16
+#define SVGA3D_SAMPLERREG_MAX          (SVGA3D_PIXEL_SAMPLERREG_MAX+\
+                                        SVGA3D_VERTEX_SAMPLERREG_MAX)
+#define SVGA3D_TEMPREG_MAX             32
+#define SVGA3D_CONSTREG_MAX            256
+#define SVGA3D_CONSTINTREG_MAX         16
+#define SVGA3D_CONSTBOOLREG_MAX        16
+#define SVGA3D_ADDRREG_MAX             1
+#define SVGA3D_PREDREG_MAX             1
+
+/* SVGA3D byte code specific limits */
+
+#define SVGA3D_MAX_SRC_REGS      4
+#define SVGA3D_MAX_NESTING_LEVEL 32
+
+/* SVGA3D version information. Type tags are unsigned so that `TYPE << 16` cannot overflow int. */
+
+#define SVGA3D_VS_TYPE  0xFFFEu
+#define SVGA3D_PS_TYPE  0xFFFFu
+
+typedef struct {
+   union {
+      struct {
+         uint32 minor : 8;  /* presumably the low byte of value (the "| 1" in SVGA3D_VS_11) - confirm bit-field order */
+         uint32 major : 8;  /* presumably the "n << 8" part of the SVGA3D_VS_xx / SVGA3D_PS_xx macros */
+         uint32 type : 16;  /* presumably SVGA3D_VS_TYPE or SVGA3D_PS_TYPE (the "<< 16" part) */
+      };
+
+      uint32 value;         /* the three fields above viewed as a single 32-bit word */
+   };
+} SVGA3dShaderVersion;
+
+#define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8)
+#define SVGA3D_VS_11 (SVGA3D_VS_10 | 1)
+#define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8)
+#define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8)
+
+#define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8)
+#define SVGA3D_PS_11 (SVGA3D_PS_10 | 1)
+#define SVGA3D_PS_12 (SVGA3D_PS_10 | 2)
+#define SVGA3D_PS_13 (SVGA3D_PS_10 | 3)
+#define SVGA3D_PS_14 (SVGA3D_PS_10 | 4)
+#define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8)
+#define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8)
+
+/* The *_ENABLED are for backwards compatibility with old drivers */
+typedef enum {
+   SVGA3DPSVERSION_NONE = 0,
+   SVGA3DPSVERSION_ENABLED = 1,
+   SVGA3DPSVERSION_11 = 3,
+   SVGA3DPSVERSION_12 = 5,
+   SVGA3DPSVERSION_13 = 7,
+   SVGA3DPSVERSION_14 = 9,
+   SVGA3DPSVERSION_20 = 11,
+   SVGA3DPSVERSION_30 = 13,
+   SVGA3DPSVERSION_40 = 15,
+   SVGA3DPSVERSION_MAX
+} SVGA3dPixelShaderVersion;
+
+typedef enum {
+   SVGA3DVSVERSION_NONE = 0,
+   SVGA3DVSVERSION_ENABLED = 1,
+   SVGA3DVSVERSION_11 = 3,
+   SVGA3DVSVERSION_20 = 5,
+   SVGA3DVSVERSION_30 = 7,
+   SVGA3DVSVERSION_40 = 9,
+   SVGA3DVSVERSION_MAX
+} SVGA3dVertexShaderVersion;
+
+/* SVGA3D instruction op codes. */
+
+typedef enum {
+   SVGA3DOP_NOP = 0,
+   SVGA3DOP_MOV,
+   SVGA3DOP_ADD,
+   SVGA3DOP_SUB,
+   SVGA3DOP_MAD,
+   SVGA3DOP_MUL,
+   SVGA3DOP_RCP,
+   SVGA3DOP_RSQ,
+   SVGA3DOP_DP3,
+   SVGA3DOP_DP4,
+   SVGA3DOP_MIN,
+   SVGA3DOP_MAX,
+   SVGA3DOP_SLT,
+   SVGA3DOP_SGE,
+   SVGA3DOP_EXP,
+   SVGA3DOP_LOG,
+   SVGA3DOP_LIT,
+   SVGA3DOP_DST,
+   SVGA3DOP_LRP,
+   SVGA3DOP_FRC,
+   SVGA3DOP_M4x4,
+   SVGA3DOP_M4x3,
+   SVGA3DOP_M3x4,
+   SVGA3DOP_M3x3,
+   SVGA3DOP_M3x2,
+   SVGA3DOP_CALL,
+   SVGA3DOP_CALLNZ,
+   SVGA3DOP_LOOP,
+   SVGA3DOP_RET,
+   SVGA3DOP_ENDLOOP,
+   SVGA3DOP_LABEL,
+   SVGA3DOP_DCL,
+   SVGA3DOP_POW,
+   SVGA3DOP_CRS,
+   SVGA3DOP_SGN,
+   SVGA3DOP_ABS,
+   SVGA3DOP_NRM,
+   SVGA3DOP_SINCOS,
+   SVGA3DOP_REP,
+   SVGA3DOP_ENDREP,
+   SVGA3DOP_IF,
+   SVGA3DOP_IFC,
+   SVGA3DOP_ELSE,
+   SVGA3DOP_ENDIF,
+   SVGA3DOP_BREAK,
+   SVGA3DOP_BREAKC,
+   SVGA3DOP_MOVA,
+   SVGA3DOP_DEFB,
+   SVGA3DOP_DEFI,
+   SVGA3DOP_TEXCOORD = 64,
+   SVGA3DOP_TEXKILL,
+   SVGA3DOP_TEX,
+   SVGA3DOP_TEXBEM,
+   SVGA3DOP_TEXBEML,
+   SVGA3DOP_TEXREG2AR,
+   SVGA3DOP_TEXREG2GB = 70,
+   SVGA3DOP_TEXM3x2PAD,
+   SVGA3DOP_TEXM3x2TEX,
+   SVGA3DOP_TEXM3x3PAD,
+   SVGA3DOP_TEXM3x3TEX,
+   SVGA3DOP_RESERVED0,
+   SVGA3DOP_TEXM3x3SPEC,
+   SVGA3DOP_TEXM3x3VSPEC,
+   SVGA3DOP_EXPP,
+   SVGA3DOP_LOGP,
+   SVGA3DOP_CND = 80,
+   SVGA3DOP_DEF,
+   SVGA3DOP_TEXREG2RGB,
+   SVGA3DOP_TEXDP3TEX,
+   SVGA3DOP_TEXM3x2DEPTH,
+   SVGA3DOP_TEXDP3,
+   SVGA3DOP_TEXM3x3,
+   SVGA3DOP_TEXDEPTH,
+   SVGA3DOP_CMP,
+   SVGA3DOP_BEM,
+   SVGA3DOP_DP2ADD = 90,
+   SVGA3DOP_DSX,
+   SVGA3DOP_DSY,
+   SVGA3DOP_TEXLDD,
+   SVGA3DOP_SETP,
+   SVGA3DOP_TEXLDL,
+   SVGA3DOP_BREAKP = 96,
+   SVGA3DOP_LAST_INST,
+   SVGA3DOP_PHASE = 0xFFFD,
+   SVGA3DOP_COMMENT = 0xFFFE,
+   SVGA3DOP_END = 0xFFFF,
+} SVGA3dShaderOpCodeType;
+
+/* SVGA3D operation control/comparison function types */
+
+typedef enum {
+   SVGA3DOPCONT_NONE,
+   SVGA3DOPCONT_PROJECT,   /* Projective texturing */
+   SVGA3DOPCONT_BIAS,      /* Texturing with a LOD bias */
+} SVGA3dShaderOpCodeControlFnType;
+
+typedef enum {
+   SVGA3DOPCOMP_RESERVED0 = 0,
+   SVGA3DOPCOMP_GT,        /* 1 */
+   SVGA3DOPCOMP_EQ,        /* 2 */
+   SVGA3DOPCOMP_GE,        /* 3 */
+   SVGA3DOPCOMP_LT,        /* 4 */
+   SVGA3DOPCOMPC_NE,       /* 5; NOTE(review): prefix is "COMPC", unlike its siblings */
+   SVGA3DOPCOMP_LE,        /* 6 */
+   SVGA3DOPCOMP_RESERVED1  /* 7 */
+} SVGA3dShaderOpCodeCompFnType;
+
+/* SVGA3D register types */
+
+typedef enum {
+    SVGA3DREG_TEMP = 0,       /* Temporary register file */
+    SVGA3DREG_INPUT,          /* Input register file */
+    SVGA3DREG_CONST,          /* Constant register file */
+    SVGA3DREG_ADDR,           /* Address register for VS (value 3, shared with TEXTURE) */
+    SVGA3DREG_TEXTURE = 3,    /* Texture register file for PS (value 3, shared with ADDR) */
+    SVGA3DREG_RASTOUT,        /* Rasterizer register file */
+    SVGA3DREG_ATTROUT,        /* Attribute output register file */
+    SVGA3DREG_TEXCRDOUT,      /* Texture coordinate output register file (value 6, shared with OUTPUT) */
+    SVGA3DREG_OUTPUT = 6,     /* Output register file for VS 3.0+ (value 6, shared with TEXCRDOUT) */
+    SVGA3DREG_CONSTINT,       /* Constant integer vector register file */
+    SVGA3DREG_COLOROUT,       /* Color output register file */
+    SVGA3DREG_DEPTHOUT,       /* Depth output register file */
+    SVGA3DREG_SAMPLER,        /* Sampler state register file */
+    SVGA3DREG_CONST2,         /* Constant register file 2048 - 4095 */
+    SVGA3DREG_CONST3,         /* Constant register file 4096 - 6143 */
+    SVGA3DREG_CONST4,         /* Constant register file 6144 - 8191 */
+    SVGA3DREG_CONSTBOOL,      /* Constant boolean register file */
+    SVGA3DREG_LOOP,           /* Loop counter register file */
+    SVGA3DREG_TEMPFLOAT16,    /* 16-bit float temp register file */
+    SVGA3DREG_MISCTYPE,       /* Miscellaneous (single) registers */
+    SVGA3DREG_LABEL,          /* Label */
+    SVGA3DREG_PREDICATE,      /* Predicate register */
+} SVGA3dShaderRegType;
+
+/* SVGA3D rasterizer output register types */
+
+typedef enum {
+   SVGA3DRASTOUT_POSITION = 0,
+   SVGA3DRASTOUT_FOG,
+   SVGA3DRASTOUT_PSIZE
+} SVGA3dShaderRastOutRegType;
+
+/* SVGA3D miscellaneous register types */
+
+typedef enum {
+   SVGA3DMISCREG_POSITION = 0,   /* Input position x,y,z,rhw (PS) */
+   SVGA3DMISCREG_FACE            /* Floating point primitive area (PS) */
+} SVGA3DShaderMiscRegType;
+
+/* SVGA3D sampler types */
+
+typedef enum {
+   SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */
+   SVGA3DSAMP_2D = 2,      /* dcl_2d s# (for declaring a 2-D texture); value 1 is not defined here */
+   SVGA3DSAMP_CUBE,        /* dcl_cube s# (for declaring a cube texture) */
+   SVGA3DSAMP_VOLUME,      /* dcl_volume s# (for declaring a volume texture) */
+} SVGA3dShaderSamplerType;
+
+/* SVGA3D sampler format classes */
+
+typedef enum {
+   SVGA3DSAMPFORMAT_ARGB,        /* ARGB formats */
+   SVGA3DSAMPFORMAT_V8U8,        /* Sign and normalize (SNORM) V & U */
+   SVGA3DSAMPFORMAT_Q8W8V8U8,    /* SNORM all */
+   SVGA3DSAMPFORMAT_CxV8U8,      /* SNORM V & U, C=SQRT(1-U^2-V^2) */
+   SVGA3DSAMPFORMAT_X8L8V8U8,    /* SNORM V & U */
+   SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */
+   SVGA3DSAMPFORMAT_DXT_PMA,     /* DXT pre-multiplied alpha */
+   SVGA3DSAMPFORMAT_YUV,         /* YUV video format */
+   SVGA3DSAMPFORMAT_UYVY,        /* UYVY video format */
+   SVGA3DSAMPFORMAT_Rx,          /* R16F/32F */
+   SVGA3DSAMPFORMAT_RxGx,        /* R16FG16F, R32FG32F */
+   SVGA3DSAMPFORMAT_V16U16,      /* SNORM all */
+} SVGA3DShaderSamplerFormatClass;
+
+/* SVGA3D write mask */
+
+#define SVGA3DWRITEMASK_0    1 /* Component 0 (X;Red) */
+#define SVGA3DWRITEMASK_1    2 /* Component 1 (Y;Green) */
+#define SVGA3DWRITEMASK_2    4 /* Component 2 (Z;Blue) */
+#define SVGA3DWRITEMASK_3    8 /* Component 3 (W;Alpha) */
+#define SVGA3DWRITEMASK_ALL 15 /* All components */
+
+/* SVGA3D destination modifiers */
+
+#define SVGA3DDSTMOD_NONE              0 /* nop */
+#define SVGA3DDSTMOD_SATURATE          1 /* clamp to [0, 1] */
+#define SVGA3DDSTMOD_PARTIALPRECISION  2 /* Partial precision hint */
+
+/*
+ * Relevant to multisampling only:
+ * When the pixel center is not covered, sample
+ * attribute or compute gradients/LOD
+ * using multisample "centroid" location.
+ * "Centroid" is some location within the covered
+ * region of the pixel.
+ */
+
+#define SVGA3DDSTMOD_MSAMPCENTROID     4
+
+/* SVGA3D source swizzle */
+
+#define SVGA3DSWIZZLE_REPLICATEX 0x00
+#define SVGA3DSWIZZLE_REPLICATEY 0x55
+#define SVGA3DSWIZZLE_REPLICATEZ 0xAA
+#define SVGA3DSWIZZLE_REPLICATEW 0xFF
+#define SVGA3DSWIZZLE_NONE       0xE4
+#define SVGA3DSWIZZLE_YZXW       0xC9
+#define SVGA3DSWIZZLE_ZXYW       0xD2
+#define SVGA3DSWIZZLE_WXYZ       0x1B
+
+/* SVGA3D source modifiers */
+
+typedef enum {
+    SVGA3DSRCMOD_NONE = 0, /* nop */
+    SVGA3DSRCMOD_NEG,      /* negate */
+    SVGA3DSRCMOD_BIAS,     /* bias */
+    SVGA3DSRCMOD_BIASNEG,  /* bias and negate */
+    SVGA3DSRCMOD_SIGN,     /* sign */
+    SVGA3DSRCMOD_SIGNNEG,  /* sign and negate */
+    SVGA3DSRCMOD_COMP,     /* complement */
+    SVGA3DSRCMOD_X2,       /* x2 */
+    SVGA3DSRCMOD_X2NEG,    /* x2 and negate */
+    SVGA3DSRCMOD_DZ,       /* divide through by z component */
+    SVGA3DSRCMOD_DW,       /* divide through by w component */
+    SVGA3DSRCMOD_ABS,      /* abs() */
+    SVGA3DSRCMOD_ABSNEG,   /* -abs() */
+    SVGA3DSRCMOD_NOT,      /* ! (for predicate register) */
+} SVGA3dShaderSrcModType;
+
+/* SVGA3D instruction token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 comment_op : 16;
+         uint32 comment_size : 16;
+      };
+
+      struct {
+         uint32 op : 16;
+         uint32 control : 3;
+         uint32 reserved2 : 5;
+         uint32 size : 4;
+         uint32 predicated : 1;
+         uint32 reserved1 : 1;
+         uint32 coissue : 1;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderInstToken;
+
+/* SVGA3D destination parameter token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 num : 11;
+         uint32 type_upper : 2;
+         uint32 relAddr : 1;
+         uint32 reserved1 : 2;
+         uint32 mask : 4;
+         uint32 dstMod : 4;
+         uint32 shfScale : 4;
+         uint32 type_lower : 3;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderDestToken;
+
+/* SVGA3D source parameter token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 num : 11;
+         uint32 type_upper : 2;
+         uint32 relAddr : 1;
+         uint32 reserved1 : 2;
+         uint32 swizzle : 8;
+         uint32 srcMod : 4;
+         uint32 type_lower : 3;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderSrcToken;
+
+/* SVGA3DOP_DCL parameter tokens */
+
+typedef struct {
+   union {
+      struct {
+         union {
+            struct {
+               uint32 usage : 5;
+               uint32 reserved1 : 11;
+               uint32 index : 4;
+               uint32 reserved0 : 12;
+            }; /* input / output declaration */
+
+            struct {
+               uint32 reserved3 : 27;
+               uint32 type : 4;
+               uint32 reserved2 : 1;
+            }; /* sampler declaration */
+         };
+
+         SVGA3dShaderDestToken dst;
+      };
+
+      uint32 values[2];
+   };
+} SVGA3DOpDclArgs;
+
+/* SVGA3DOP_DEF parameter tokens */
+
+typedef struct {
+   union {
+      struct {
+         SVGA3dShaderDestToken dst;
+
+         union {
+            float constValues[4];
+            int constIValues[4];
+            Bool constBValue;
+         };
+      };
+
+      uint32 values[5];
+   };
+} SVGA3DOpDefArgs;
+
+/* SVGA3D shader token */
+
+typedef union {
+   uint32 value;
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken dest;
+   SVGA3dShaderSrcToken src;
+} SVGA3dShaderToken;
+
+/* SVGA3D shader program */
+
+typedef struct {
+   SVGA3dShaderVersion version;
+   /* SVGA3dShaderToken stream */
+} SVGA3dShaderProgram;
+
+/* SVGA3D version specific register assignments */
+
+static const uint32 SVGA3D_INPUT_REG_POSITION_VS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_PSIZE_VS11 = 1;
+static const uint32 SVGA3D_INPUT_REG_FOG_VS11 = 3;
+static const uint32 SVGA3D_INPUT_REG_FOG_MASK_VS11 = SVGA3DWRITEMASK_3;
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_VS11 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_VS11 = 4;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS11 = 2;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_PS11 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_PS11 = 1;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS20 = 0;
+static const uint32 SVGA3D_INPUT_REG_COLOR_NUM_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_NUM_PS20 = 8;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_BASE_PS20 = 1;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_NUM_PS20 = 4;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_BASE_PS20 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1;
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dShaderGetRegType --
+ *
+ *      Rebuild the register type of a parameter token. The type lives
+ *      in two separate bit-fields: type_upper holds the bits above the
+ *      three bits kept in type_lower.
+ *
+ * Results:
+ *      Returns the register type.
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dShaderRegType
+SVGA3dShaderGetRegType(uint32 token)
+{
+   SVGA3dShaderSrcToken param;
+   uint32 upper;
+   param.value = token;
+   upper = param.type_upper;
+   return (SVGA3dShaderRegType)((upper << 3) | param.type_lower);
+}
+
+#endif /* __SVGA3D_SHADER_DEFS__ */
diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h
new file mode 100644
index 0000000000..1b96c2ec07
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_reg.h
@@ -0,0 +1,1346 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_reg.h --
+ *
+ *    Virtual hardware definitions for the VMware SVGA II device.
+ */
+
+#ifndef _SVGA_REG_H_
+#define _SVGA_REG_H_
+
+/*
+ * PCI device IDs.
+ */
+#define PCI_VENDOR_ID_VMWARE            0x15AD
+#define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
+
+/*
+ * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
+ * cursor bypass mode. This is still supported, but no new guest
+ * drivers should use it.
+ */
+#define SVGA_CURSOR_ON_HIDE            0x0   /* Must be 0 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_SHOW            0x1   /* Must be 1 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_REMOVE_FROM_FB  0x2   /* Remove the cursor from the framebuffer because we need to see what's under it */
+#define SVGA_CURSOR_ON_RESTORE_TO_FB   0x3   /* Put the cursor back in the framebuffer so the user can see it */
+
+/*
+ * The maximum framebuffer size that can be traced for e.g. guests in VESA mode.
+ * The changeMap in the monitor is proportional to this number. Therefore, we'd
+ * like to keep it as small as possible to reduce monitor overhead (using
+ * SVGA_VRAM_MAX_SIZE for this increases the size of the shared area by over
+ * 4k!).
+ *
+ * NB: For compatibility reasons, this value must be greater than 0xff0000.
+ *     See bug 335072.
+ */
+#define SVGA_FB_MAX_TRACEABLE_SIZE      0x1000000
+
+#define SVGA_MAX_PSEUDOCOLOR_DEPTH      8
+#define SVGA_MAX_PSEUDOCOLORS           (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH)
+#define SVGA_NUM_PALETTE_REGS           (3 * SVGA_MAX_PSEUDOCOLORS)
+
+#define SVGA_MAGIC         0x900000UL   /* Shifted left by 8 to form the upper part of every version ID */
+#define SVGA_MAKE_ID(ver)  ((SVGA_MAGIC << 8) | (ver))   /* Magic above, version number OR'ed into the low bits */
+
+/* Version 2 let the address of the frame buffer be unsigned on Win32 */
+#define SVGA_VERSION_2     2
+#define SVGA_ID_2          SVGA_MAKE_ID(SVGA_VERSION_2)
+
+/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so
+   PALETTE_BASE has moved */
+#define SVGA_VERSION_1     1
+#define SVGA_ID_1          SVGA_MAKE_ID(SVGA_VERSION_1)
+
+/* Version 0 is the initial version */
+#define SVGA_VERSION_0     0
+#define SVGA_ID_0          SVGA_MAKE_ID(SVGA_VERSION_0)
+
+/* "Invalid" value for all SVGA IDs. (Version ID, screen object ID, surface ID...) */
+#define SVGA_ID_INVALID    0xFFFFFFFF
+
+/* Port offsets, relative to BAR0 */
+#define SVGA_INDEX_PORT         0x0
+#define SVGA_VALUE_PORT         0x1
+#define SVGA_BIOS_PORT          0x2
+#define SVGA_IRQSTATUS_PORT     0x8
+
+/*
+ * Interrupt source flags for IRQSTATUS_PORT and IRQMASK.
+ *
+ * Interrupts are only supported when the
+ * SVGA_CAP_IRQMASK capability is present.
+ */
+#define SVGA_IRQFLAG_ANY_FENCE            0x1    /* Any fence was passed */
+#define SVGA_IRQFLAG_FIFO_PROGRESS        0x2    /* Made forward progress in the FIFO */
+#define SVGA_IRQFLAG_FENCE_GOAL           0x4    /* SVGA_FIFO_FENCE_GOAL reached */
+
+/*
+ * Registers
+ */
+
+enum {
+   SVGA_REG_ID = 0,
+   SVGA_REG_ENABLE = 1,
+   SVGA_REG_WIDTH = 2,
+   SVGA_REG_HEIGHT = 3,
+   SVGA_REG_MAX_WIDTH = 4,
+   SVGA_REG_MAX_HEIGHT = 5,
+   SVGA_REG_DEPTH = 6,
+   SVGA_REG_BITS_PER_PIXEL = 7,       /* Current bpp in the guest */
+   SVGA_REG_PSEUDOCOLOR = 8,
+   SVGA_REG_RED_MASK = 9,
+   SVGA_REG_GREEN_MASK = 10,
+   SVGA_REG_BLUE_MASK = 11,
+   SVGA_REG_BYTES_PER_LINE = 12,
+   SVGA_REG_FB_START = 13,            /* (Deprecated) */
+   SVGA_REG_FB_OFFSET = 14,
+   SVGA_REG_VRAM_SIZE = 15,
+   SVGA_REG_FB_SIZE = 16,
+
+   /* ID 0 implementation only had the above registers, then the palette */
+
+   SVGA_REG_CAPABILITIES = 17,
+   SVGA_REG_MEM_START = 18,           /* (Deprecated) */
+   SVGA_REG_MEM_SIZE = 19,
+   SVGA_REG_CONFIG_DONE = 20,         /* Set when memory area configured */
+   SVGA_REG_SYNC = 21,                /* See "FIFO Synchronization Registers" */
+   SVGA_REG_BUSY = 22,                /* See "FIFO Synchronization Registers" */
+   SVGA_REG_GUEST_ID = 23,            /* Set guest OS identifier */
+   SVGA_REG_CURSOR_ID = 24,           /* (Deprecated) */
+   SVGA_REG_CURSOR_X = 25,            /* (Deprecated) */
+   SVGA_REG_CURSOR_Y = 26,            /* (Deprecated) */
+   SVGA_REG_CURSOR_ON = 27,           /* (Deprecated) */
+   SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */
+   SVGA_REG_SCRATCH_SIZE = 29,        /* Number of scratch registers */
+   SVGA_REG_MEM_REGS = 30,            /* Number of FIFO registers */
+   SVGA_REG_NUM_DISPLAYS = 31,        /* (Deprecated) */
+   SVGA_REG_PITCHLOCK = 32,           /* Fixed pitch for all modes */
+   SVGA_REG_IRQMASK = 33,             /* Interrupt mask */
+
+   /* Legacy multi-monitor support */
+   SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */
+   SVGA_REG_DISPLAY_ID = 35,        /* Display ID for the following display attributes */
+   SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */
+   SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */
+   SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */
+   SVGA_REG_DISPLAY_WIDTH = 39,     /* The display's width */
+   SVGA_REG_DISPLAY_HEIGHT = 40,    /* The display's height */
+
+   /* See "Guest memory regions" below. */
+   SVGA_REG_GMR_ID = 41,
+   SVGA_REG_GMR_DESCRIPTOR = 42,
+   SVGA_REG_GMR_MAX_IDS = 43,
+   SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,
+
+   SVGA_REG_TRACES = 45,            /* Enable trace-based updates even when FIFO is on */
+   SVGA_REG_TOP = 46,               /* Must be 1 more than the last register */
+
+   SVGA_PALETTE_BASE = 1024,        /* Base of SVGA color map */
+   /* Next 768 (== 256*3) registers exist for colormap */
+
+   SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS
+                                    /* Base of scratch registers */
+   /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage:
+      First 4 are reserved for VESA BIOS Extension; any remaining are for
+      the use of the current SVGA driver. */
+};
+
+
+/*
+ * Guest memory regions (GMRs):
+ *
+ * This is a new memory mapping feature available in SVGA devices
+ * which have the SVGA_CAP_GMR bit set. Previously, there were two
+ * fixed memory regions available with which to share data between the
+ * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs
+ * are our name for an extensible way of providing arbitrary DMA
+ * buffers for use between the driver and the SVGA device. They are a
+ * new alternative to framebuffer memory, usable for both 2D and 3D
+ * graphics operations.
+ *
+ * Since GMR mapping must be done synchronously with guest CPU
+ * execution, we use a new pair of SVGA registers:
+ *
+ *   SVGA_REG_GMR_ID --
+ *
+ *     Read/write.
+ *     This register holds the 32-bit ID (a small positive integer)
+ *     of a GMR to create, delete, or redefine. Writing this register
+ *     has no side-effects.
+ *
+ *   SVGA_REG_GMR_DESCRIPTOR --
+ *
+ *     Write-only.
+ *     Writing this register will create, delete, or redefine the GMR
+ *     specified by the above ID register. If this register is zero,
+ *     the GMR is deleted. Any pointers into this GMR (including those
+ *     currently being processed by FIFO commands) will be
+ *     synchronously invalidated.
+ *
+ *     If this register is nonzero, it must be the physical page
+ *     number (PPN) of a data structure which describes the physical
+ *     layout of the memory region this GMR should describe. The
+ *     descriptor structure will be read synchronously by the SVGA
+ *     device when this register is written. The descriptor need not
+ *     remain allocated for the lifetime of the GMR.
+ *
+ *     The guest driver should write SVGA_REG_GMR_ID first, then
+ *     SVGA_REG_GMR_DESCRIPTOR.
+ *
+ *   SVGA_REG_GMR_MAX_IDS --
+ *
+ *     Read-only.
+ *     The SVGA device may choose to support a maximum number of
+ *     user-defined GMR IDs. This register holds the number of supported
+ *     IDs. (The maximum supported ID plus 1)
+ *
+ *   SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH --
+ *
+ *     Read-only.
+ *     The SVGA device may choose to put a limit on the total number
+ *     of SVGAGuestMemDescriptor structures it will read when defining
+ *     a single GMR.
+ *
+ * The descriptor structure is an array of SVGAGuestMemDescriptor
+ * structures. Each structure may do one of three things:
+ *
+ *   - Terminate the GMR descriptor list.
+ *     (ppn==0, numPages==0)
+ *
+ *   - Add a PPN or range of PPNs to the GMR's virtual address space.
+ *     (ppn != 0, numPages != 0)
+ *
+ *   - Provide the PPN of the next SVGAGuestMemDescriptor, in order to
+ *     support multi-page GMR descriptor tables without forcing the
+ *     driver to allocate physically contiguous memory.
+ *     (ppn != 0, numPages == 0)
+ *
+ * Note that each physical page of SVGAGuestMemDescriptor structures
+ * can describe at least 2MB of guest memory. If the driver needs to
+ * use more than one page of descriptor structures, it must use one of
+ * its SVGAGuestMemDescriptors to point to an additional page.  The
+ * device will never automatically cross a page boundary.
+ *
+ * Once the driver has described a GMR, it is immediately available
+ * for use via any FIFO command that uses an SVGAGuestPtr structure.
+ * These pointers include a GMR identifier plus an offset into that
+ * GMR.
+ *
+ * The driver must check the SVGA_CAP_GMR bit before using the GMR
+ * registers.
+ */
+
+/*
+ * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer
+ * memory as well.  In the future, these IDs could even be used to
+ * allow legacy memory regions to be redefined by the guest as GMRs.
+ *
+ * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA
+ * is being phased out. Please try to use user-defined GMRs whenever
+ * possible.
+ */
+#define SVGA_GMR_NULL         ((uint32) -1)
+#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  // Guest Framebuffer (GFB)
+
+typedef
+struct SVGAGuestMemDescriptor {
+   uint32 ppn;        /* Physical page number; ppn==0 with numPages==0 terminates the list */
+   uint32 numPages;   /* Pages in the range; 0 with ppn!=0 means ppn is the next descriptor page */
+} SVGAGuestMemDescriptor;
+
+typedef
+struct SVGAGuestPtr {
+   uint32 gmrId;    /* GMR identifier, or a special ID (SVGA_GMR_NULL, SVGA_GMR_FRAMEBUFFER) */
+   uint32 offset;   /* Offset into that GMR */
+} SVGAGuestPtr;
+
+
+/*
+ * SVGAGMRImageFormat --
+ *
+ *    This is a packed representation of the source 2D image format
+ *    for a GMR-to-screen blit. Currently it is defined as an encoding
+ *    of the screen's color depth and bits-per-pixel, however, 16 bits
+ *    are reserved for future use to identify other encodings (such as
+ *    RGBA or higher-precision images).
+ *
+ *    Currently supported formats:
+ *
+ *       bpp depth  Format Name
+ *       --- -----  -----------
+ *        32    24  32-bit BGRX
+ *        24    24  24-bit BGR
+ *        16    16  RGB 5-6-5
+ *        16    15  RGB 5-5-5
+ *
+ */
+
+typedef
+struct SVGAGMRImageFormat {
+   union {
+      struct {
+         uint32 bitsPerPixel : 8;   // "bpp" column of the supported-formats table above
+         uint32 colorDepth   : 8;   // "depth" column of the supported-formats table above
+         uint32 reserved     : 16;  // Must be zero
+      };
+
+      uint32 value;                 // The same storage as the bitfields, accessed as one uint32
+   };
+} SVGAGMRImageFormat;
+
+/*
+ * SVGAColorBGRX --
+ *
+ *    A 24-bit color format (BGRX), which does not depend on the
+ *    format of the legacy guest framebuffer (GFB) or the current
+ *    GMRFB state.
+ */
+
+typedef
+struct SVGAColorBGRX {
+   union {
+      struct {
+         uint32 b : 8;  // Blue
+         uint32 g : 8;  // Green
+         uint32 r : 8;  // Red
+         uint32 x : 8;  // Unused
+      };
+
+      uint32 value;     // The same storage as the bitfields, accessed as one uint32
+   };
+} SVGAColorBGRX;
+
+
+/*
+ * SVGASignedRect --
+ * SVGASignedPoint --
+ *
+ *    Signed rectangle and point primitives. These are used by the new
+ *    2D primitives for drawing to Screen Objects, which can occupy a
+ *    signed virtual coordinate space.
+ *
+ *    SVGASignedRect specifies a half-open interval: the (left, top)
+ *    pixel is part of the rectangle, but the (right, bottom) pixel is
+ *    not.
+ */
+
+typedef
+struct SVGASignedRect {
+   int32  left;     /* Inclusive: the (left, top) pixel is part of the rectangle */
+   int32  top;      /* Inclusive */
+   int32  right;    /* Exclusive: the (right, bottom) pixel is not part of the rectangle */
+   int32  bottom;   /* Exclusive */
+} SVGASignedRect;
+
+typedef
+struct SVGASignedPoint {
+   int32  x;
+   int32  y;
+} SVGASignedPoint;
+
+
+/*
+ *  Capabilities
+ *
+ *  Note the holes in the bitfield. Missing bits have been deprecated,
+ *  and must not be reused. Those capabilities will never be reported
+ *  by new versions of the SVGA device.
+ */
+
+#define SVGA_CAP_NONE               0x00000000
+#define SVGA_CAP_RECT_COPY          0x00000002
+#define SVGA_CAP_CURSOR             0x00000020
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_8BIT_EMULATION     0x00000100
+#define SVGA_CAP_ALPHA_CURSOR       0x00000200
+#define SVGA_CAP_3D                 0x00004000
+#define SVGA_CAP_EXTENDED_FIFO      0x00008000
+#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_PITCHLOCK          0x00020000
+#define SVGA_CAP_IRQMASK            0x00040000
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_GMR                0x00100000
+#define SVGA_CAP_TRACES             0x00200000
+
+
+/*
+ * FIFO register indices.
+ *
+ * The FIFO is a chunk of device memory mapped into guest physmem.  It
+ * is always treated as 32-bit words.
+ *
+ * The guest driver gets to decide how to partition it between
+ * - FIFO registers (there are always at least 4, specifying where the
+ *   following data area is and how much data it contains; there may be
+ *   more registers following these, depending on the FIFO protocol
+ *   version in use)
+ * - FIFO data, written by the guest and slurped out by the VMX.
+ * These indices are 32-bit word offsets into the FIFO.
+ */
+
+enum {
+   /*
+    * Block 1 (basic registers): The originally defined FIFO registers.
+    * These exist and are valid for all versions of the FIFO protocol.
+    */
+
+   SVGA_FIFO_MIN = 0,
+   SVGA_FIFO_MAX,       /* The distance from MIN to MAX must be at least 10K */
+   SVGA_FIFO_NEXT_CMD,
+   SVGA_FIFO_STOP,
+
+   /*
+    * Block 2 (extended registers): Mandatory registers for the extended
+    * FIFO.  These exist if the SVGA caps register includes
+    * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their
+    * associated capability bit is enabled.
+    *
+    * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied
+    * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE.
+    * This means that the guest has to test individually (in most cases
+    * using FIFO caps) for the presence of registers after this; the VMX
+    * can define "extended FIFO" to mean whatever it wants, and currently
+    * won't enable it unless there's room for that set and much more.
+    */
+
+   SVGA_FIFO_CAPABILITIES = 4,
+   SVGA_FIFO_FLAGS,
+   // Valid with SVGA_FIFO_CAP_FENCE:
+   SVGA_FIFO_FENCE,
+
+   /*
+    * Block 3a (optional extended registers): Additional registers for the
+    * extended FIFO, whose presence isn't actually implied by
+    * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to
+    * leave room for them.
+    *
+    * The VMX currently considers the registers in block 3a mandatory for
+    * the extended FIFO.
+    */
+
+   // Valid if exists (i.e. if extended FIFO enabled):
+   SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
+   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   SVGA_FIFO_PITCHLOCK,
+
+   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
+   SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
+   SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
+   SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
+   SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
+
+   // Valid with SVGA_FIFO_CAP_RESERVE:
+   SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
+
+   /*
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    *
+    * By default this is SVGA_ID_INVALID, to indicate that the cursor
+    * coordinates are specified relative to the virtual root. If this
+    * is set to a specific screen ID, cursor position is reinterpreted
+    * as a signed offset relative to that screen's origin. This is the
+    * only way to place the cursor on a non-rooted screen.
+    */
+   SVGA_FIFO_CURSOR_SCREEN_ID,
+
+   /*
+    * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
+    * registers, but this must be done carefully and with judicious use of
+    * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
+    * enough to tell you whether the register exists: we've shipped drivers
+    * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of
+    * the earlier ones.  The actual order of introduction was:
+    * - PITCHLOCK
+    * - 3D_CAPS
+    * - CURSOR_* (cursor bypass 3)
+    * - RESERVED
+    * So, code that wants to know whether it can use any of the
+    * aforementioned registers, or anything else added after PITCHLOCK and
+    * before 3D_CAPS, needs to reason about something other than
+    * SVGA_FIFO_MIN.
+    */
+
+   /*
+    * 3D caps block space; valid with 3D hardware version >=
+    * SVGA3D_HWVERSION_WS6_B1.
+    */
+   SVGA_FIFO_3D_CAPS      = 32,
+   SVGA_FIFO_3D_CAPS_LAST = 32 + 255,
+
+   /*
+    * End of VMX's current definition of "extended-FIFO registers".
+    * Registers before here are always enabled/disabled as a block; either
+    * the extended FIFO is enabled and includes all preceding registers, or
+    * it's disabled entirely.
+    *
+    * Block 3b (truly optional extended registers): Additional registers for
+    * the extended FIFO, which the VMX already knows how to enable and
+    * disable with correct granularity.
+    *
+    * Registers after here exist if and only if the guest SVGA driver
+    * sets SVGA_FIFO_MIN high enough to leave room for them.
+    */
+
+   // Valid if register exists:
+   SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
+   SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
+   SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
+
+   /*
+    * Always keep this last.  This defines the maximum number of
+    * registers we know about.  At power-on, this value is placed in
+    * the SVGA_REG_MEM_REGS register, and we expect the guest driver
+    * to allocate this much space in FIFO memory for registers.
+    */
+    SVGA_FIFO_NUM_REGS
+};
+
+
+/*
+ * Definition of registers included in extended FIFO support.
+ *
+ * The guest SVGA driver gets to allocate the FIFO between registers
+ * and data.  It must always allocate at least 4 registers, but old
+ * drivers stopped there.
+ *
+ * The VMX will enable extended FIFO support if and only if the guest
+ * left enough room for all registers defined as part of the mandatory
+ * set for the extended FIFO.
+ *
+ * Note that the guest drivers typically allocate the FIFO only at
+ * initialization time, not at mode switches, so it's likely that the
+ * number of FIFO registers won't change without a reboot.
+ *
+ * All registers less than this value are guaranteed to be present if
+ * svgaUser->fifo.extended is set. Any later registers must be tested
+ * individually for compatibility at each use (in the VMX).
+ *
+ * This value is used only by the VMX, so it can change without
+ * affecting driver compatibility; keep it that way?
+ */
+#define SVGA_FIFO_EXTENDED_MANDATORY_REGS  (SVGA_FIFO_3D_CAPS_LAST + 1)
+
+
+/*
+ * FIFO Synchronization Registers
+ *
+ *  This explains the relationship between the various FIFO
+ *  sync-related registers in IOSpace and in FIFO space.
+ *
+ *  SVGA_REG_SYNC --
+ *
+ *       The SYNC register can be used in two different ways by the guest:
+ *
+ *         1. If the guest wishes to fully sync (drain) the FIFO,
+ *            it will write once to SYNC then poll on the BUSY
+ *            register. The FIFO is sync'ed once BUSY is zero.
+ *
+ *         2. If the guest wants to asynchronously wake up the host,
+ *            it will write once to SYNC without polling on BUSY.
+ *            Ideally it will do this after some new commands have
+ *            been placed in the FIFO, and after reading a zero
+ *            from SVGA_FIFO_BUSY.
+ *
+ *       (1) is the original behaviour that SYNC was designed to
+ *       support.  Originally, a write to SYNC would implicitly
+ *       trigger a read from BUSY. This causes us to synchronously
+ *       process the FIFO.
+ *
+ *       This behaviour has since been changed so that writing SYNC
+ *       will *not* implicitly cause a read from BUSY. Instead, it
+ *       makes a channel call which asynchronously wakes up the MKS
+ *       thread.
+ *
+ *       New guests can use this new behaviour to implement (2)
+ *       efficiently. This lets guests get the host's attention
+ *       without waiting for the MKS to poll, which gives us much
+ *       better CPU utilization on SMP hosts and on UP hosts while
+ *       we're blocked on the host GPU.
+ *
+ *       Old guests shouldn't notice the behaviour change. SYNC was
+ *       never guaranteed to process the entire FIFO, since it was
+ *       bounded to a particular number of CPU cycles. Old guests will
+ *       still loop on the BUSY register until the FIFO is empty.
+ *
+ *       Writing to SYNC currently has the following side-effects:
+ *
+ *         - Sets SVGA_REG_BUSY to TRUE (in the monitor)
+ *         - Asynchronously wakes up the MKS thread for FIFO processing
+ *         - The value written to SYNC is recorded as a "reason", for
+ *           stats purposes.
+ *
+ *       If SVGA_FIFO_BUSY is available, drivers are advised to only
+ *       write to SYNC if SVGA_FIFO_BUSY is FALSE. Drivers should set
+ *       SVGA_FIFO_BUSY to TRUE after writing to SYNC. The MKS will
+ *       eventually set SVGA_FIFO_BUSY on its own, but this approach
+ *       lets the driver avoid sending multiple asynchronous wakeup
+ *       messages to the MKS thread.
+ *
+ *  SVGA_REG_BUSY --
+ *
+ *       This register is set to TRUE when SVGA_REG_SYNC is written,
+ *       and it reads as FALSE when the FIFO has been completely
+ *       drained.
+ *
+ *       Every read from this register causes us to synchronously
+ *       process FIFO commands. There is no guarantee as to how many
+ *       commands each read will process.
+ *
+ *       CPU time spent processing FIFO commands will be billed to
+ *       the guest.
+ *
+ *       New drivers should avoid using this register unless they
+ *       need to guarantee that the FIFO is completely drained. It
+ *       is overkill for performing a sync-to-fence. Older drivers
+ *       will use this register for any type of synchronization.
+ *
+ *  SVGA_FIFO_BUSY --
+ *
+ *       This register is a fast way for the guest driver to check
+ *       whether the FIFO is already being processed. It reads and
+ *       writes at normal RAM speeds, with no monitor intervention.
+ *
+ *       If this register reads as TRUE, the host is guaranteeing that
+ *       any new commands written into the FIFO will be noticed before
+ *       the MKS goes back to sleep.
+ *
+ *       If this register reads as FALSE, no such guarantee can be
+ *       made.
+ *
+ *       The guest should use this register to quickly determine
+ *       whether or not it needs to wake up the host. If the guest
+ *       just wrote a command or group of commands that it would like
+ *       the host to begin processing, it should:
+ *
+ *         1. Read SVGA_FIFO_BUSY. If it reads as TRUE, no further
+ *            action is necessary.
+ *
+ *         2. Write TRUE to SVGA_FIFO_BUSY. This informs future guest
+ *            code that we've already sent a SYNC to the host and we
+ *            don't need to send a duplicate.
+ *
+ *         3. Write a reason to SVGA_REG_SYNC. This will send an
+ *            asynchronous wakeup to the MKS thread.
+ */
+
+
+/*
+ * FIFO Capabilities
+ *
+ *      Fence -- Fence register and command are supported
+ *      Accel Front -- Front buffer only commands are supported
+ *      Pitch Lock -- Pitch lock register is supported
+ *      Video -- SVGA Video overlay units are supported
+ *      Escape -- Escape command is supported
+ *
+ * XXX: Add longer descriptions for each capability, including a list
+ *      of the new features that each capability provides.
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT --
+ *
+ *    Provides dynamic multi-screen rendering, for improved Unity and
+ *    multi-monitor modes. With Screen Object, the guest can
+ *    dynamically create and destroy 'screens', which can represent
+ *    Unity windows or virtual monitors. Screen Object also provides
+ *    strong guarantees that DMA operations happen only when
+ *    guest-initiated. Screen Object deprecates the BAR1 guest
+ *    framebuffer (GFB) and all commands that work only with the GFB.
+ *
+ *    New registers:
+ *       FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID
+ *
+ *    New 2D commands:
+ *       DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN,
+ *       BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY
+ *
+ *    New 3D commands:
+ *       BLIT_SURFACE_TO_SCREEN
+ *
+ *    New guarantees:
+ *
+ *       - The host will not read or write guest memory, including the GFB,
+ *         except when explicitly initiated by a DMA command.
+ *
+ *       - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK,
+ *         is guaranteed to complete before any subsequent FENCEs.
+ *
+ *       - All legacy commands which affect a Screen (UPDATE, PRESENT,
+ *         PRESENT_READBACK) as well as new Screen blit commands will
+ *         all behave consistently as blits, and memory will be read
+ *         or written in FIFO order.
+ *
+ *         For example, if you PRESENT from one SVGA3D surface to multiple
+ *         places on the screen, the data copied will always be from the
+ *         SVGA3D surface at the time the PRESENT was issued in the FIFO.
+ *         This was not necessarily true on devices without Screen Object.
+ *
+ *         This means that on devices that support Screen Object, the
+ *         PRESENT_READBACK command should not be necessary unless you
+ *         actually want to read back the results of 3D rendering into
+ *         system memory. (And for that, the BLIT_SCREEN_TO_GMRFB
+ *         command provides a strict superset of functionality.)
+ *
+ *       - When a screen is resized, either using Screen Object commands or
+ *         legacy multimon registers, its contents are preserved.
+ */
+
+#define SVGA_FIFO_CAP_NONE                  0
+#define SVGA_FIFO_CAP_FENCE             (1<<0)
+#define SVGA_FIFO_CAP_ACCELFRONT        (1<<1)
+#define SVGA_FIFO_CAP_PITCHLOCK         (1<<2)
+#define SVGA_FIFO_CAP_VIDEO             (1<<3)
+#define SVGA_FIFO_CAP_CURSOR_BYPASS_3   (1<<4)
+#define SVGA_FIFO_CAP_ESCAPE            (1<<5)
+#define SVGA_FIFO_CAP_RESERVE           (1<<6)
+#define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+
+
+/*
+ * FIFO Flags
+ *
+ *      Accel Front -- Driver should use front buffer only commands
+ */
+
+#define SVGA_FIFO_FLAG_NONE                 0
+#define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
+#define SVGA_FIFO_FLAG_RESERVED        (1u<<31) // Internal use only; unsigned literal keeps the shift into bit 31 well-defined
+
+/*
+ * FIFO reservation sentinel value
+ */
+
+#define SVGA_FIFO_RESERVED_UNKNOWN      0xffffffff
+
+
+/*
+ * Video overlay support
+ */
+
+#define SVGA_NUM_OVERLAY_UNITS 32
+
+
+/*
+ * Video capabilities that the guest is currently using
+ */
+
+#define SVGA_VIDEO_FLAG_COLORKEY        0x0001
+
+
+/*
+ * Offsets for the video overlay registers
+ */
+
+enum {
+   SVGA_VIDEO_ENABLED = 0,
+   SVGA_VIDEO_FLAGS,
+   SVGA_VIDEO_DATA_OFFSET,
+   SVGA_VIDEO_FORMAT,
+   SVGA_VIDEO_COLORKEY,
+   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_WIDTH,
+   SVGA_VIDEO_HEIGHT,
+   SVGA_VIDEO_SRC_X,
+   SVGA_VIDEO_SRC_Y,
+   SVGA_VIDEO_SRC_WIDTH,
+   SVGA_VIDEO_SRC_HEIGHT,
+   SVGA_VIDEO_DST_X,         // Signed int32
+   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_WIDTH,
+   SVGA_VIDEO_DST_HEIGHT,
+   SVGA_VIDEO_PITCH_1,
+   SVGA_VIDEO_PITCH_2,
+   SVGA_VIDEO_PITCH_3,
+   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
+   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_NUM_REGS
+};
+
+
+/*
+ * SVGA Overlay Units
+ *
+ *      width and height relate to the entire source video frame.
+ *      srcX, srcY, srcWidth and srcHeight represent subset of the source
+ *      video frame to be displayed.
+ */
+
+typedef struct SVGAOverlayUnit {   /* NOTE(review): field order appears to mirror the SVGA_VIDEO_* register offsets -- confirm */
+   uint32 enabled;
+   uint32 flags;
+   uint32 dataOffset;
+   uint32 format;
+   uint32 colorKey;
+   uint32 size;         /* cf. SVGA_VIDEO_SIZE, which is marked deprecated */
+   uint32 width;        /* Width of the entire source video frame */
+   uint32 height;       /* Height of the entire source video frame */
+   uint32 srcX;         /* srcX/srcY/srcWidth/srcHeight: subset of the source frame to display */
+   uint32 srcY;
+   uint32 srcWidth;
+   uint32 srcHeight;
+   int32  dstX;         /* Signed */
+   int32  dstY;         /* Signed */
+   uint32 dstWidth;
+   uint32 dstHeight;
+   uint32 pitches[3];
+   uint32 dataGMRId;    /* cf. SVGA_VIDEO_DATA_GMRID: optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   uint32 dstScreenId;  /* cf. SVGA_VIDEO_DST_SCREEN_ID: optional, defaults to SVGA_ID_INVALID */
+} SVGAOverlayUnit;
+
+
+/*
+ * SVGAScreenObject --
+ *
+ *    This is a new way to represent a guest's multi-monitor screen or
+ *    Unity window. Screen objects are only supported if the
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set.
+ *
+ *    If Screen Objects are supported, they can be used to fully
+ *    replace the functionality provided by the framebuffer registers
+ *    (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY.
+ *
+ *    The screen object is a struct with guaranteed binary
+ *    compatibility. New flags can be added, and the struct may grow,
+ *    but existing fields must retain their meaning.
+ *
+ */
+
+#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
+#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+
+typedef
+struct SVGAScreenObject {
+   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 id;
+   uint32 flags;
+   struct {
+      uint32 width;
+      uint32 height;
+   } size;
+   struct {
+      int32 x;
+      int32 y;
+   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+} SVGAScreenObject;
+
+
+/*
+ *  Commands in the command FIFO:
+ *
+ *  Command IDs defined below are used for the traditional 2D FIFO
+ *  communication (not all commands are available for all versions of the
+ *  SVGA FIFO protocol).
+ *
+ *  Note the holes in the command ID numbers: These commands have been
+ *  deprecated, and the old IDs must not be reused.
+ *
+ *  Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ *  protocol.
+ *
+ *  Each command's parameters are described by the comments and
+ *  structs below.
+ */
+
+typedef enum {
+   SVGA_CMD_INVALID_CMD           = 0,
+   SVGA_CMD_UPDATE                = 1,
+   SVGA_CMD_RECT_COPY             = 3,   // ID 2 is a deprecated hole
+   SVGA_CMD_DEFINE_CURSOR         = 19,  // IDs 4-18 are deprecated holes
+   SVGA_CMD_DEFINE_ALPHA_CURSOR   = 22,
+   SVGA_CMD_UPDATE_VERBOSE        = 25,
+   SVGA_CMD_FRONT_ROP_FILL        = 29,
+   SVGA_CMD_FENCE                 = 30,
+   SVGA_CMD_ESCAPE                = 33,
+   SVGA_CMD_DEFINE_SCREEN         = 34,
+   SVGA_CMD_DESTROY_SCREEN        = 35,
+   SVGA_CMD_DEFINE_GMRFB          = 36,
+   SVGA_CMD_BLIT_GMRFB_TO_SCREEN  = 37,
+   SVGA_CMD_BLIT_SCREEN_TO_GMRFB  = 38,
+   SVGA_CMD_ANNOTATION_FILL       = 39,
+   SVGA_CMD_ANNOTATION_COPY       = 40,
+   SVGA_CMD_MAX                          // Implicitly 41: one past the last ID listed
+} SVGAFifoCmdId;
+
+#define SVGA_CMD_MAX_ARGS           64
+
+
+/*
+ * SVGA_CMD_UPDATE --
+ *
+ *    This is a DMA transfer which copies from the Guest Framebuffer
+ *    (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which
+ *    intersect with the provided virtual rectangle.
+ *
+ *    This command does not support using arbitrary guest memory as a
+ *    data source- it only works with the pre-defined GFB memory.
+ *    This command also does not support signed virtual coordinates.
+ *    If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with
+ *    negative root x/y coordinates, the negative portion of those
+ *    screens will not be reachable by this command.
+ *
+ *    This command is not necessary when using framebuffer
+ *    traces. Traces are automatically enabled if the SVGA FIFO is
+ *    disabled, and you may explicitly enable/disable traces using
+ *    SVGA_REG_TRACES. With traces enabled, any write to the GFB will
+ *    automatically act as if a subsequent SVGA_CMD_UPDATE was issued.
+ *
+ *    Traces and SVGA_CMD_UPDATE are the only supported ways to render
+ *    pseudocolor screen updates. The newer Screen Object commands
+ *    only support true color formats.
+ *
+ * Availability:
+ *    Always available.
+ */
+
+typedef
+struct {
+   uint32 x;        // Virtual rectangle origin; unsigned, so negative coords are unreachable
+   uint32 y;
+   uint32 width;    // Virtual rectangle size
+   uint32 height;
+} SVGAFifoCmdUpdate;
+
+
+/*
+ * SVGA_CMD_RECT_COPY --
+ *
+ *    Perform a rectangular DMA transfer from one area of the GFB to
+ *    another, and copy the result to any screens which intersect it.
+ *
+ * Availability:
+ *    SVGA_CAP_RECT_COPY
+ */
+
+typedef
+struct {
+   uint32 srcX;     // Source origin within the GFB
+   uint32 srcY;
+   uint32 destX;    // Destination origin within the GFB
+   uint32 destY;
+   uint32 width;    // Size of the copied rectangle
+   uint32 height;
+} SVGAFifoCmdRectCopy;
+
+
+/*
+ * SVGA_CMD_DEFINE_CURSOR --
+ *
+ *    Provide a new cursor image, as an AND/XOR mask.
+ *
+ *    The recommended way to position the cursor overlay is by using
+ *    the SVGA_FIFO_CURSOR_* registers, supported by the
+ *    SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ *    SVGA_CAP_CURSOR
+ */
+
+typedef
+struct {
+   uint32 id;             // Reserved, must be zero.
+   uint32 hotspotX;
+   uint32 hotspotY;
+   uint32 width;
+   uint32 height;
+   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   /*
+    * Followed by scanline data for the AND mask, then the XOR mask.
+    * Each scanline is padded to a 32-bit boundary.
+    */
+} SVGAFifoCmdDefineCursor;
+
+
+/*
+ * SVGA_CMD_DEFINE_ALPHA_CURSOR --
+ *
+ *    Provide a new cursor image, in 32-bit BGRA format.
+ *
+ *    The recommended way to position the cursor overlay is by using
+ *    the SVGA_FIFO_CURSOR_* registers, supported by the
+ *    SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ *    SVGA_CAP_ALPHA_CURSOR
+ */
+
+typedef
+struct {
+   uint32 id;             // Reserved, must be zero.
+   uint32 hotspotX;
+   uint32 hotspotY;
+   uint32 width;
+   uint32 height;
+   /* Followed by scanline data in 32-bit BGRA format */
+} SVGAFifoCmdDefineAlphaCursor;
+
+
+/*
+ * SVGA_CMD_UPDATE_VERBOSE --
+ *
+ *    Just like SVGA_CMD_UPDATE, but also provide a per-rectangle
+ *    'reason' value, an opaque cookie which is used by internal
+ *    debugging tools. Third party drivers should not use this
+ *    command.
+ *
+ * Availability:
+ *    SVGA_CAP_EXTENDED_FIFO
+ */
+
+typedef
+struct {
+   uint32 x;        // Same rectangle fields as SVGAFifoCmdUpdate
+   uint32 y;
+   uint32 width;
+   uint32 height;
+   uint32 reason;   // Opaque cookie used by internal debugging tools
+} SVGAFifoCmdUpdateVerbose;
+
+
+/*
+ * SVGA_CMD_FRONT_ROP_FILL --
+ *
+ *    This is a hint which tells the SVGA device that the driver has
+ *    just filled a rectangular region of the GFB with a solid
+ *    color. Instead of reading these pixels from the GFB, the device
+ *    can assume that they all equal 'color'. This is primarily used
+ *    for remote desktop protocols.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_ACCELFRONT
+ */
+
+#define  SVGA_ROP_COPY                    0x03
+
+typedef
+struct {
+   uint32 color;     // In the same format as the GFB
+   uint32 x;         // Rectangle of the GFB that the driver just filled
+   uint32 y;
+   uint32 width;
+   uint32 height;
+   uint32 rop;       // Must be SVGA_ROP_COPY
+} SVGAFifoCmdFrontRopFill;
+
+
+/*
+ * SVGA_CMD_FENCE --
+ *
+ *    Insert a synchronization fence.  When the SVGA device reaches
+ *    this command, it will copy the 'fence' value into the
+ *    SVGA_FIFO_FENCE register. It will also compare the fence against
+ *    SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the
+ *    SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will
+ *    raise this interrupt.
+ *
+ * Availability:
+ *    SVGA_FIFO_FENCE for this command,
+ *    SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL.
+ */
+
+typedef
+struct {
+   uint32 fence;   // Copied into SVGA_FIFO_FENCE and compared against SVGA_FIFO_FENCE_GOAL
+} SVGAFifoCmdFence;
+
+
+/*
+ * SVGA_CMD_ESCAPE --
+ *
+ *    Send an extended or vendor-specific variable length command.
+ *    This is used for video overlay, third party plugins, and
+ *    internal debugging tools. See svga_escape.h
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_ESCAPE
+ */
+
+typedef
+struct {
+   uint32 nsid;   // NOTE(review): presumably the escape namespace ID from svga_escape.h -- confirm
+   uint32 size;   // Number of data bytes that follow this header
+   /* followed by 'size' bytes of data */
+} SVGAFifoCmdEscape;
+
+
+/*
+ * SVGA_CMD_DEFINE_SCREEN --
+ *
+ *    Define or redefine an SVGAScreenObject. See the description of
+ *    SVGAScreenObject above.  The video driver is responsible for
+ *    generating new screen IDs. They should be small positive
+ *    integers. The virtual device will have an implementation
+ *    specific upper limit on the number of screen IDs
+ *    supported. Drivers are responsible for recycling IDs. The first
+ *    valid ID is zero.
+ *
+ *    - Interaction with other registers:
+ *
+ *    For backwards compatibility, when the GFB mode registers (WIDTH,
+ *    HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device
+ *    deletes all screens other than screen #0, and redefines screen
+ *    #0 according to the specified mode. Drivers that use
+ *    SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0.
+ *
+ *    If you use screen objects, do not use the legacy multi-mon
+ *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAScreenObject screen;   // Variable-length according to version (see screen.structSize)
+} SVGAFifoCmdDefineScreen;
+
+
+/*
+ * SVGA_CMD_DESTROY_SCREEN --
+ *
+ *    Destroy an SVGAScreenObject. Its ID is immediately available for
+ *    re-use.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   uint32 screenId;   // Screen object to destroy; the ID is immediately reusable
+} SVGAFifoCmdDestroyScreen;
+
+
+/*
+ * SVGA_CMD_DEFINE_GMRFB --
+ *
+ *    This command sets a piece of SVGA device state called the
+ *    Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a
+ *    piece of light-weight state which identifies the location and
+ *    format of an image in guest memory or in BAR1. The GMRFB has
+ *    an arbitrary size, and it doesn't need to match the geometry
+ *    of the GFB or any screen object.
+ *
+ *    The GMRFB can be redefined as often as you like. You could
+ *    always use the same GMRFB, you could redefine it before
+ *    rendering from a different guest screen, or you could even
+ *    redefine it before every blit.
+ *
+ *    There are multiple ways to use this command. The simplest way is
+ *    to use it to move the framebuffer either to elsewhere in the GFB
+ *    (BAR1) memory region, or to a user-defined GMR. This lets a
+ *    driver use a framebuffer allocated entirely out of normal system
+ *    memory, which we encourage.
+ *
+ *    Another way to use this command is to set up a ring buffer of
+ *    updates in GFB memory. If a driver wants to ensure that no
+ *    frames are skipped by the SVGA device, it is important that the
+ *    driver not modify the source data for a blit until the device is
+ *    done processing the command. One efficient way to accomplish
+ *    this is to use a ring of small DMA buffers. Each buffer is used
+ *    for one blit, then we move on to the next buffer in the
+ *    ring. The FENCE mechanism is used to protect each buffer from
+ *    re-use until the device is finished with that buffer's
+ *    corresponding blit.
+ *
+ *    This command does not affect the meaning of SVGA_CMD_UPDATE.
+ *    UPDATEs always occur from the legacy GFB memory area. This
+ *    command has no support for pseudocolor GMRFBs. Currently only
+ *    true-color 15, 16, and 24-bit depths are supported. Future
+ *    devices may expose capabilities for additional framebuffer
+ *    formats.
+ *
+ *    The default GMRFB value is undefined. Drivers must always send
+ *    this command at least once before performing any blit from the
+ *    GMRFB.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAGuestPtr        ptr;            // Image location: guest memory or BAR1
+   uint32              bytesPerLine;
+   SVGAGMRImageFormat  format;         // True color only; pseudocolor is not supported
+} SVGAFifoCmdDefineGMRFB;
+
+
+/*
+ * SVGA_CMD_BLIT_GMRFB_TO_SCREEN --
+ *
+ *    This is a guest-to-host blit. It performs a DMA operation to
+ *    copy a rectangular region of pixels from the current GMRFB to
+ *    one or more Screen Objects.
+ *
+ *    The destination coordinate may be specified relative to a
+ *    screen's origin (if a screen ID is specified) or relative to the
+ *    virtual coordinate system's origin (if the screen ID is
+ *    SVGA_ID_INVALID). The actual destination may span zero or more
+ *    screens, in the case of a virtual destination rect or a rect
+ *    which extends off the edge of the specified screen.
+ *
+ *    This command writes to the screen's "base layer": the underlying
+ *    framebuffer which exists below any cursor or video overlays. No
+ *    action is necessary to explicitly hide or update any overlays
+ *    which exist on top of the updated region.
+ *
+ *    The SVGA device is guaranteed to finish reading from the GMRFB
+ *    by the time any subsequent FENCE commands are reached.
+ *
+ *    This command consumes an annotation. See the
+ *    SVGA_CMD_ANNOTATION_* commands for details.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  srcOrigin;      // Origin within the current GMRFB
+   SVGASignedRect   destRect;       // Screen-relative, or virtual when no screen is given
+   uint32           destScreenId;   // Screen ID, or SVGA_ID_INVALID for virtual coordinates
+} SVGAFifoCmdBlitGMRFBToScreen;
+
+
+/*
+ * SVGA_CMD_BLIT_SCREEN_TO_GMRFB --
+ *
+ *    This is a host-to-guest blit. It performs a DMA operation to
+ *    copy a rectangular region of pixels from a single Screen Object
+ *    back to the current GMRFB.
+ *
+ *    Usage note: This command should be used rarely. It will
+ *    typically be inefficient, but it is necessary for some types of
+ *    synchronization between 3D (GPU) and 2D (CPU) rendering into
+ *    overlapping areas of a screen.
+ *
+ *    The source coordinate is specified relative to a screen's
+ *    origin. The provided screen ID must be valid. If any parameters
+ *    are invalid, the resulting pixel values are undefined.
+ *
+ *    This command reads the screen's "base layer". Overlays like
+ *    video and cursor are not included, but any data which was sent
+ *    using a blit-to-screen primitive will be available, no matter
+ *    whether the data's original source was the GMRFB or the 3D
+ *    acceleration hardware.
+ *
+ *    Note that our guest-to-host blits and host-to-guest blits aren't
+ *    symmetric in their current implementation. While the parameters
+ *    are identical, host-to-guest blits are a lot less featureful.
+ *    They do not support clipping: If the source parameters don't
+ *    fully fit within a screen, the blit fails. They must originate
+ *    from exactly one screen. Virtual coordinates are not directly
+ *    supported.
+ *
+ *    Host-to-guest blits do support the same set of GMRFB formats
+ *    offered by guest-to-host blits.
+ *
+ *    The SVGA device is guaranteed to finish writing to the GMRFB by
+ *    the time any subsequent FENCE commands are reached.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  destOrigin;     // Origin within the current GMRFB
+   SVGASignedRect   srcRect;        // Relative to the screen's origin; must fit (no clipping)
+   uint32           srcScreenId;    // Must be a valid screen ID
+} SVGAFifoCmdBlitScreenToGMRFB;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_FILL --
+ *
+ *    This is a blit annotation. This command stores a small piece of
+ *    device state which is consumed by the next blit-to-screen
+ *    command. The state is only cleared by commands which are
+ *    specifically documented as consuming an annotation. Other
+ *    commands (such as ESCAPEs for debugging) may intervene between
+ *    the annotation and its associated blit.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value, specified here as a color in
+ *    SVGAColorBGRX format.
+ *
+ *    The SVGA device can still render the blit correctly even if it
+ *    ignores this annotation, but the annotation may allow it to
+ *    perform the blit more efficiently, for example by ignoring the
+ *    source data and performing a fill in hardware.
+ *
+ *    This annotation is most important for performance when the
+ *    user's display is being remoted over a network connection.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAColorBGRX  color;   // Value promised for every pixel of the next blit
+} SVGAFifoCmdAnnotationFill;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_COPY --
+ *
+ *    This is a blit annotation. See SVGA_CMD_ANNOTATION_FILL for more
+ *    information about annotations.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value as those which already exist at an
+ *    identically-sized region on the same or a different screen.
+ *
+ *    Note that the source pixels for the COPY in this annotation are
+ *    sampled before applying the annotation's associated blit. They
+ *    are allowed to overlap with the blit's destination pixels.
+ *
+ *    The copy source rectangle is specified the same way as the blit
+ *    destination: it can be a rectangle which spans zero or more
+ *    screens, specified relative to either a screen or to the virtual
+ *    coordinate system's origin. If the source rectangle includes
+ *    pixels which are not from exactly one screen, the results are
+ *    undefined.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  srcOrigin;     // Origin of the copy source rectangle
+   uint32           srcScreenId;   // NOTE(review): presumably SVGA_ID_INVALID selects virtual coords -- confirm
+} SVGAFifoCmdAnnotationCopy;
+
+#endif
diff --git a/src/gallium/drivers/svga/include/svga_types.h b/src/gallium/drivers/svga/include/svga_types.h
new file mode 100644
index 0000000000..7fd9bab03a
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_types.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef _SVGA_TYPES_H_
+#define _SVGA_TYPES_H_
+
+#include "pipe/p_compiler.h"
+
+typedef int64_t int64;     // Short aliases for the fixed-width <stdint.h>-style types
+typedef uint64_t uint64;
+
+typedef int32_t int32;
+typedef uint32_t uint32;
+
+typedef int16_t int16;
+typedef uint16_t uint16;
+
+typedef int8_t int8;
+typedef uint8_t uint8;
+
+typedef uint8_t Bool;      // One byte wide: an alias of uint8_t, not C99 _Bool
+
+#endif /* _SVGA_TYPES_H_ */
+
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
new file mode 100644
index 0000000000..a0da7d7e5d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -0,0 +1,1427 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * svga_cmd.c --
+ *
+ *      Command construction utility for the SVGA3D protocol used by
+ *      the VMware SVGA device, based on the svgautil library.
+ */
+
+#include "svga_winsys.h"
+#include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
+#include "svga_cmd.h"
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * surface_to_surfaceid --
+ *
+ *      Utility function for surface ids.
+ *      Can handle null surface. Does a surface_relocation so you need
+ *      to have allocated the fifo space before converting.
+ *
+ * Results:
+ *      id is filled out.
+ *
+ * Side effects:
+ *      One surface relocation is performed for the texture handle.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE
+void surface_to_surfaceid(struct svga_winsys_context *swc, // IN
+                          struct pipe_surface *surface,    // IN
+                          SVGA3dSurfaceImageId *id,        // OUT
+                          unsigned flags)                  // IN
+{
+   struct svga_surface *surf;
+   if (!surface) {   /* null surface: invalid ID, face and mipmap cleared */
+      id->sid = SVGA3D_INVALID_ID;
+      id->face = 0;
+      id->mipmap = 0;
+      return;
+   }
+   surf = svga_surface(surface);
+   swc->surface_relocation(swc, &id->sid, surf->handle, flags);
+   id->face = surf->real_face;   /* faces have the same order */
+   id->mipmap = surf->real_level;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_FIFOReserve --
+ *
+ *      Reserve space for an SVGA3D FIFO command.
+ *
+ *      The 2D SVGA commands have been around for a while, so they
+ *      have a rather asymmetric structure. The SVGA3D protocol is
+ *      more uniform: each command begins with a header containing the
+ *      command number and the full size.
+ *
+ *      This is a convenience wrapper around SVGA_FIFOReserve. We
+ *      reserve space for the whole command, and write the header.
+ *
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * Results:
+ *      Returns a pointer to the 'cmdSize' bytes reserved for
+ *      command-specific data, or NULL if the reservation fails.
+ *
+ * Side effects:
+ *      Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc,
+                   uint32 cmd,       // IN
+                   uint32 cmdSize,   // IN
+                   uint32 nr_relocs) // IN
+{
+   /* The header and its payload are reserved together in one request. */
+   SVGA3dCmdHeader *hdr = swc->reserve(swc, sizeof *hdr + cmdSize, nr_relocs);
+
+   if (hdr == NULL)
+      return NULL;
+
+   hdr->size = cmdSize;   /* payload bytes only; the header is not counted */
+   hdr->id = cmd;
+
+   return hdr + 1;        /* payload begins immediately after the header */
+}
+
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc)
+{
+   swc->commit(swc);   /* forwards to the winsys context's commit callback */
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineContext --
+ *
+ *      Create a new context, to be referred to with the provided ID.
+ *
+ *      Context objects encapsulate all render state, and shader
+ *      objects are per-context.
+ *
+ *      Surfaces are not per-context. The same surface can be shared
+ *      between multiple contexts, and surface operations can occur
+ *      without a context.
+ *
+ *      If the provided context ID already existed, it is redefined.
+ *
+ *      Context IDs are arbitrary small non-negative integers,
+ *      global to the entire SVGA device.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc)  // IN
+{
+   SVGA3dCmdDefineContext *define;
+
+   /* No relocations: the payload only carries the context ID. */
+   define = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_CONTEXT_DEFINE,
+                               sizeof *define, 0);
+   if (define == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   define->cid = swc->cid;
+   SVGA_FIFOCommitAll(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyContext --
+ *
+ *      Delete a context created with SVGA3D_DefineContext.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc)  // IN
+{
+   SVGA3dCmdDestroyContext *destroy =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_CONTEXT_DESTROY,
+                         sizeof *destroy, 0);
+
+   if (destroy == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* The payload is just the ID of this winsys context. */
+   destroy->cid = swc->cid;
+   SVGA_FIFOCommitAll(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDefineSurface --
+ *
+ *      Begin a SURFACE_DEFINE command. This reserves space for it in
+ *      the FIFO, and returns pointers to the command's faces and
+ *      mipsizes arrays.
+ *
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *      The faces and mipSizes arrays are initialized to zero.
+ *
+ *      This creates a "surface" object in the SVGA3D device,
+ *      with the provided surface ID (sid). Surfaces are generic
+ *      containers for host VRAM objects like textures, vertex
+ *      buffers, and depth/stencil buffers.
+ *
+ *      Surfaces are hierarchical:
+ *
+ *        - Surface may have multiple faces (for cube maps)
+ *
+ *          - Each face has a list of mipmap levels
+ *
+ *             - Each mipmap image may have multiple volume
+ *               slices, if the image is three dimensional.
+ *
+ *                - Each slice is a 2D array of 'blocks'
+ *
+ *                   - Each block may be one or more pixels.
+ *                     (Usually 1, more for DXT or YUV formats.)
+ *
+ *      Surfaces are generic host VRAM objects. The SVGA3D device
+ *      may optimize surfaces according to the format they were
+ *      created with, but this format does not limit the ways in
+ *      which the surface may be used. For example, a depth surface
+ *      can be used as a texture, or a floating point image may
+ *      be used as a vertex buffer. Some surface usages may be
+ *      lower performance, due to software emulation, but any
+ *      usage should work with any surface.
+ *
+ *      If 'sid' is already defined, the old surface is deleted
+ *      and this new surface replaces it.
+ *
+ *      Surface IDs are arbitrary small non-negative integers,
+ *      global to the entire SVGA device.
+ *
+ * Results:
+ *      Returns pointers to arrays allocated in the FIFO for 'faces'
+ *      and 'mipSizes'.
+ *
+ * Side effects:
+ *      Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+                          struct svga_winsys_surface *sid, // IN
+                          SVGA3dSurfaceFlags flags,    // IN
+                          SVGA3dSurfaceFormat format,  // IN
+                          SVGA3dSurfaceFace **faces,   // OUT
+                          SVGA3dSize **mipSizes,       // OUT
+                          uint32 numMipSizes)          // IN
+{
+   const size_t mipBytes = sizeof **mipSizes * numMipSizes;
+   SVGA3dCmdDefineSurface *define;
+
+   define = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_DEFINE,
+                               sizeof *define + mipBytes, 1);
+   if (define == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &define->sid, sid, PIPE_BUFFER_USAGE_GPU_WRITE);
+   define->surfaceFlags = flags;
+   define->format = format;
+
+   /* Faces sit inside the fixed command; the mip sizes trail right after it. */
+   *faces = define->face;
+   *mipSizes = (SVGA3dSize *)(define + 1);
+   memset(*faces, 0, sizeof **faces * SVGA3D_MAX_SURFACE_FACES);
+   memset(*mipSizes, 0, mipBytes);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineSurface2D --
+ *
+ *      This is a simplified version of SVGA3D_BeginDefineSurface(),
+ *      which does not support cube maps, mipmaps, or volume textures.
+ *
+ * Results:
+ *      PIPE_OK, or the error from SVGA3D_BeginDefineSurface().
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,    // IN
+                       struct svga_winsys_surface *sid, // IN
+                       uint32 width,                // IN
+                       uint32 height,               // IN
+                       SVGA3dSurfaceFormat format)  // IN
+{
+   SVGA3dSurfaceFace *faceArray;
+   SVGA3dSize *levelSizes;
+   enum pipe_error status;
+
+   /* Surface flags are zero; room is requested for exactly one mip size. */
+   status = SVGA3D_BeginDefineSurface(swc, sid, 0, format,
+                                      &faceArray, &levelSizes, 1);
+   if (status != PIPE_OK)
+      return status;
+
+   /* Only face 0 gets a level: one mip of width x height x 1. */
+   faceArray[0].numMipLevels = 1;
+   levelSizes[0].width = width;
+   levelSizes[0].height = height;
+   levelSizes[0].depth = 1;
+   SVGA_FIFOCommitAll(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroySurface --
+ *
+ *      Release the host VRAM encapsulated by a particular surface ID.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *sid)  // IN
+{
+   SVGA3dCmdDestroySurface *destroy;
+
+   destroy = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_DESTROY,
+                                sizeof *destroy, 1);   /* one relocation: the sid */
+   if (destroy == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &destroy->sid, sid, PIPE_BUFFER_USAGE_GPU_READ);
+   SVGA_FIFOCommitAll(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SurfaceDMA --
+ *
+ *      Emit a complete SURFACE_DMA command. This reserves space for
+ *      it in the FIFO, copies the caller's box array into the command,
+ *      and commits it; no separate SVGA_FIFOCommitAll() is needed.
+ *
+ *      When the SVGA3D device asynchronously processes this FIFO
+ *      command, a DMA operation is performed between host VRAM and
+ *      a generic SVGAGuestPtr. The guest pointer may refer to guest
+ *      VRAM (provided by the SVGA PCI device) or to guest system
+ *      memory that has been set up as a Guest Memory Region (GMR)
+ *      by the SVGA device.
+ *
+ *      The guest's DMA buffer must remain valid (not freed, paged out,
+ *      or overwritten) until the host has finished processing this
+ *      command. The guest can determine that the host has finished
+ *      by using the SVGA device's FIFO Fence mechanism.
+ *
+ *      The guest's image buffer can be an arbitrary size and shape.
+ *      Guest image data is interpreted according to the SVGA3D surface
+ *      format specified when the surface was defined.
+ *
+ *      The caller may optionally define the guest image's pitch.
+ *      guestImage->pitch can either be zero (assume image is tightly
+ *      packed) or it must be the number of bytes between vertically
+ *      adjacent image blocks.
+ *
+ *      The provided copybox list specifies which regions of the source
+ *      image are to be copied, and where they appear on the destination.
+ *
+ *      NOTE: srcx/srcy are always on the guest image and x/y are
+ *      always on the host image, regardless of the actual transfer
+ *      direction!
+ *
+ *      For efficiency, the SVGA3D device is free to copy more data
+ *      than specified. For example, it may round copy boxes outwards
+ *      such that they lie on particular alignment boundaries.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+                  struct svga_transfer *st,         // IN
+                  SVGA3dTransferType transfer,      // IN
+                  const SVGA3dCopyBox *boxes,       // IN
+                  uint32 numBoxes)                  // IN
+{
+   struct svga_texture *tex = svga_texture(st->base.texture);
+   const uint32 boxBytes = sizeof *boxes * numBoxes;
+   SVGA3dCmdSurfaceDMASuffix *suffix;
+   SVGA3dCmdSurfaceDMA *dma;
+   unsigned guestUsage;
+   unsigned hostUsage;
+
+   /* The guest region and the host surface swap roles with the direction. */
+   switch (transfer) {
+   case SVGA3D_WRITE_HOST_VRAM:
+      guestUsage = PIPE_BUFFER_USAGE_GPU_READ;
+      hostUsage = PIPE_BUFFER_USAGE_GPU_WRITE;
+      break;
+   case SVGA3D_READ_HOST_VRAM:
+      guestUsage = PIPE_BUFFER_USAGE_GPU_WRITE;
+      hostUsage = PIPE_BUFFER_USAGE_GPU_READ;
+      break;
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* Layout: fixed command, then the copy boxes, then the suffix. */
+   dma = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *dma + boxBytes + sizeof *suffix, 2);
+   if (dma == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &dma->guest.ptr, st->hwbuf, 0, guestUsage);
+   dma->guest.pitch = st->base.stride;
+
+   swc->surface_relocation(swc, &dma->host.sid, tex->handle, hostUsage);
+   dma->host.face = st->base.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+   dma->host.mipmap = st->base.level;
+
+   dma->transfer = transfer;
+   memcpy(dma + 1, boxes, boxBytes);
+
+   suffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t *)(dma + 1) + boxBytes);
+   suffix->suffixSize = sizeof *suffix;
+   suffix->maximumOffset = st->hw_nblocksy * st->base.stride;
+   memset(&suffix->flags, 0, sizeof suffix->flags);
+
+   SVGA_FIFOCommitAll(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+                 struct svga_winsys_buffer *guest,
+                 struct svga_winsys_surface *host,
+                 SVGA3dTransferType transfer,      // IN
+                 uint32 size,                      // IN
+                 uint32 offset,                    // IN
+                 SVGA3dSurfaceDMAFlags flags)      // IN
+{
+   SVGA3dCmdSurfaceDMA *dma;
+   SVGA3dCopyBox *span;
+   SVGA3dCmdSurfaceDMASuffix *tail;
+   unsigned guestUsage;
+   unsigned hostUsage;
+
+   /* Guest and host usage flags mirror each other per direction. */
+   switch (transfer) {
+   case SVGA3D_WRITE_HOST_VRAM:
+      guestUsage = PIPE_BUFFER_USAGE_GPU_READ;
+      hostUsage = PIPE_BUFFER_USAGE_GPU_WRITE;
+      break;
+   case SVGA3D_READ_HOST_VRAM:
+      guestUsage = PIPE_BUFFER_USAGE_GPU_WRITE;
+      hostUsage = PIPE_BUFFER_USAGE_GPU_READ;
+      break;
+   default:
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   /* Header, exactly one copy box, then the suffix. */
+   dma = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *dma + sizeof *span + sizeof *tail, 2);
+   if (!dma)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &dma->guest.ptr, guest, 0, guestUsage);
+   dma->guest.pitch = 0;
+
+   swc->surface_relocation(swc, &dma->host.sid, host, hostUsage);
+   dma->host.face = 0;
+   dma->host.mipmap = 0;
+   dma->transfer = transfer;
+
+   /* Same start offset on both sides; 'size' wide, one unit high and deep. */
+   span = (SVGA3dCopyBox *)&dma[1];
+   span->srcx = offset;
+   span->srcy = 0;
+   span->srcz = 0;
+   span->x = offset;
+   span->y = 0;
+   span->z = 0;
+   span->w = size;
+   span->h = 1;
+   span->d = 1;
+
+   tail = (SVGA3dCmdSurfaceDMASuffix *)&span[1];
+   tail->suffixSize = sizeof *tail;
+   tail->maximumOffset = offset + size;
+   tail->flags = flags;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetRenderTarget --
+ *
+ *      Bind a surface object to a particular render target attachment
+ *      point on the current context. Render target attachment points
+ *      exist for color buffers, a depth buffer, and a stencil buffer.
+ *
+ *      The SVGA3D device is quite lenient about the types of surfaces
+ *      that may be used as render targets. The color buffers must
+ *      all be the same size, but the depth and stencil buffers do not
+ *      have to be the same size as the color buffer. All attachments
+ *      are optional.
+ *
+ *      Some combinations of render target formats may require software
+ *      emulation, depending on the capabilities of the host graphics
+ *      API and graphics hardware.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+                       SVGA3dRenderTargetType type,   // IN
+                       struct pipe_surface *surface)  // IN
+{
+   SVGA3dCmdSetRenderTarget *rt;
+
+   /* One relocation slot, presumably consumed by surface_to_surfaceid(). */
+   rt = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETRENDERTARGET,
+                           sizeof *rt, 1);
+   if (!rt)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   rt->cid = swc->cid;
+   rt->type = type;
+
+   /* The target surface is referenced with GPU write usage. */
+   surface_to_surfaceid(swc, surface, &rt->target, PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineShader --
+ *
+ *      Upload the bytecode for a new shader. The bytecode is "SVGA3D
+ *      format", which is theoretically a binary-compatible superset
+ *      of Microsoft's DirectX shader bytecode. In practice, the
+ *      SVGA3D bytecode doesn't yet have any extensions to DirectX's
+ *      bytecode format.
+ *
+ *      The SVGA3D device supports shader models 1.1 through 2.0.
+ *
+ *      The caller chooses a shader ID (small positive integer) by
+ *      which this shader will be identified in future commands. This
+ *      ID is in a namespace which is per-context and per-shader-type.
+ *
+ *      'bytecodeLen' is specified in bytes. It must be a multiple of 4.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+                    uint32 shid,                  // IN
+                    SVGA3dShaderType type,        // IN
+                    const uint32 *bytecode,       // IN
+                    uint32 bytecodeLen)           // IN
+{
+   SVGA3dCmdDefineShader *def;
+   const uint32 cmdBytes = sizeof *def + bytecodeLen;
+
+   /* The length is in bytes and must cover whole 32-bit words. */
+   assert(bytecodeLen % 4 == 0);
+
+   def = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SHADER_DEFINE, cmdBytes, 0);
+   if (!def)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   def->cid = swc->cid;
+   def->shid = shid;
+   def->type = type;
+   /* The bytecode immediately follows the fixed-size header. */
+   memcpy(&def[1], bytecode, bytecodeLen);
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyShader --
+ *
+ *      Delete a shader that was created by SVGA3D_DefineShader. If
+ *      the shader was the current vertex or pixel shader for its
+ *      context, rendering results are undefined until a new shader is
+ *      bound.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+                     uint32 shid,            // IN
+                     SVGA3dShaderType type)  // IN
+{
+   SVGA3dCmdDestroyShader *destroy;
+
+   /* Fixed-size command: no trailing payload, no relocations. */
+   destroy = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SHADER_DESTROY,
+                                sizeof *destroy, 0);
+   if (!destroy)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   destroy->cid = swc->cid;
+   destroy->shid = shid;
+   destroy->type = type;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShaderConst --
+ *
+ *      Set the value of a shader constant.
+ *
+ *      Shader constants are analogous to uniform variables in GLSL,
+ *      except that they belong to the render context rather than to
+ *      an individual shader.
+ *
+ *      Constants may have one of three types: A 4-vector of floats,
+ *      a 4-vector of integers, or a single boolean flag.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+                      uint32 reg,                   // IN
+                      SVGA3dShaderType type,        // IN
+                      SVGA3dShaderConstType ctype,  // IN
+                      const void *value)            // IN
+{
+   SVGA3dCmdSetShaderConst *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER_CONST,
+                            sizeof *cmd, 0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->reg = reg;
+   cmd->type = type;
+   cmd->ctype = ctype;
+
+   switch (ctype) {
+   case SVGA3D_CONST_TYPE_FLOAT:
+   case SVGA3D_CONST_TYPE_INT:
+      /* 'value' must supply sizeof cmd->values bytes. */
+      memcpy(&cmd->values, value, sizeof cmd->values);
+      break;
+   case SVGA3D_CONST_TYPE_BOOL:
+      /* Only the first word is read from 'value'; the rest is zeroed. */
+      memset(&cmd->values, 0, sizeof cmd->values);
+      cmd->values[0] = *(const uint32 *)value;
+      break;
+   default:
+      /* Bad ctype: never commit uninitialized FIFO memory to the host. */
+      assert(0);
+      memset(&cmd->values, 0, sizeof cmd->values);
+      break;
+   }
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShader --
+ *
+ *      Switch active shaders. This binds a new vertex or pixel shader
+ *      to the specified context.
+ *
+ *      A shader ID of SVGA3D_INVALID_ID unbinds any shader, switching
+ *      back to the fixed function vertex or pixel pipeline.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+                 SVGA3dShaderType type,  // IN
+                 uint32 shid)            // IN
+{
+   SVGA3dCmdSetShader *bind;
+
+   /* Plain fixed-size command: no payload and no relocations. */
+   bind = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SET_SHADER, sizeof *bind, 0);
+   if (!bind)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   bind->cid = swc->cid;
+   bind->type = type;
+   bind->shid = shid;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginClear --
+ *
+ *      Begin a CLEAR command. This reserves space for it in the FIFO,
+ *      and returns a pointer to the command's rectangle array.  This
+ *      function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      Clear is a rendering operation which fills a list of
+ *      rectangles with constant values on all render target types
+ *      indicated by 'flags'.
+ *
+ *      Clear is not affected by clipping, depth test, or other
+ *      render state which affects the fragment pipeline.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+                  SVGA3dClearFlag flags,  // IN
+                  uint32 color,           // IN
+                  float depth,            // IN
+                  uint32 stencil,         // IN
+                  SVGA3dRect **rects,     // OUT
+                  uint32 numRects)        // IN
+{
+   const uint32 rectBytes = sizeof **rects * numRects;
+   SVGA3dCmdClear *clear;
+
+   clear = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_CLEAR,
+                              sizeof *clear + rectBytes, 0);
+   if (!clear)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   clear->cid = swc->cid;
+   clear->clearFlag = flags;
+   clear->color = color;
+   clear->depth = depth;
+   clear->stencil = stencil;
+
+   /* The caller fills the trailing rectangles and commits the command. */
+   *rects = (SVGA3dRect *)&clear[1];
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_ClearRect --
+ *
+ *      This is a simplified version of SVGA3D_BeginClear().
+ *
+ * Results:
+ *      PIPE_OK, or a pipe_error code if SVGA3D_BeginClear() fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+                 SVGA3dClearFlag flags,  // IN
+                 uint32 color,           // IN
+                 float depth,            // IN
+                 uint32 stencil,         // IN
+                 uint32 x,               // IN
+                 uint32 y,               // IN
+                 uint32 w,               // IN
+                 uint32 h)               // IN
+{
+   SVGA3dRect *rect;
+   enum pipe_error ret;
+
+   /* Reserve a CLEAR command with room for exactly one rectangle. */
+   ret = SVGA3D_BeginClear(swc, flags, color, depth, stencil, &rect, 1);
+   if(ret != PIPE_OK)
+      return ret;   /* propagate the callee's error code unchanged */
+
+   memset(rect, 0, sizeof *rect);
+   rect->x = x;
+   rect->y = y;
+   rect->w = w;
+   rect->h = h;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDrawPrimitives --
+ *
+ *      Begin a DRAW_PRIMITIVES command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's arrays.
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      Drawing commands consist of two variable-length arrays:
+ *      SVGA3dVertexDecl elements declare a set of vertex buffers to
+ *      use while rendering, and SVGA3dPrimitiveRange elements specify
+ *      groups of primitives each with an optional index buffer.
+ *
+ *      The decls and ranges arrays are initialized to zero.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+                           SVGA3dVertexDecl **decls,      // OUT
+                           uint32 numVertexDecls,         // IN
+                           SVGA3dPrimitiveRange **ranges, // OUT
+                           uint32 numRanges)              // IN
+{
+   const uint32 declBytes = sizeof **decls * numVertexDecls;
+   const uint32 rangeBytes = sizeof **ranges * numRanges;
+   SVGA3dCmdDrawPrimitives *draw;
+   SVGA3dVertexDecl *firstDecl;
+   SVGA3dPrimitiveRange *firstRange;
+
+   /* One relocation is reserved per vertex declaration and per range. */
+   draw = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_DRAW_PRIMITIVES,
+                             sizeof *draw + declBytes + rangeBytes,
+                             numVertexDecls + numRanges);
+   if (!draw)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   draw->cid = swc->cid;
+   draw->numVertexDecls = numVertexDecls;
+   draw->numRanges = numRanges;
+
+   /* Payload layout: header, declarations, then primitive ranges. */
+   firstDecl = (SVGA3dVertexDecl *)&draw[1];
+   firstRange = (SVGA3dPrimitiveRange *)(firstDecl + numVertexDecls);
+
+   /* Both arrays are handed back zero-filled; the caller commits. */
+   memset(firstDecl, 0, declBytes);
+   memset(firstRange, 0, rangeBytes);
+   *decls = firstDecl;
+   *ranges = firstRange;
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceCopy --
+ *
+ *      Begin a SURFACE_COPY command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's arrays.  This
+ *      function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      The box array is initialized with zeroes.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      Asynchronously copies a list of boxes from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+                        struct pipe_surface *src,    // IN
+                        struct pipe_surface *dest,   // IN
+                        SVGA3dCopyBox **boxes,       // OUT
+                        uint32 numBoxes)             // IN
+{
+   const uint32 boxBytes = sizeof **boxes * numBoxes;
+   SVGA3dCmdSurfaceCopy *copy;
+   SVGA3dCopyBox *firstBox;
+
+   copy = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_COPY,
+                             sizeof *copy + boxBytes, 2);
+   if (!copy)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   surface_to_surfaceid(swc, src, &copy->src, PIPE_BUFFER_USAGE_GPU_READ);
+   surface_to_surfaceid(swc, dest, &copy->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+   /* Return the zeroed box array; committing is left to the caller. */
+   firstBox = (SVGA3dCopyBox *)&copy[1];
+   memset(firstBox, 0, boxBytes);
+   *boxes = firstBox;
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SurfaceStretchBlt --
+ *
+ *      Issue a SURFACE_STRETCHBLT command: an asynchronous
+ *      surface-to-surface blit, with scaling.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      Asynchronously copies one box from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+                         struct pipe_surface *src,    // IN
+                         struct pipe_surface *dest,   // IN
+                         SVGA3dBox *boxSrc,           // IN
+                         SVGA3dBox *boxDest,          // IN
+                         SVGA3dStretchBltMode mode)   // IN
+{
+   SVGA3dCmdSurfaceStretchBlt *blt;
+
+   /* Two relocation slots are requested, presumably one per surface. */
+   blt = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SURFACE_STRETCHBLT,
+                            sizeof *blt, 2);
+   if (!blt)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   surface_to_surfaceid(swc, src, &blt->src, PIPE_BUFFER_USAGE_GPU_READ);
+   surface_to_surfaceid(swc, dest, &blt->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+   /* Both boxes are copied by value into the command. */
+   blt->boxSrc = *boxSrc;
+   blt->boxDest = *boxDest;
+   blt->mode = mode;
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetViewport --
+ *
+ *      Set the current context's viewport rectangle. The viewport
+ *      is clipped to the dimensions of the current render target,
+ *      then all rendering is clipped to the viewport.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+                   SVGA3dRect *rect)  // IN
+{
+   SVGA3dCmdSetViewport *viewport;
+
+   viewport = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETVIEWPORT,
+                                 sizeof *viewport, 0);
+   if (!viewport)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* The rectangle is copied by value, so 'rect' need not outlive the call. */
+   viewport->cid = swc->cid;
+   viewport->rect = *rect;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetScissorRect --
+ *
+ *      Set the current context's scissor rectangle. If scissor
+ *      is enabled then all rendering is clipped to the scissor.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+                      SVGA3dRect *rect)  // IN
+{
+   /* Fixed-size command; no relocation slots are requested. */
+   SVGA3dCmdSetScissorRect *scissor =
+      SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETSCISSORRECT, sizeof *scissor, 0);
+
+   if (!scissor)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   scissor->cid = swc->cid;
+   scissor->rect = *rect;   /* copied by value */
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetClipPlane --
+ *
+ *      Set one of the current context's clip planes. If the clip
+ *      plane is enabled then all 3d rendering is clipped against
+ *      the plane.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+                    uint32 index,        // IN
+                    const float *plane)  // IN
+{
+   SVGA3dCmdSetClipPlane *clip;
+   unsigned i;
+
+   clip = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETCLIPPLANE, sizeof *clip, 0);
+   if (!clip)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   clip->cid = swc->cid;
+   clip->index = index;
+
+   /* Exactly four coefficients are read from 'plane'. */
+   for (i = 0; i < 4; i++)
+      clip->plane[i] = plane[i];
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetZRange --
+ *
+ *      Set the range of the depth buffer to use. 'zMin' and 'zMax'
+ *      are values between 0.0 and 1.0.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+                 float zMin,  // IN
+                 float zMax)  // IN
+{
+   SVGA3dCmdSetZRange *zr;
+
+   zr = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETZRANGE, sizeof *zr, 0);
+   if (!zr)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   zr->cid = swc->cid;
+
+   /* The bounds are forwarded as given; no clamping or ordering check. */
+   zr->zRange.min = zMin;
+   zr->zRange.max = zMax;
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetTextureState --
+ *
+ *      Begin a SETTEXTURESTATE command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's texture state
+ *      array.  This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      This command sets rendering state which is per-texture-unit.
+ *
+ *      XXX: Individual texture states need documentation. However,
+ *           they are very similar to the texture states defined by
+ *           Direct3D. The D3D documentation is a good starting point
+ *           for understanding SVGA3D texture states.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+                            SVGA3dTextureState **states,  // OUT
+                            uint32 numStates)             // IN
+{
+   const uint32 stateBytes = sizeof **states * numStates;
+   SVGA3dCmdSetTextureState *tss;
+
+   /* One relocation slot is reserved per texture state entry. */
+   tss = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETTEXTURESTATE,
+                            sizeof *tss + stateBytes, numStates);
+   if (!tss)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   tss->cid = swc->cid;
+   /* The array is NOT cleared here; the caller fills it and commits. */
+   *states = (SVGA3dTextureState *)&tss[1];
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetRenderState --
+ *
+ *      Begin a SETRENDERSTATE command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's render state
+ *      array.  This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      This command sets rendering state which is global to the context.
+ *
+ *      XXX: Individual render states need documentation. However,
+ *           they are very similar to the render states defined by
+ *           Direct3D. The D3D documentation is a good starting point
+ *           for understanding SVGA3D render states.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+                           SVGA3dRenderState **states,  // OUT
+                           uint32 numStates)            // IN
+{
+   SVGA3dCmdSetRenderState *rs;
+
+   rs = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_SETRENDERSTATE,
+                           sizeof *rs + sizeof **states * numStates, 0);
+   if (!rs)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   rs->cid = swc->cid;
+
+   /* Expose the uninitialized state array that trails the header. */
+   *states = (SVGA3dRenderState *)(rs + 1);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginQuery--
+ *
+ *      Issues a SVGA_3D_CMD_BEGIN_QUERY command.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+                  SVGA3dQueryType type) // IN
+{
+   SVGA3dCmdBeginQuery *begin;
+
+   /* No payload beyond the header and no relocations. */
+   begin = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_BEGIN_QUERY,
+                              sizeof *begin, 0);
+   if (!begin)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   begin->cid = swc->cid;
+   begin->type = type;
+
+   /* Despite the "Begin" name, the command is committed right here. */
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_EndQuery--
+ *
+ *      Issues a SVGA_3D_CMD_END_QUERY command.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+                SVGA3dQueryType type,              // IN
+                struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   const unsigned usage = PIPE_BUFFER_USAGE_GPU_WRITE;
+   SVGA3dCmdEndQuery *end;
+
+   /* A single relocation slot is reserved for the result buffer. */
+   end = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_END_QUERY,
+                            sizeof *end, 1);
+   if (!end)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   end->cid = swc->cid;
+   end->type = type;
+
+   /* guestResult is relocated against 'buffer' with GPU write usage. */
+   swc->region_relocation(swc, &end->guestResult, buffer, 0, usage);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_WaitForQuery--
+ *
+ *      Issues a SVGA_3D_CMD_WAIT_FOR_QUERY command.  This reserves space
+ *      for it in the FIFO.  This doesn't actually wait for the query to
+ *      finish but instead tells the host to start a wait at the driver
+ *      level.  The caller can wait on the status variable in the
+ *      guestPtr memory or send an insert fence instruction after this
+ *      command and wait on the fence.
+ *
+ * Results:
+ *      PIPE_OK, or PIPE_ERROR_OUT_OF_MEMORY if the FIFO reserve fails.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+                    SVGA3dQueryType type,              // IN
+                    struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   SVGA3dCmdWaitForQuery *waitCmd;
+
+   waitCmd = SVGA3D_FIFOReserve(swc, SVGA_3D_CMD_WAIT_FOR_QUERY,
+                                sizeof *waitCmd, 1);
+   if (!waitCmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   waitCmd->cid = swc->cid;
+   waitCmd->type = type;
+
+   /*
+    * The reserved relocation ties guestResult to 'buffer' (GPU-written).
+    */
+   swc->region_relocation(swc, &waitCmd->guestResult, buffer, 0,
+                          PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   swc->commit(swc);
+   return PIPE_OK;
+}
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
new file mode 100644
index 0000000000..8041054769
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -0,0 +1,235 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_cmd.h --
+ *
+ *      Command construction utility for the SVGA3D protocol used by
+ *      the VMware SVGA device, based on the svgautil library.
+ */
+
+#ifndef __SVGA3D_H__
+#define __SVGA3D_H__
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_defines.h"
+
+
+struct pipe_buffer;
+struct pipe_surface;
+struct svga_transfer;
+struct svga_winsys_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+/*
+ * SVGA Device Interoperability
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc, uint32 cmd, uint32 cmdSize, uint32 nr_relocs);
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc);
+
+
+/*
+ * Context Management
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc);
+
+
+/*
+ * Surface Management
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+                          struct svga_winsys_surface *sid,
+                          SVGA3dSurfaceFlags flags,
+                          SVGA3dSurfaceFormat format,
+                          SVGA3dSurfaceFace **faces,
+                          SVGA3dSize **mipSizes,
+                          uint32 numMipSizes);
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *sid,
+                       uint32 width,
+                       uint32 height,
+                       SVGA3dSurfaceFormat format);
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *sid);
+
+
+/*
+ * Surface Operations
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+                  struct svga_transfer *st,
+                  SVGA3dTransferType transfer,
+                  const SVGA3dCopyBox *boxes,
+                  uint32 numBoxes);
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+                 struct svga_winsys_buffer *guest,
+                 struct svga_winsys_surface *host,
+                 SVGA3dTransferType transfer,
+                 uint32 size,
+                 uint32 offset,
+                 SVGA3dSurfaceDMAFlags flags);
+
+/*
+ * Drawing Operations
+ */
+
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+                  SVGA3dClearFlag flags,
+                  uint32 color, float depth, uint32 stencil,
+                  SVGA3dRect **rects, uint32 numRects);
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+                 SVGA3dClearFlag flags, uint32 color, float depth,
+                 uint32 stencil, uint32 x, uint32 y, uint32 w, uint32 h);
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+                           SVGA3dVertexDecl **decls,
+                           uint32 numVertexDecls,
+                           SVGA3dPrimitiveRange **ranges,
+                           uint32 numRanges);
+
+/*
+ * Blits
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+                        struct pipe_surface *src,
+                        struct pipe_surface *dest,
+                        SVGA3dCopyBox **boxes, uint32 numBoxes);
+
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+                         struct pipe_surface *src,
+                         struct pipe_surface *dest,
+                         SVGA3dBox *boxSrc, SVGA3dBox *boxDest,
+                         SVGA3dStretchBltMode mode);
+
+/*
+ * Shared FFP/Shader Render State
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+                       SVGA3dRenderTargetType type,
+                       struct pipe_surface *surface);
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+                 float zMin, float zMax);
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+                   SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+                      SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+                    uint32 index, const float *plane);
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+                            SVGA3dTextureState **states,
+                            uint32 numStates);
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+                           SVGA3dRenderState **states,
+                           uint32 numStates);
+
+
+/*
+ * Shaders
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+                    uint32 shid, SVGA3dShaderType type,
+                    const uint32 *bytecode, uint32 bytecodeLen);
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+                     uint32 shid, SVGA3dShaderType type);
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+                      uint32 reg, SVGA3dShaderType type,
+                      SVGA3dShaderConstType ctype, const void *value);
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+                 SVGA3dShaderType type, uint32 shid);
+
+
+/*
+ * Queries
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+                  SVGA3dQueryType type);
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+                SVGA3dQueryType type,
+                struct svga_winsys_buffer *buffer);
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+                    SVGA3dQueryType type,
+                    struct svga_winsys_buffer *buffer);
+
+#endif /* __SVGA3D_H__ */
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
new file mode 100644
index 0000000000..73233957f3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -0,0 +1,269 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_swtnl.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+#include "svga_state.h"
+
+
+static void svga_destroy( struct pipe_context *pipe )
+{
+   struct svga_context *svga = svga_context( pipe );
+   unsigned shader;
+   /* Context teardown; installed as svga->pipe.destroy at creation time. */
+   svga_cleanup_framebuffer( svga );
+   svga_cleanup_tss_binding( svga );
+
+   svga_hwtnl_destroy( svga->hwtnl );
+
+   svga_cleanup_vertex_state(svga);
+   
+   svga->swc->destroy(svga->swc);
+   
+   svga_destroy_swtnl( svga );
+
+   u_upload_destroy( svga->upload_vb );
+   u_upload_destroy( svga->upload_ib );
+   /* Point every per-shader-type curr.cb[] slot at NULL before the free. */
+   for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
+      pipe_buffer_reference( &svga->curr.cb[shader], NULL );
+
+   FREE( svga );
+}
+
+/* pipe_context::is_texture_referenced hook.  face and level are ignored:
+ * the answer is given for the texture as a whole.
+ */
+static unsigned int
+svga_is_texture_referenced( struct pipe_context *pipe,
+                            struct pipe_texture *texture,
+                            unsigned face, unsigned level)
+{
+   struct svga_texture *stex = svga_texture(texture);
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   unsigned int refs = PIPE_UNREFERENCED;
+
+   /* Texture writes are never cached by the screen, so a texture that
+    * has a handle which the winsys does not report as flushed is in use.
+    * surface_is_flushed() cannot tell reads from writes: report both.
+    */
+   if (stex->handle &&
+       !svgascreen->sws->surface_is_flushed(svgascreen->sws, stex->handle))
+      refs = PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+
+   return refs;
+}
+
+/* pipe_context::is_buffer_referenced hook. */
+static unsigned int
+svga_is_buffer_referenced( struct pipe_context *pipe,
+                           struct pipe_buffer *buf)
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_buffer *sbuffer = svga_buffer(buf);
+   unsigned int refs;
+
+   /**
+    * XXX: Check this.
+    * The screen may cache buffer writes, but when we map, we map out
+    * of those cached writes, so we don't need to set a
+    * PIPE_REFERENCED_FOR_WRITE flag for cached buffers.
+    */
+   if (!sbuffer->handle ||
+       svgascreen->sws->surface_is_flushed(svgascreen->sws, sbuffer->handle))
+      return PIPE_UNREFERENCED;
+
+   /* surface_is_flushed() cannot separate reads from writes, so normally
+    * claim both.  Vertex and index buffers only claim a read reference,
+    * which avoids a flush in st_bufferobj_get_subdata during display
+    * list replay.
+    */
+   refs = PIPE_REFERENCED_FOR_READ;
+   if (!(sbuffer->base.usage &
+         (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX)))
+      refs |= PIPE_REFERENCED_FOR_WRITE;
+   return refs;
+}
+
+
+struct pipe_context *svga_context_create( struct pipe_screen *screen )
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_context *svga = NULL;
+   enum pipe_error ret;
+   /* Returns &svga->pipe on success, NULL on failure (see errorN labels). */
+   svga = CALLOC_STRUCT(svga_context);
+   if (svga == NULL)
+      goto error1;
+
+   svga->pipe.winsys = screen->winsys;
+   svga->pipe.screen = screen;
+   svga->pipe.destroy = svga_destroy;
+   svga->pipe.clear = svga_clear;
+
+   svga->pipe.is_texture_referenced = svga_is_texture_referenced;
+   svga->pipe.is_buffer_referenced = svga_is_buffer_referenced;
+   /* Winsys command context; released again by the error3 path below. */
+   svga->swc = svgascreen->sws->context_create(svgascreen->sws);
+   if(!svga->swc)
+      goto error2;
+
+   svga_init_blend_functions(svga);
+   svga_init_blit_functions(svga);
+   svga_init_depth_stencil_functions(svga);
+   svga_init_draw_functions(svga);
+   svga_init_flush_functions(svga);
+   svga_init_misc_functions(svga);
+   svga_init_rasterizer_functions(svga);
+   svga_init_sampler_functions(svga);
+   svga_init_fs_functions(svga);
+   svga_init_vs_functions(svga);
+   svga_init_vertex_functions(svga);
+   svga_init_constbuffer_functions(svga);
+   svga_init_query_functions(svga);
+
+   /* Debug overrides, looked up by option name: */
+   svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
+   svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
+   svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE);
+   svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
+
+   if (!svga_init_swtnl(svga))
+      goto error3;
+   /* Upload managers for index and vertex data (see the usage flags). */
+   svga->upload_ib = u_upload_create( svga->pipe.screen,
+                                      32 * 1024,
+                                      16,
+                                      PIPE_BUFFER_USAGE_INDEX );
+   if (svga->upload_ib == NULL)
+      goto error4;
+
+   svga->upload_vb = u_upload_create( svga->pipe.screen,
+                                      128 * 1024,
+                                      16,
+                                      PIPE_BUFFER_USAGE_VERTEX );
+   if (svga->upload_vb == NULL)
+      goto error5;
+
+   svga->hwtnl = svga_hwtnl_create( svga,
+                                    svga->upload_ib,
+                                    svga->swc );
+   if (svga->hwtnl == NULL)
+      goto error6;
+
+
+   ret = svga_emit_initial_state( svga );
+   if (ret)
+      goto error7;
+   
+   /* Avoid short-circuiting state whose initial value is zero: poison the
+    * cached hw state with 0xcd, keeping only framebuffer and views zeroed. */
+   memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
+   memset(&svga->state.hw_clear.framebuffer, 0x0, 
+          sizeof(svga->state.hw_clear.framebuffer));
+
+   memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
+   memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+   svga->state.hw_draw.num_views = 0;
+   /* Every dirty bit set: all state counts as changed since update_state(). */
+   svga->dirty = ~0;
+   svga->state.white_fs_id = SVGA3D_INVALID_ID;
+
+   LIST_INITHEAD(&svga->dirty_buffers);
+
+   return &svga->pipe;
+
+error7:
+   svga_hwtnl_destroy( svga->hwtnl );
+error6:
+   u_upload_destroy( svga->upload_vb );
+error5:
+   u_upload_destroy( svga->upload_ib );
+error4:
+   svga_destroy_swtnl(svga);
+error3:
+   svga->swc->destroy(svga->swc);
+error2:
+   FREE(svga);
+error1:
+   return NULL;
+}
+
+
+/* Submit everything queued on this context to the winsys.  pfence is
+ * passed through unchanged to the winsys flush call and may be NULL.
+ */
+void svga_context_flush( struct svga_context *svga,
+                         struct pipe_fence_handle **pfence )
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+
+   /* Upload managers first (unmaps their buffers). */
+   u_upload_flush(svga->upload_vb);
+   u_upload_flush(svga->upload_ib);
+
+   /* Then the screen, so that texture dma uploads are processed before
+    * the commands are submitted.
+    */
+   svga_screen_flush(svga_screen(screen), NULL);
+
+   svga_context_flush_buffers(svga);
+
+   /* Finally push the pending commands to hardware. */
+   svga->swc->flush(svga->swc, pfence);
+
+   /* DEBUG_SYNC: also wait on *pfence when the caller supplied one. */
+   if ((SVGA_DEBUG & DEBUG_SYNC) && pfence && *pfence)
+      screen->fence_finish( screen, *pfence, 0);
+}
+
+
+/* Flush queued hwtnl primitives; on OOM, flush the context once and retry. */
+void svga_hwtnl_flush_retry( struct svga_context *svga )
+{
+   enum pipe_error status = svga_hwtnl_flush( svga->hwtnl );
+
+   if (status == PIPE_ERROR_OUT_OF_MEMORY) {
+      svga_context_flush( svga, NULL );
+      status = svga_hwtnl_flush( svga->hwtnl );
+   }
+
+   assert(status == 0);
+}
+
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
new file mode 100644
index 0000000000..9a3e92fd8d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -0,0 +1,443 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_CONTEXT_H
+#define SVGA_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "tgsi/tgsi_scan.h"
+
+
+#define SVGA_TEX_UNITS 8
+
+struct draw_vertex_shader;
+struct svga_shader_result;
+struct SVGACmdMemory;
+struct u_upload_mgr;
+
+
+struct svga_shader
+{
+   const struct tgsi_token *tokens;
+
+   struct tgsi_shader_info info;
+
+   struct svga_shader_result *results;
+
+   unsigned id;
+
+   boolean use_sm30;
+};
+
+struct svga_fragment_shader
+{
+   struct svga_shader base;
+};
+
+struct svga_vertex_shader
+{
+   struct svga_shader base;
+
+   struct draw_vertex_shader *draw_shader;
+};
+
+
+struct svga_cache_context;
+struct svga_tracked_state;
+
+struct svga_blend_state {
+
+   boolean need_white_fragments;
+
+   /* Should be per-render-target:
+    */
+   struct {
+      uint8_t writemask;
+
+      boolean blend_enable;
+      uint8_t srcblend;
+      uint8_t dstblend;
+      uint8_t blendeq;
+      
+      boolean separate_alpha_blend_enable;
+      uint8_t srcblend_alpha;
+      uint8_t dstblend_alpha;
+      uint8_t blendeq_alpha;
+
+   } rt[1];
+};
+
+struct svga_depth_stencil_state {
+   unsigned zfunc:8;
+   unsigned zenable:1;
+   unsigned zwriteenable:1;
+
+   unsigned alphatestenable:1;
+   unsigned alphafunc:8;
+  
+   struct {
+      unsigned enabled:1;
+      unsigned func:8;
+      unsigned fail:8;
+      unsigned zfail:8;
+      unsigned pass:8;
+   } stencil[2];
+   
+   /* SVGA3D has one ref/mask/writemask triple shared between front &
+    * back face stencil.  We really need two:
+    */
+   unsigned stencil_ref:8;
+   unsigned stencil_mask:8;
+   unsigned stencil_writemask:8;
+
+   float    alpharef;
+};
+
+#define SVGA_UNFILLED_DISABLE 0
+#define SVGA_UNFILLED_LINE    1
+#define SVGA_UNFILLED_POINT   2
+
+#define SVGA_PIPELINE_FLAG_POINTS   (1<<PIPE_PRIM_POINTS)
+#define SVGA_PIPELINE_FLAG_LINES    (1<<PIPE_PRIM_LINES)
+#define SVGA_PIPELINE_FLAG_TRIS     (1<<PIPE_PRIM_TRIANGLES)
+
+struct svga_rasterizer_state {
+   struct pipe_rasterizer_state templ; /* needed for draw module */
+
+   unsigned shademode:8;
+   unsigned cullmode:8;
+   unsigned scissortestenable:1;
+   unsigned multisampleantialias:1;
+   unsigned antialiasedlineenable:1;
+   unsigned lastpixel:1;
+
+   unsigned linepattern;
+
+   float slopescaledepthbias;
+   float depthbias;
+   float pointsize;
+   float pointsize_min;
+   float pointsize_max;
+   
+   unsigned hw_unfilled:16;         /* PIPE_POLYGON_MODE_x */
+   unsigned need_pipeline:16;    /* which prims do we need help for? */
+};
+
+struct svga_sampler_state {
+   unsigned mipfilter;
+   unsigned magfilter;
+   unsigned minfilter;
+   unsigned aniso_level;
+   float lod_bias;
+   unsigned addressu;
+   unsigned addressv;
+   unsigned addressw;
+   unsigned bordercolor;
+   unsigned normalized_coords:1;
+   unsigned compare_mode:1;
+   unsigned compare_func:3;
+
+   unsigned min_lod;
+   unsigned view_min_lod;
+   unsigned view_max_lod;
+};
+
+/* Used to calculate differences between state emitted to hardware and
+ * current driver-calculated state.
+ */
+struct svga_state 
+{
+   const struct svga_blend_state *blend;
+   const struct svga_depth_stencil_state *depth;
+   const struct svga_rasterizer_state *rast;
+   const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+
+   struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+   struct svga_fragment_shader *fs;
+   struct svga_vertex_shader *vs;
+
+   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+   struct pipe_buffer *cb[PIPE_SHADER_TYPES];
+
+   struct pipe_framebuffer_state framebuffer;
+   float depthscale;
+
+   struct pipe_poly_stipple poly_stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_blend_color blend_color;
+   struct pipe_clip_state clip;
+   struct pipe_viewport_state viewport;
+
+   const unsigned *edgeflags;
+
+   unsigned num_samplers;
+   unsigned num_textures;
+   unsigned num_vertex_elements;
+   unsigned num_vertex_buffers;
+   unsigned reduced_prim;
+
+   struct {
+      unsigned flag_1d;
+      unsigned flag_srgb;
+   } tex_flags;
+
+   boolean any_user_vertex_buffers;
+
+   unsigned zero_stride_vertex_elements;
+   unsigned num_zero_stride_vertex_elements;
+   /* ### maybe dynamically allocate this */
+   float zero_stride_constants[PIPE_MAX_ATTRIBS*4];
+};
+
+#define RS_MAX 97
+#define TS_MAX 30
+#define CB_MAX 256
+
+struct svga_prescale {
+   float translate[4];
+   float scale[4];
+   boolean enabled;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT )
+ */
+struct svga_hw_clear_state
+{
+   struct {
+      unsigned x,y,w,h;
+   } viewport;
+
+   struct {
+      float zmin, zmax;
+   } depthrange;
+   
+   struct pipe_framebuffer_state framebuffer;
+   struct svga_prescale prescale;
+};
+
+struct svga_hw_view_state
+{
+   struct pipe_texture *texture;
+   struct svga_sampler_view *v;
+   unsigned min_lod;
+   unsigned max_lod;
+   int dirty;
+};
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW )
+ */
+struct svga_hw_draw_state
+{
+   unsigned rs[RS_MAX];
+   unsigned ts[16][TS_MAX];
+   float cb[PIPE_SHADER_TYPES][CB_MAX][4];
+
+   unsigned shader_id[PIPE_SHADER_TYPES];
+   
+   struct svga_shader_result *fs;
+   struct svga_shader_result *vs;
+   struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
+
+   unsigned num_views;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_NEED_SWTNL )
+ */
+struct svga_sw_state
+{
+   unsigned ve_format[PIPE_MAX_ATTRIBS]; /* NEW_VELEMENT */
+
+   /* which parts we need */
+   boolean need_swvfetch;
+   boolean need_pipeline;
+   boolean need_swtnl;
+};
+
+
+/* Queue some state updates (like rss) and submit them to hardware in
+ * a single packet.
+ */
+struct svga_hw_queue;
+
+struct svga_query;
+
+struct svga_context
+{
+   struct pipe_context pipe;
+   struct svga_winsys_context *swc;
+
+   struct {
+      boolean no_swtnl;
+      boolean force_swtnl;
+      boolean use_min_mipmap;
+
+      /* incremented for each shader */
+      unsigned shader_id;
+
+      unsigned disable_shader;
+   } debug;
+
+   struct {
+      struct draw_context *draw;
+      struct vbuf_render *backend;
+      unsigned hw_prim;
+      boolean new_vbuf;
+      boolean new_vdecl;
+   } swtnl;
+
+   struct {
+      unsigned dirty[4];
+
+      unsigned texture_timestamp;
+      unsigned next_fs_id;
+      unsigned next_vs_id;
+
+      /* Internally generated shaders:
+       */
+      unsigned white_fs_id;
+
+      /* State derived by svga_update_state():
+       */
+      struct svga_sw_state          sw;
+      struct svga_hw_draw_state     hw_draw;
+      struct svga_hw_clear_state    hw_clear;
+   } state;
+
+   struct svga_state curr;      /* state from the state tracker */
+   unsigned dirty;              /* statechanges since last update_state() */
+
+   struct u_upload_mgr *upload_ib;
+   struct u_upload_mgr *upload_vb;
+   struct svga_hwtnl *hwtnl;
+
+   /** The occlusion query currently in progress */
+   struct svga_query *sq;
+
+   /** List of buffers with queued transfers */
+   struct list_head dirty_buffers;
+};
+
+/* A flag for each state_tracker state object; each is a distinct single
+ * bit so that flags can be OR'ed together and tested independently. */
+#define SVGA_NEW_BLEND               0x1
+#define SVGA_NEW_DEPTH_STENCIL       0x2
+#define SVGA_NEW_RAST                0x4
+#define SVGA_NEW_SAMPLER             0x8
+#define SVGA_NEW_TEXTURE             0x10
+#define SVGA_NEW_VBUFFER             0x20
+#define SVGA_NEW_VELEMENT            0x40
+#define SVGA_NEW_FS                  0x80
+#define SVGA_NEW_VS                  0x100
+#define SVGA_NEW_FS_CONST_BUFFER     0x200
+#define SVGA_NEW_VS_CONST_BUFFER     0x400
+#define SVGA_NEW_FRAME_BUFFER        0x800
+#define SVGA_NEW_STIPPLE             0x1000
+#define SVGA_NEW_SCISSOR             0x2000
+#define SVGA_NEW_BLEND_COLOR         0x4000
+#define SVGA_NEW_CLIP                0x8000
+#define SVGA_NEW_VIEWPORT            0x10000
+#define SVGA_NEW_PRESCALE            0x20000
+#define SVGA_NEW_REDUCED_PRIMITIVE   0x40000
+#define SVGA_NEW_TEXTURE_BINDING     0x80000
+#define SVGA_NEW_NEED_PIPELINE       0x100000
+#define SVGA_NEW_NEED_SWVFETCH       0x200000
+#define SVGA_NEW_NEED_SWTNL          0x400000
+#define SVGA_NEW_FS_RESULT           0x800000
+#define SVGA_NEW_VS_RESULT           0x1000000
+#define SVGA_NEW_EDGEFLAGS           0x2000000
+#define SVGA_NEW_ZERO_STRIDE         0x4000000
+#define SVGA_NEW_TEXTURE_FLAGS       0x8000000
+
+
+
+
+
+/***********************************************************************
+ * svga_clear.c: 
+ */
+void svga_clear(struct pipe_context *pipe, 
+                unsigned buffers,
+                const float *rgba,
+                double depth,
+                unsigned stencil);
+
+
+/***********************************************************************
+ * svga_screen_texture.c: 
+ */
+void svga_mark_surfaces_dirty(struct svga_context *svga);
+
+
+
+
+void svga_init_state_functions( struct svga_context *svga );
+void svga_init_flush_functions( struct svga_context *svga );
+void svga_init_string_functions( struct svga_context *svga );
+void svga_init_blit_functions(struct svga_context *svga);
+
+void svga_init_blend_functions( struct svga_context *svga );
+void svga_init_depth_stencil_functions( struct svga_context *svga );
+void svga_init_misc_functions( struct svga_context *svga );
+void svga_init_rasterizer_functions( struct svga_context *svga );
+void svga_init_sampler_functions( struct svga_context *svga );
+void svga_init_fs_functions( struct svga_context *svga );
+void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_vertex_functions( struct svga_context *svga );
+void svga_init_constbuffer_functions( struct svga_context *svga );
+void svga_init_draw_functions( struct svga_context *svga );
+void svga_init_query_functions( struct svga_context *svga );
+
+void svga_cleanup_vertex_state( struct svga_context *svga );
+void svga_cleanup_tss_binding( struct svga_context *svga );
+void svga_cleanup_framebuffer( struct svga_context *svga );
+
+void svga_context_flush( struct svga_context *svga,
+                         struct pipe_fence_handle **pfence );
+
+void svga_hwtnl_flush_retry( struct svga_context *svga );
+
+
+/***********************************************************************
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct svga_context *
+svga_context( struct pipe_context *pipe )
+{
+   return (struct svga_context *)pipe; /* 'pipe' is svga_context's first member */
+}
+
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
new file mode 100644
index 0000000000..b7bb5686ed
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DEBUG_H
+#define SVGA_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+#define DEBUG_DMA      0x1
+#define DEBUG_TGSI     0x4
+#define DEBUG_PIPE     0x8
+#define DEBUG_STATE    0x10
+#define DEBUG_SCREEN   0x20
+#define DEBUG_TEX      0x40
+#define DEBUG_SWTNL    0x80
+#define DEBUG_CONSTS   0x100
+#define DEBUG_VIEWPORT 0x200
+#define DEBUG_VIEWS    0x400
+#define DEBUG_PERF     0x800    /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH    0x1000   /* flush after every draw */
+#define DEBUG_SYNC     0x2000   /* sync after every flush */
+#define DEBUG_QUERY    0x4000
+
+#ifdef DEBUG
+extern int SVGA_DEBUG;
+#define DBSTR(x) x
+#else
+#define SVGA_DEBUG 0
+#define DBSTR(x) ""
+#endif
+
+/* Print a printf-style message if DEBUG is defined and (SVGA_DEBUG & flag). */
+static INLINE void
+SVGA_DBG( unsigned flag, const char *fmt, ... )
+{
+#ifndef DEBUG
+    (void)flag;
+    (void)fmt;
+#else
+    va_list ap;
+
+    if (!(SVGA_DEBUG & flag))
+        return;
+    va_start( ap, fmt );
+    debug_vprintf( fmt, ap );
+    va_end( ap );
+#endif
+}
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
new file mode 100644
index 0000000000..1b371cecc6
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -0,0 +1,370 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_cmd.h"
+
+
+/* Allocate a hwtnl with CALLOC_STRUCT and bind it to its context,
+ * index upload manager and winsys context.  Returns NULL if that fails.
+ */
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+                                      struct u_upload_mgr *upload_ib,
+                                      struct svga_winsys_context *swc )
+{
+   struct svga_hwtnl *tnl = CALLOC_STRUCT(svga_hwtnl);
+
+   if (tnl != NULL) {
+      tnl->svga = svga;
+      tnl->upload_ib = upload_ib;
+      tnl->cmd.swc = swc;
+   }
+
+   /* NULL on allocation failure, otherwise the initialised object. */
+   return tnl;
+}
+
+/* Release every buffer reference still held by the hwtnl (index cache,
+ * queued vertex buffers, queued index buffers), then free the object.
+ */
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl )
+{
+   int prim, slot, n;
+
+   /* Cached index buffers: PIPE_PRIM_MAX rows of IDX_CACHE_MAX slots. */
+   for (prim = 0; prim < PIPE_PRIM_MAX; prim++)
+      for (slot = 0; slot < IDX_CACHE_MAX; slot++)
+         pipe_buffer_reference( &hwtnl->index_cache[prim][slot].buffer, NULL );
+
+   for (n = 0; n < hwtnl->cmd.vdecl_count; n++)
+      pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[n], NULL);
+
+   for (n = 0; n < hwtnl->cmd.prim_count; n++)
+      pipe_buffer_reference(&hwtnl->cmd.prim_ib[n], NULL);
+
+   FREE(hwtnl);
+}
+
+
+/* api_pv is PV_LAST only if flatshade && !flatshade_first; hw_pv is PV_FIRST. */
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+                               boolean flatshade, boolean flatshade_first )
+{
+   hwtnl->hw_pv = PV_FIRST;
+   hwtnl->api_pv = (!flatshade || flatshade_first) ? PV_FIRST : PV_LAST;
+}
+
+/* Store the requested fill mode in api_fillmode; nothing else is touched. */
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl, unsigned mode )
+{
+   hwtnl->api_fillmode = mode;
+}
+
+/* Set the number of vertex declarations to 'count', dropping the buffer
+ * references of any slots at or beyond it.  Asserts that no primitives
+ * are currently queued.
+ */
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl, unsigned count )
+{
+   unsigned slot;
+
+   assert(hwtnl->cmd.prim_count == 0);
+
+   for (slot = count; slot < hwtnl->cmd.vdecl_count; slot++)
+      pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[slot], NULL);
+   hwtnl->cmd.vdecl_count = count;
+}
+
+
+/* Store 'decl' in declaration slot i and make the slot reference 'vb'.
+ * Asserts that no primitives are queued and that i < cmd.vdecl_count.
+ */
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+                       unsigned i,
+                       const SVGA3dVertexDecl *decl,
+                       struct pipe_buffer *vb)
+{
+   assert(hwtnl->cmd.prim_count == 0);
+   assert( i < hwtnl->cmd.vdecl_count );
+
+   hwtnl->cmd.vdecl[i] = *decl;
+   pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], vb);
+}
+
+
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
+{
+   struct svga_winsys_context *swc = hwtnl->cmd.swc;
+   struct svga_context *svga = hwtnl->svga;
+   enum pipe_error ret;
+   /* Emits all queued primitives as one draw command; no-op if none queued. */
+   if (hwtnl->cmd.prim_count) {
+      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+      struct svga_winsys_surface *ib_handle[QSZ];
+      struct svga_winsys_surface *handle;
+      SVGA3dVertexDecl *vdecl;
+      SVGA3dPrimitiveRange *prim;
+      unsigned i;
+      /* Look up all handles first: OOM is reported before the draw begins. */
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+         if (handle == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
+         vb_handle[i] = handle;
+      }
+
+      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+         if (hwtnl->cmd.prim_ib[i]) {
+            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
+            if (handle == NULL)
+               return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+         else
+            handle = NULL;
+
+         ib_handle[i] = handle;
+      }
+
+      ret = SVGA3D_BeginDrawPrimitives(swc, 
+                                       &vdecl, 
+                                       hwtnl->cmd.vdecl_count, 
+                                       &prim, 
+                                       hwtnl->cmd.prim_count);
+      if (ret != PIPE_OK) 
+         return ret;
+
+      
+      memcpy( vdecl,
+              hwtnl->cmd.vdecl,
+              hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         /* Given that rangeHint is considered to be relative to indexBias,
+          * and indexBias varies per primitive, we cannot accurately supply a
+          * rangeHint when emitting more than one primitive per draw command.
+          */
+         if (hwtnl->cmd.prim_count == 1) {
+            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+         }
+         else {
+            vdecl[i].rangeHint.first = 0;
+            vdecl[i].rangeHint.last = 0;
+         }
+
+         swc->surface_relocation(swc,
+                                 &vdecl[i].array.surfaceId,
+                                 vb_handle[i],
+                                 PIPE_BUFFER_USAGE_GPU_READ);
+      }
+
+      memcpy( prim,
+              hwtnl->cmd.prim,
+              hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+         swc->surface_relocation(swc,
+                                 &prim[i].indexArray.surfaceId,
+                                 ib_handle[i],
+                                 PIPE_BUFFER_USAGE_GPU_READ);
+         pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
+      }
+      /* Commit the command and mark the primitive queue as empty. */
+      SVGA_FIFOCommitAll( swc );
+      hwtnl->cmd.prim_count = 0;
+   }
+
+   return PIPE_OK;
+}
+
+
+
+
+
+/***********************************************************************
+ * Internal functions:
+ */
+
+enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+                                 const SVGA3dPrimitiveRange *range,
+                                 unsigned min_index,
+                                 unsigned max_index,
+                                 struct pipe_buffer *ib )
+{
+   int ret = PIPE_OK;
+   /* Queue one primitive range (ib may be NULL) for the next hwtnl flush. */
+#ifdef DEBUG
+   {
+      unsigned i;
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         struct pipe_buffer *vb = hwtnl->cmd.vdecl_vb[i];
+         unsigned size = vb ? vb->size : 0;
+         unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
+         unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
+         unsigned index_bias = range->indexBias;
+         unsigned width;
+
+         assert(vb);
+         assert(size);
+         assert(offset < size);
+         assert(index_bias >= 0); /* NOTE(review): always true, index_bias is unsigned */
+         assert(min_index <= max_index);
+         assert(offset + index_bias*stride < size);
+         assert(offset + (index_bias + min_index)*stride < size);
+
+         switch (hwtnl->cmd.vdecl[i].identity.type) {
+         case SVGA3D_DECLTYPE_FLOAT1:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT2:
+            width = 4*2;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT3:
+            width = 4*3;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT4:
+            width = 4*4;
+            break;
+         case SVGA3D_DECLTYPE_D3DCOLOR:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_UBYTE4:
+            width = 1*4;
+            break;
+         case SVGA3D_DECLTYPE_SHORT2:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_SHORT4:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_UBYTE4N:
+            width = 1*4;
+            break;
+         case SVGA3D_DECLTYPE_SHORT2N:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_SHORT4N:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_USHORT2N:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_USHORT4N:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_UDEC3:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_DEC3N:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT16_2:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT16_4:
+            width = 2*4;
+            break;
+         default:
+            assert(0);
+            width = 0;
+            break;
+         }
+
+         assert(!stride || width <= stride);
+         assert(offset + (index_bias + max_index)*stride + width <= size);
+      }
+
+      assert(range->indexWidth == range->indexArray.stride);
+
+      if(ib) {
+         unsigned size = ib->size;
+         unsigned offset = range->indexArray.offset;
+         unsigned stride = range->indexArray.stride;
+         unsigned count;
+
+         assert(size);
+         assert(offset < size);
+         assert(stride);
+
+         switch (range->primType) {
+         case SVGA3D_PRIMITIVE_POINTLIST:
+            count = range->primitiveCount;
+            break;
+         case SVGA3D_PRIMITIVE_LINELIST:
+            count = range->primitiveCount * 2;
+            break;
+         case SVGA3D_PRIMITIVE_LINESTRIP:
+            count = range->primitiveCount + 1;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLELIST:
+            count = range->primitiveCount * 3;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+            count = range->primitiveCount + 2;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+            count = range->primitiveCount + 2;
+            break;
+         default:
+            assert(0);
+            count = 0;
+            break;
+         }
+
+         assert(offset + count*stride <= size);
+      }
+   }
+#endif
+   /* Flush first if adding this primitive would reach the QSZ queue limit. */
+   if (hwtnl->cmd.prim_count+1 >= QSZ) {
+      ret = svga_hwtnl_flush( hwtnl );
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   
+   /* min/max indices are relative to bias */
+   hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+   hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+
+   hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+
+   pipe_buffer_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+   hwtnl->cmd.prim_count++;
+
+   return ret;
+}
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
new file mode 100644
index 0000000000..14553b17b5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -0,0 +1,83 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H
+#define SVGA_DRAW_H
+
+#include "pipe/p_compiler.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_hwtnl;
+struct svga_winsys_context;
+struct svga_screen;
+struct svga_context;
+struct pipe_buffer;
+struct u_upload_mgr;
+
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+                                      struct u_upload_mgr *upload_ib,
+                                      struct svga_winsys_context *swc );
+
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+                               boolean flatshade,
+                               boolean flatshade_first );
+
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
+                              unsigned mode );
+
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+                       unsigned i,
+                       const SVGA3dVertexDecl *decl,
+                       struct pipe_buffer *vb);
+
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
+                             unsigned count );
+
+
+enum pipe_error 
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+                        unsigned prim, 
+                        unsigned start, 
+                        unsigned count);
+
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                struct pipe_buffer *indexBuffer,
+                                unsigned index_size,
+                                unsigned min_index,
+                                unsigned max_index,
+                                unsigned prim, 
+                                unsigned start, 
+                                unsigned count,
+                                unsigned bias );
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+
+
+#endif /* SVGA_DRAW_H */
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
new file mode 100644
index 0000000000..75492dffca
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -0,0 +1,297 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_context.h"
+
+
+#define DBG 0
+
+
+
+
+/* Build a fresh index buffer holding nr indices of index_size bytes each,
+ * filled by running generate() on the buffer's CPU-write mapping.
+ *
+ * On success the new buffer is stored in *out_buf and PIPE_OK is returned.
+ * If the buffer cannot be created or mapped, PIPE_ERROR_OUT_OF_MEMORY is
+ * returned and *out_buf is not written.
+ */
+static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
+                                         unsigned nr,
+                                         unsigned index_size,
+                                         u_generate_func generate,
+                                         struct pipe_buffer **out_buf )
+{
+   struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+   const unsigned nbytes = index_size * nr;
+   struct pipe_buffer *buf;
+   void *map;
+
+   buf = screen->buffer_create( screen, 32,
+                                PIPE_BUFFER_USAGE_INDEX |
+                                PIPE_BUFFER_USAGE_CPU_WRITE |
+                                PIPE_BUFFER_USAGE_GPU_READ,
+                                nbytes );
+   if (!buf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   map = pipe_buffer_map( screen, buf, PIPE_BUFFER_USAGE_CPU_WRITE );
+   if (!map) {
+      /* Nothing is mapped on this path, so only the buffer is released. */
+      screen->buffer_destroy( buf );
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   /* Fill the buffer, then drop the CPU mapping before handing it out. */
+   generate( nr, map );
+   pipe_buffer_unmap( screen, buf );
+
+   *out_buf = buf;
+   return PIPE_OK;
+}
+
+/* Can a cached buffer of cached_nr indices satisfy a request for nr? */
+static boolean compare( unsigned cached_nr,
+                        unsigned nr,
+                        unsigned type )
+{
+   /* U_GENERATE_REUSABLE accepts any cached buffer at least as large as the
+    * request; every other generate type needs an exact size match. */
+   return (type == U_GENERATE_REUSABLE) ? cached_nr >= nr : cached_nr == nr;
+}
+
+/* Return in *out_buf a referenced index buffer holding the output of
+ * generate() for gen_nr indices, served from this prim's cache when possible.
+ * Yields PIPE_OK, or the error reported by generate_indices().
+ */
+static enum pipe_error
+retrieve_or_generate_indices( struct svga_hwtnl *hwtnl, unsigned prim,
+                              unsigned gen_type, unsigned gen_nr,
+                              unsigned gen_size, u_generate_func generate,
+                              struct pipe_buffer **out_buf )
+{
+   enum pipe_error ret = PIPE_OK;
+   int i;
+   /* Pass 1: look for a usable buffer made by the same generate func. */
+   for (i = 0; i < IDX_CACHE_MAX; i++) {
+      if (hwtnl->index_cache[prim][i].buffer != NULL &&
+          hwtnl->index_cache[prim][i].generate == generate)
+      {
+         if (compare(hwtnl->index_cache[prim][i].gen_nr, gen_nr, gen_type))
+         {
+            pipe_buffer_reference( out_buf,
+                                   hwtnl->index_cache[prim][i].buffer );
+
+            if (DBG) 
+               debug_printf("%s retrieve %d/%d\n", __FUNCTION__, i, gen_nr);
+
+            return PIPE_OK;
+         }
+         else if (gen_type == U_GENERATE_REUSABLE) 
+         {
+            /* Reusable but too small: drop it and regenerate into slot i. */
+            pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+                                   NULL );
+            if (DBG) 
+               debug_printf("%s discard %d/%d\n", __FUNCTION__, 
+                            i, hwtnl->index_cache[prim][i].gen_nr);
+
+            break;
+         }
+      }
+   }
+   /* No slot picked above: use an empty one, else evict the smallest. */
+   if (i == IDX_CACHE_MAX)
+   {
+      unsigned smallest = 0;
+      unsigned smallest_size = ~0;
+      /* smallest_size == 0 ends the search: an empty slot always wins. */
+      for (i = 0; i < IDX_CACHE_MAX && smallest_size; i++) {
+         if (hwtnl->index_cache[prim][i].buffer == NULL)
+         {
+            smallest = i;
+            smallest_size = 0;
+         }
+         else if (hwtnl->index_cache[prim][i].gen_nr < smallest_size)
+         {
+            smallest = i;
+            smallest_size = hwtnl->index_cache[prim][i].gen_nr;
+         }
+      }
+
+      assert (smallest != IDX_CACHE_MAX);
+
+      pipe_buffer_reference( &hwtnl->index_cache[prim][smallest].buffer,
+                             NULL );
+
+      if (DBG)
+         debug_printf("%s discard smallest %d/%d\n", __FUNCTION__, 
+                      smallest, smallest_size);
+      
+      i = smallest;
+   }
+   /* Slot i is free now: build the indices and remember them there. */
+   ret = generate_indices( hwtnl, 
+                           gen_nr,
+                           gen_size,
+                           generate,
+                           out_buf );
+   if (ret != PIPE_OK)
+      return ret;
+
+   hwtnl->index_cache[prim][i].generate = generate;
+   hwtnl->index_cache[prim][i].gen_nr = gen_nr;
+   pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+                          *out_buf );
+
+   if (DBG)
+      debug_printf("%s cache %d/%d\n", __FUNCTION__, 
+                   i, hwtnl->index_cache[prim][i].gen_nr);
+
+   return PIPE_OK;
+}
+
+
+
+/* Queue a non-indexed draw of count vertices beginning at vertex start.
+ * Returns PIPE_ERROR_BAD_INPUT when count yields no hardware primitives.
+ */
+static enum pipe_error
+simple_draw_arrays( struct svga_hwtnl *hwtnl,
+                    unsigned prim, unsigned start, unsigned count )
+{
+   SVGA3dPrimitiveRange range;
+   unsigned nr_prims;
+   const unsigned hw_prim = svga_translate_prim(prim, count, &nr_prims);
+
+   if (nr_prims == 0)
+      return PIPE_ERROR_BAD_INPUT;
+
+   /* No index buffer: invalid surface id, zero offset/stride/width. */
+   range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+   range.indexArray.offset = 0;
+   range.indexArray.stride = 0;
+   range.indexWidth = 0;
+   range.primType = hw_prim;
+   range.primitiveCount = nr_prims;
+   range.indexBias = start;
+
+   /* min/max index are pre-bias values (0 .. count - 1); consumers of
+    * these numbers adjust them by range.indexBias. */
+   return svga_hwtnl_prim( hwtnl, &range, 0, count - 1, NULL );
+}
+
+
+
+
+
+
+
+
+
+
+/* Draw count vertices, beginning at vertex start, as primitive type prim.
+ *
+ * The index generator selected for the current fill mode and api_pv/hw_pv
+ * settings decides the path: a linear sequence is queued without indices,
+ * anything else is queued as an indexed range whose bias is start.
+ */
+enum pipe_error
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+                        unsigned prim,
+                        unsigned start,
+                        unsigned count)
+{
+   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   struct pipe_buffer *gen_buf = NULL;
+   u_generate_func gen_func;
+   enum pipe_error ret;
+
+   if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
+       prim >= PIPE_PRIM_TRIANGLES) {
+      gen_type = u_unfilled_generator( prim,
+                                       start,
+                                       count,
+                                       hwtnl->api_fillmode,
+                                       &gen_prim,
+                                       &gen_size,
+                                       &gen_nr,
+                                       &gen_func );
+   }
+   else {
+      gen_type = u_index_generator( svga_hw_prims,
+                                    prim,
+                                    start,
+                                    count,
+                                    hwtnl->api_pv,
+                                    hwtnl->hw_pv,
+                                    &gen_prim,
+                                    &gen_size,
+                                    &gen_nr,
+                                    &gen_func );
+   }
+
+   /* Linear sequences need no index buffer at all. */
+   if (gen_type == U_GENERATE_LINEAR)
+      return simple_draw_arrays( hwtnl, gen_prim, start, count );
+
+   /* Otherwise draw as an indexed primitive, running the gen func to
+    * build an index buffer unless a suitable one is already cached.
+    */
+   ret = retrieve_or_generate_indices( hwtnl,
+                                       prim,
+                                       gen_type,
+                                       gen_nr,
+                                       gen_size,
+                                       gen_func,
+                                       &gen_buf );
+   if (!ret) {
+      ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+                                                   gen_buf,
+                                                   gen_size,
+                                                   0,
+                                                   count - 1,
+                                                   gen_prim,
+                                                   0,
+                                                   gen_nr,
+                                                   start );
+   }
+
+   /* Drop the local reference whether or not the draw was queued. */
+   if (gen_buf)
+      pipe_buffer_reference( &gen_buf, NULL );
+
+   return ret;
+}
+
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
new file mode 100644
index 0000000000..167d817831
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -0,0 +1,255 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_indices.h"
+
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+
+#include "svga_hw_reg.h"
+
+
+/* Create a new index buffer of nr indices, index_size bytes each, by running
+ * translate() over the mapped contents of src starting at byte offset.
+ * On success the buffer is stored in *out_buf; if it cannot be created, or
+ * either buffer cannot be mapped, PIPE_ERROR_OUT_OF_MEMORY is returned.
+ */
+static enum pipe_error
+translate_indices( struct svga_hwtnl *hwtnl,
+                   struct pipe_buffer *src,
+                   unsigned offset,
+                   unsigned nr,
+                   unsigned index_size,
+                   u_translate_func translate,
+                   struct pipe_buffer **out_buf )
+{
+   struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+   const unsigned nbytes = index_size * nr;
+   struct pipe_buffer *buf;
+   const char *in;
+   void *out;
+
+   buf = screen->buffer_create( screen, 32,
+                                PIPE_BUFFER_USAGE_INDEX |
+                                PIPE_BUFFER_USAGE_CPU_WRITE |
+                                PIPE_BUFFER_USAGE_GPU_READ,
+                                nbytes );
+   if (!buf)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   /* Map the source for reading, then the destination for writing. */
+   in = pipe_buffer_map( screen, src, PIPE_BUFFER_USAGE_CPU_READ );
+   if (!in) {
+      screen->buffer_destroy( buf );
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   out = pipe_buffer_map( screen, buf, PIPE_BUFFER_USAGE_CPU_WRITE );
+   if (!out) {
+      /* Undo in reverse: release the source mapping, then the new buffer. */
+      screen->buffer_unmap( screen, src );
+      screen->buffer_destroy( buf );
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   }
+
+   translate( in + offset,
+              nr,
+              out );
+
+   pipe_buffer_unmap( screen, src );
+   pipe_buffer_unmap( screen, buf );
+
+   *out_buf = buf;
+   return PIPE_OK;
+}
+
+
+
+
+
+/* Queue an indexed draw that consumes index_buffer as-is (no translation):
+ * count indices of index_size bytes each, beginning at index start.
+ * A user index buffer is first passed through u_upload_buffer() and the
+ * resulting buffer/offset are drawn instead.  A count that produces no
+ * hardware primitives queues nothing and returns PIPE_OK.
+ */
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                       struct pipe_buffer *index_buffer,
+                                       unsigned index_size,
+                                       unsigned min_index,
+                                       unsigned max_index,
+                                       unsigned prim,
+                                       unsigned start,
+                                       unsigned count,
+                                       unsigned bias )
+{
+   struct pipe_buffer *upload_buffer = NULL;
+   unsigned index_offset = start * index_size;
+   SVGA3dPrimitiveRange range;
+   unsigned hw_count;
+   unsigned hw_prim;
+   int ret = PIPE_OK;
+
+   hw_prim = svga_translate_prim(prim, count, &hw_count);
+   if (hw_count == 0)
+      return PIPE_OK;
+
+   if (index_buffer && svga_buffer_is_user_buffer(index_buffer)) {
+      assert( index_buffer->size >= index_offset + count * index_size );
+      ret = u_upload_buffer( hwtnl->upload_ib,
+                             index_offset,
+                             count * index_size,
+                             index_buffer,
+                             &index_offset,
+                             &upload_buffer );
+
+      /* index_buffer is only a local alias here (the caller holds the
+       * counted reference), so it can be repointed without refcounting.
+       */
+      if (!ret)
+         index_buffer = upload_buffer;
+   }
+
+   if (!ret) {
+      range.primType = hw_prim;
+      range.primitiveCount = hw_count;
+      range.indexArray.offset = index_offset;
+      range.indexArray.stride = index_size;
+      range.indexWidth = index_size;
+      range.indexBias = bias;
+
+      ret = svga_hwtnl_prim( hwtnl, &range, min_index, max_index, index_buffer );
+   }
+
+   /* Release the upload reference, if any, on success and failure alike. */
+   if (upload_buffer)
+      pipe_buffer_reference( &upload_buffer, NULL );
+
+   return ret;
+}
+
+
+
+
+/* Draw count indices taken from index_buffer, beginning at index start, as
+ * primitive type prim.
+ *
+ * The index translator selected for the current fill mode and api_pv/hw_pv
+ * settings decides the path: a plain memcpy result passes the caller's
+ * buffer straight through, anything else is drawn from a newly translated
+ * index buffer whose local reference is dropped before returning.
+ */
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                struct pipe_buffer *index_buffer,
+                                unsigned index_size,
+                                unsigned min_index,
+                                unsigned max_index,
+                                unsigned prim,
+                                unsigned start,
+                                unsigned count,
+                                unsigned bias)
+{
+   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   struct pipe_buffer *gen_buf = NULL;
+   u_translate_func gen_func;
+   enum pipe_error ret;
+
+   if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL &&
+       prim >= PIPE_PRIM_TRIANGLES) {
+      gen_type = u_unfilled_translator( prim,
+                                        index_size,
+                                        count,
+                                        hwtnl->api_fillmode,
+                                        &gen_prim,
+                                        &gen_size,
+                                        &gen_nr,
+                                        &gen_func );
+   }
+   else {
+      gen_type = u_index_translator( svga_hw_prims,
+                                     prim,
+                                     index_size,
+                                     count,
+                                     hwtnl->api_pv,
+                                     hwtnl->hw_pv,
+                                     &gen_prim,
+                                     &gen_size,
+                                     &gen_nr,
+                                     &gen_func );
+   }
+
+   /* No translation required: hand the caller's indices to the hardware. */
+   if (gen_type == U_TRANSLATE_MEMCPY) {
+      return svga_hwtnl_simple_draw_range_elements( hwtnl, index_buffer,
+                                                    index_size,
+                                                    min_index,
+                                                    max_index,
+                                                    gen_prim, start, count, bias );
+   }
+
+   /* Allocate a new index buffer and populate it with the translate func.
+    * The result could be cached alongside the original to avoid future
+    * re-translation, but when only accelerating GL there is little point,
+    * as index buffers are typically used just once there.
+    */
+   ret = translate_indices( hwtnl,
+                            index_buffer,
+                            start * index_size,
+                            gen_nr,
+                            gen_size,
+                            gen_func,
+                            &gen_buf );
+   if (!ret) {
+      /* The translated buffer is consumed from its first index. */
+      ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+                                                   gen_buf,
+                                                   gen_size,
+                                                   min_index,
+                                                   max_index,
+                                                   gen_prim,
+                                                   0,
+                                                   gen_nr,
+                                                   bias );
+   }
+
+   /* Drop the local reference whether or not the draw was queued. */
+   if (gen_buf)
+      pipe_buffer_reference( &gen_buf, NULL );
+
+   return ret;
+}
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
new file mode 100644
index 0000000000..9aa40e1664
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -0,0 +1,158 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H_
+#define SVGA_DRAW_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "indices/u_indices.h"
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct svga_context;
+struct u_upload_mgr;
+
+/* Bitmask of PIPE_PRIM_x types, passed as the first argument to
+ * u_index_generator()/u_index_translator().  Should include polygon? */
+static const unsigned svga_hw_prims = 
+   ((1 << PIPE_PRIM_POINTS) |
+    (1 << PIPE_PRIM_LINES) |
+    (1 << PIPE_PRIM_LINE_STRIP) |
+    (1 << PIPE_PRIM_TRIANGLES) |
+    (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+    (1 << PIPE_PRIM_TRIANGLE_FAN));
+
+
+/* Translate a gallium primitive type and vertex count into the SVGA3D
+ * primitive type (returned) and primitive count (*out_count).
+ * Counts too small to form a single primitive yield *out_count == 0
+ * instead of wrapping around, so a zero check on the result is reliable.
+ * Unhandled modes assert, set *out_count to 0 and return 0.
+ */
+static INLINE unsigned svga_translate_prim(unsigned mode, 
+                                           unsigned count,
+                                           unsigned *out_count)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS:
+      *out_count = count;
+      return SVGA3D_PRIMITIVE_POINTLIST;
+   case PIPE_PRIM_LINES:
+      *out_count = count / 2;
+      return SVGA3D_PRIMITIVE_LINELIST; 
+   case PIPE_PRIM_LINE_STRIP:
+      *out_count = count > 1 ? count - 1 : 0;
+      return SVGA3D_PRIMITIVE_LINESTRIP; 
+   case PIPE_PRIM_TRIANGLES:
+      *out_count = count / 3;
+      return SVGA3D_PRIMITIVE_TRIANGLELIST; 
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      *out_count = count > 2 ? count - 2 : 0;
+      return SVGA3D_PRIMITIVE_TRIANGLESTRIP; 
+   case PIPE_PRIM_TRIANGLE_FAN:
+      *out_count = count > 2 ? count - 2 : 0;
+      return SVGA3D_PRIMITIVE_TRIANGLEFAN; 
+   default:
+      assert(0);
+      *out_count = 0;
+      return 0;
+   }
+}
+
+
+struct index_cache {
+   u_generate_func generate;   /* generate func whose output the buffer holds */
+   unsigned gen_nr;            /* index count that generate was run with */
+
+   /* If non-null, this buffer is filled by calling 
+    *   generate(nr, map(buffer))
+    */
+   struct pipe_buffer *buffer;
+};
+
+#define QSZ 32
+
+struct draw_cmd {
+   struct svga_winsys_context *swc;
+
+   SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
+   struct pipe_buffer *vdecl_vb[SVGA3D_INPUTREG_MAX];
+   unsigned vdecl_count;
+
+   SVGA3dPrimitiveRange prim[QSZ];     /* ranges queued by svga_hwtnl_prim() */
+   struct pipe_buffer *prim_ib[QSZ];   /* index buffer per range; NULL when non-indexed */
+   unsigned prim_count;                /* number of queued ranges */
+   unsigned min_index[QSZ];            /* per range, relative to the index bias */
+   unsigned max_index[QSZ];            /* per range, relative to the index bias */
+};
+
+#define IDX_CACHE_MAX  8
+
+struct svga_hwtnl {
+   struct svga_context *svga;
+   struct u_upload_mgr *upload_ib;   /* handed to u_upload_buffer() for user index buffers */
+   
+   /* Flatshade information:
+    */
+   unsigned api_pv;
+   unsigned hw_pv;
+   unsigned api_fillmode;            /* compared with PIPE_POLYGON_MODE_FILL to pick the unfilled index paths */
+
+   /* Cache the results of running a particular generate func on each
+    * primitive type.
+    */
+   struct index_cache index_cache[PIPE_PRIM_MAX][IDX_CACHE_MAX];   /* indexed [prim][slot] */
+
+   /* Try to build the maximal draw command packet before emitting:
+    */
+   struct draw_cmd cmd;
+};
+
+
+
+/***********************************************************************
+ * Internal functions
+ */
+enum pipe_error 
+svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+                 const SVGA3dPrimitiveRange *range,
+                 unsigned min_index,
+                 unsigned max_index,
+                 struct pipe_buffer *ib );
+
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                       struct pipe_buffer *indexBuffer,
+                                       unsigned index_size,
+                                       unsigned min_index,
+                                       unsigned max_index,
+                                       unsigned prim, 
+                                       unsigned start,
+                                       unsigned count,
+                                       unsigned bias );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_hw_reg.h b/src/gallium/drivers/svga/svga_hw_reg.h
new file mode 100644
index 0000000000..183f4b918e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_hw_reg.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_HW_REG_H
+#define SVGA_HW_REG_H
+
+#include "pipe/p_compiler.h"
+
+#if defined(PIPE_CC_GCC)
+#ifndef HAVE_STDINT_H
+#define HAVE_STDINT_H
+#endif
+#endif
+
+#include "svga_types.h"
+
+#include "svga3d_reg.h"
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
new file mode 100644
index 0000000000..855d228755
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -0,0 +1,246 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+/* Map a PIPE_BLENDFACTOR_x token to its SVGA3D_BLENDOP_x counterpart. */
+static INLINE unsigned
+svga_translate_blend_factor(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:               return SVGA3D_BLENDOP_ZERO;
+   case PIPE_BLENDFACTOR_ONE:                return SVGA3D_BLENDOP_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:          return SVGA3D_BLENDOP_SRCCOLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:      return SVGA3D_BLENDOP_INVSRCCOLOR;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:          return SVGA3D_BLENDOP_SRCALPHA;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:      return SVGA3D_BLENDOP_INVSRCALPHA;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
+   case PIPE_BLENDFACTOR_DST_COLOR:          return SVGA3D_BLENDOP_DESTCOLOR;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:      return SVGA3D_BLENDOP_INVDESTCOLOR;
+   case PIPE_BLENDFACTOR_DST_ALPHA:          return SVGA3D_BLENDOP_DESTALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:      return SVGA3D_BLENDOP_INVDESTALPHA;
+   case PIPE_BLENDFACTOR_CONST_COLOR:        return SVGA3D_BLENDOP_BLENDFACTOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:    return SVGA3D_BLENDOP_INVBLENDFACTOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:        return SVGA3D_BLENDOP_BLENDFACTOR; /* same op as CONST_COLOR (?) */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:    return SVGA3D_BLENDOP_INVBLENDFACTOR; /* same op as INV_CONST_COLOR (?) */
+   default:
+      assert(0);
+      return SVGA3D_BLENDOP_ZERO;
+   }
+}
+
+static INLINE unsigned
+svga_translate_blend_func(unsigned mode)
+{
+   switch (mode) {   /* PIPE_BLEND_x -> SVGA3D_BLENDEQ_x */
+   case PIPE_BLEND_MIN:              return SVGA3D_BLENDEQ_MINIMUM;
+   case PIPE_BLEND_MAX:              return SVGA3D_BLENDEQ_MAXIMUM;
+   case PIPE_BLEND_ADD:              return SVGA3D_BLENDEQ_ADD;
+   case PIPE_BLEND_SUBTRACT:         return SVGA3D_BLENDEQ_SUBTRACT;
+   case PIPE_BLEND_REVERSE_SUBTRACT: return SVGA3D_BLENDEQ_REVSUBTRACT;
+   default:
+      assert(0);   /* unknown equation; ADD is the fallback if the assert is compiled out */
+      return SVGA3D_BLENDEQ_ADD;
+   }
+}
+
+/* Build the driver's blend state object from the Gallium template; NULL on OOM. */
+static void *
+svga_create_blend_state(struct pipe_context *pipe,
+                        const struct pipe_blend_state *templ)
+{
+   struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
+   unsigned i;
+
+   if (!blend)
+      return NULL;
+
+   /* Fill in per-rendertarget blend state (only one rendertarget for now). */
+   for (i = 0; i < 1; i++) {
+      /* No way to set this in SVGA3D, and no way to correctly implement it on
+       * top of D3D9 API.  Instead we try to simulate with various blend modes.
+       */
+      if (templ->logicop_enable) {
+         switch (templ->logicop_func) {
+         case PIPE_LOGICOP_XOR:
+            blend->need_white_fragments = TRUE;
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_SUBTRACT;
+            break;
+         case PIPE_LOGICOP_CLEAR:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_COPY:
+            blend->rt[i].blend_enable = FALSE;
+            break;
+         case PIPE_LOGICOP_COPY_INVERTED:
+            blend->rt[i].blend_enable   = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_NOOP:
+            blend->rt[i].blend_enable   = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_SET:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_INVERT:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_AND:
+            /* Approximate with minimum - works for the 0 & anything case: */
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_AND_REVERSE:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_INVDESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_AND_INVERTED:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_OR:
+            /* Approximate with maximum - works for the 1 | anything case: */
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_OR_REVERSE:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_INVDESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_OR_INVERTED:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_NAND:
+         case PIPE_LOGICOP_NOR:
+         case PIPE_LOGICOP_EQUIV:
+            /* Fill these in with plausible values */
+            blend->rt[i].blend_enable = FALSE;
+            break;
+         default:
+            assert(0);
+            break;
+         }
+      }
+      else {
+         blend->rt[i].blend_enable   = templ->blend_enable;
+
+         if (templ->blend_enable) {
+            blend->rt[i].srcblend       = svga_translate_blend_factor(templ->rgb_src_factor);
+            blend->rt[i].dstblend       = svga_translate_blend_factor(templ->rgb_dst_factor);
+            blend->rt[i].blendeq        = svga_translate_blend_func(templ->rgb_func);
+            blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->alpha_src_factor);
+            blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor);
+            blend->rt[i].blendeq_alpha  = svga_translate_blend_func(templ->alpha_func);
+
+            if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
+                blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
+                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq)
+            {
+               blend->rt[i].separate_alpha_blend_enable = TRUE;
+            }
+         }
+      }
+
+      blend->rt[i].writemask = templ->colormask;
+   }
+
+   return blend;
+}
+
+static void svga_bind_blend_state(struct pipe_context *pipe,
+                                  void *blend)
+{
+   struct svga_context *ctx = svga_context(pipe);
+   /* Set the SVGA_NEW_BLEND dirty bit and remember the newly bound object. */
+   ctx->dirty |= SVGA_NEW_BLEND;
+   ctx->curr.blend = (struct svga_blend_state*)blend;
+}
+
+/* Release a blend state object; the pointer is handed to FREE() as-is. */
+static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+   FREE(blend);
+}
+
+static void svga_set_blend_color( struct pipe_context *pipe,
+                                  const struct pipe_blend_color *blend_color )
+{
+   struct svga_context *ctx = svga_context(pipe);
+
+   /* Set the SVGA_NEW_BLEND dirty bit and keep a private copy of the color. */
+   ctx->dirty |= SVGA_NEW_BLEND;
+   ctx->curr.blend_color = *blend_color;
+}
+
+/* Install the blend-related callbacks of this file into svga->pipe. */
+void svga_init_blend_functions( struct svga_context *svga )
+{
+   svga->pipe.create_blend_state = svga_create_blend_state;
+   svga->pipe.bind_blend_state = svga_bind_blend_state;
+   svga->pipe.delete_blend_state = svga_delete_blend_state;
+
+   svga->pipe.set_blend_color = svga_set_blend_color;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
new file mode 100644
index 0000000000..5a4a8c0f5f
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -0,0 +1,84 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_cmd.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+/* Copy a width x height rectangle from 'src' (srcx,srcy) to 'dest' (destx,desty). */
+static void svga_surface_copy(struct pipe_context *pipe,
+                              struct pipe_surface *dest,
+                              unsigned destx, unsigned desty,
+                              struct pipe_surface *src,
+                              unsigned srcx, unsigned srcy,
+                              unsigned width, unsigned height)
+{
+   struct svga_context *svga = svga_context(pipe);
+   SVGA3dCopyBox *box;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry( svga );
+
+   ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                 src,
+                                 dest,
+                                 &box,
+                                 1);
+   if (ret != PIPE_OK) {
+      /* First attempt failed: flush the context and try exactly once more. */
+      svga_context_flush(svga, NULL);
+
+      ret = SVGA3D_BeginSurfaceCopy(svga->swc, src, dest, &box, 1);
+      assert(ret == PIPE_OK);
+      if (ret != PIPE_OK)
+         return;   /* 'box' was never set; writing through it would be undefined */
+   }
+
+   /* Describe the single copy box requested above. */
+   box->x = destx;
+   box->y = desty;
+   box->z = 0;
+   box->w = width;
+   box->h = height;
+   box->d = 1;
+   box->srcx = srcx;
+   box->srcy = srcy;
+   box->srcz = 0;
+
+   SVGA_FIFOCommitAll(svga->swc);
+
+   /* Flag the destination as modified and hand it to svga_propagate_surface(). */
+   svga_surface(dest)->dirty = TRUE;
+   svga_propagate_surface(pipe, dest);
+}
+
+/* Install svga_surface_copy as the context's surface_copy callback. */
+void
+svga_init_blit_functions(struct svga_context *svga)
+{
+   svga->pipe.surface_copy = svga_surface_copy;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
new file mode 100644
index 0000000000..8977d26541
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -0,0 +1,119 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_pack_color.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+/* One attempt at emitting a clear; the caller decides whether to retry. */
+static enum pipe_error
+try_clear(struct svga_context *svga,
+          unsigned buffers,
+          const float *rgba,
+          double depth,
+          unsigned stencil)
+{
+   struct pipe_framebuffer_state *fbuf = &svga->curr.framebuffer;
+   SVGA3dRect area = { 0, 0, 0, 0 };
+   SVGA3dClearFlag what = 0;
+   unsigned packed = 0;
+   boolean viewport_changed = FALSE;
+   int err = PIPE_OK;
+
+   err = svga_update_state(svga, SVGA_STATE_HW_CLEAR);
+   if (err)
+      return err;
+
+   /* Color: pack the clear value and start from the extent of cbufs[0]. */
+   if ((buffers & PIPE_CLEAR_COLOR) && fbuf->cbufs[0]) {
+      util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &packed);
+      what |= SVGA3D_CLEAR_COLOR;
+      area.w = fbuf->cbufs[0]->width;
+      area.h = fbuf->cbufs[0]->height;
+   }
+
+   /* Depth (plus stencil for Z24S8): grow the area to cover the zsbuf. */
+   if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fbuf->zsbuf) {
+      what |= SVGA3D_CLEAR_DEPTH;
+      if (fbuf->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM)
+         what |= SVGA3D_CLEAR_STENCIL;
+
+      area.w = MAX2(area.w, fbuf->zsbuf->width);
+      area.h = MAX2(area.h, fbuf->zsbuf->height);
+   }
+
+   viewport_changed = (memcmp(&area, &svga->state.hw_clear.viewport, sizeof(area)) != 0);
+   if (viewport_changed) {
+      err = SVGA3D_SetViewport(svga->swc, &area);
+      if (err)
+         return err;
+   }
+
+   err = SVGA3D_ClearRect(svga->swc, what, packed, depth, stencil,
+                          area.x, area.y, area.w, area.h);
+   if (err != PIPE_OK)
+      return err;
+
+   if (viewport_changed) {
+      memcpy(&area, &svga->state.hw_clear.viewport, sizeof area);
+      err = SVGA3D_SetViewport(svga->swc, &area);
+   }
+
+   return err;
+}
+
+/**
+ * Clear the buffers selected by 'buffers' on the current framebuffer.
+ * The whole buffer is cleared: no masking, no scissor.
+ */
+void
+svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+           double depth, unsigned stencil)
+{
+   struct svga_context *ctx = svga_context( pipe );
+   int status;
+
+   status = try_clear( ctx, buffers, rgba, depth, stencil );
+
+   if (status == PIPE_ERROR_OUT_OF_MEMORY) {
+      /* Flush the command buffer, then make exactly one more attempt.
+       */
+      svga_context_flush( ctx, NULL );
+
+      status = try_clear( ctx, buffers, rgba, depth, stencil );
+   }
+
+   /*
+    * The bound target surfaces are flagged dirty whatever the outcome.
+    * TODO Mark only cleared surfaces.
+    */
+   svga_mark_surfaces_dirty(ctx);
+
+   assert (status == PIPE_OK);
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c
new file mode 100644
index 0000000000..10e7a12189
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+
+/***********************************************************************
+ * Constant buffers 
+ */
+
+struct svga_constbuf /* NOTE(review): not referenced anywhere else in this file */
+{
+   unsigned type;      /* NOTE(review): meaning not visible here -- confirm against users */
+   float (*data)[4];   /* pointer to rows of four floats */
+   unsigned count;     /* presumably the number of rows in 'data' -- TODO confirm */
+};
+
+
+/* Bind (or, with a NULL 'buf', unbind) the constant buffer of one shader stage. */
+static void svga_set_constant_buffer(struct pipe_context *pipe,
+                                     uint shader, uint index,
+                                     const struct pipe_constant_buffer *buf)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(index == 0);
+
+   /* A NULL 'buf' means "no buffer": drop the reference instead of crashing. */
+   pipe_buffer_reference( &svga->curr.cb[shader], buf ? buf->buffer : NULL );
+
+   if (shader == PIPE_SHADER_FRAGMENT)
+      svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
+   else
+      svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+}
+
+
+/* Install svga_set_constant_buffer as the context's set_constant_buffer callback. */
+void svga_init_constbuffer_functions( struct svga_context *svga )
+{
+   svga->pipe.set_constant_buffer = svga_set_constant_buffer;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
new file mode 100644
index 0000000000..df636c08a0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -0,0 +1,153 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+
+/* Map a PIPE_FUNC_x comparison token to its SVGA3D_CMP_x counterpart. */
+static INLINE unsigned
+svga_translate_compare_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:     return SVGA3D_CMP_NEVER;
+   case PIPE_FUNC_ALWAYS:    return SVGA3D_CMP_ALWAYS;
+   case PIPE_FUNC_EQUAL:     return SVGA3D_CMP_EQUAL;
+   case PIPE_FUNC_NOTEQUAL:  return SVGA3D_CMP_NOTEQUAL;
+   case PIPE_FUNC_LESS:      return SVGA3D_CMP_LESS;
+   case PIPE_FUNC_LEQUAL:    return SVGA3D_CMP_LESSEQUAL;
+   case PIPE_FUNC_GREATER:   return SVGA3D_CMP_GREATER;
+   case PIPE_FUNC_GEQUAL:    return SVGA3D_CMP_GREATEREQUAL;
+   default:
+      assert(0);
+      return SVGA3D_CMP_ALWAYS;
+   }
+}
+
+static INLINE unsigned
+svga_translate_stencil_op(unsigned op)
+{
+   switch (op) {   /* D3D9-style ops: plain INCR/DECR wrap, INCRSAT/DECRSAT clamp */
+   case PIPE_STENCIL_OP_KEEP:      return SVGA3D_STENCILOP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:      return SVGA3D_STENCILOP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:   return SVGA3D_STENCILOP_REPLACE;
+   case PIPE_STENCIL_OP_INCR:      return SVGA3D_STENCILOP_INCRSAT; /* clamping increment */
+   case PIPE_STENCIL_OP_DECR:      return SVGA3D_STENCILOP_DECRSAT; /* clamping decrement */
+   case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCR;    /* wrapping increment */
+   case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECR;    /* wrapping decrement */
+   case PIPE_STENCIL_OP_INVERT:    return SVGA3D_STENCILOP_INVERT;
+   default:
+      assert(0);
+      return SVGA3D_STENCILOP_KEEP;
+   }
+}
+
+/* Translate a Gallium depth/stencil/alpha template into driver state; NULL on OOM. */
+static void *
+svga_create_depth_stencil_state(struct pipe_context *pipe,
+                                const struct pipe_depth_stencil_alpha_state *templ)
+{
+   struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
+
+   if (!ds)
+      return NULL;
+
+   /* Don't try to figure out CW/CCW correspondence with
+    * stencil[0]/[1] at this point.  Presumably this can change as
+    * back/front face are modified.
+    */
+   ds->stencil[0].enabled = templ->stencil[0].enabled;
+   if (ds->stencil[0].enabled) {
+      ds->stencil[0].func  = svga_translate_compare_func(templ->stencil[0].func);
+      ds->stencil[0].fail  = svga_translate_stencil_op(templ->stencil[0].fail_op);
+      ds->stencil[0].zfail = svga_translate_stencil_op(templ->stencil[0].zfail_op);
+      ds->stencil[0].pass  = svga_translate_stencil_op(templ->stencil[0].zpass_op);
+
+      /* SVGA3D has one ref/mask/writemask triple shared between front &
+       * back face stencil.  We really need two: */
+      ds->stencil_ref       = templ->stencil[0].ref_value & 0xff;
+      ds->stencil_mask      = templ->stencil[0].valuemask & 0xff;
+      ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
+   }
+
+   ds->stencil[1].enabled = templ->stencil[1].enabled;
+   if (templ->stencil[1].enabled) {
+      ds->stencil[1].func   = svga_translate_compare_func(templ->stencil[1].func);
+      ds->stencil[1].fail   = svga_translate_stencil_op(templ->stencil[1].fail_op);
+      ds->stencil[1].zfail  = svga_translate_stencil_op(templ->stencil[1].zfail_op);
+      ds->stencil[1].pass   = svga_translate_stencil_op(templ->stencil[1].zpass_op);
+      /* Shared triple again: these overwrite whatever stencil[0] stored above. */
+      ds->stencil_ref       = templ->stencil[1].ref_value & 0xff;
+      ds->stencil_mask      = templ->stencil[1].valuemask & 0xff;
+      ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
+   }
+
+   ds->zenable = templ->depth.enabled;
+   if (ds->zenable) {
+      ds->zfunc = svga_translate_compare_func(templ->depth.func);
+      ds->zwriteenable = templ->depth.writemask;
+   }
+
+   ds->alphatestenable = templ->alpha.enabled;
+   if (ds->alphatestenable) {
+      ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
+      ds->alpharef = templ->alpha.ref_value;
+   }
+
+   return ds;
+}
+
+static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
+                                          void *depth_stencil)
+{
+   struct svga_context *ctx = svga_context(pipe);
+   /* Set the SVGA_NEW_DEPTH_STENCIL dirty bit and remember the bound object. */
+   ctx->dirty |= SVGA_NEW_DEPTH_STENCIL;
+   ctx->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
+}
+
+static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
+                                            void *depth_stencil)
+{
+   FREE(depth_stencil);   /* 'pipe' is not consulted; the object is simply released */
+}
+
+
+/* Install the depth/stencil/alpha callbacks of this file into svga->pipe. */
+void svga_init_depth_stencil_functions( struct svga_context *svga )
+{
+   svga->pipe.create_depth_stencil_alpha_state = svga_create_depth_stencil_state;
+   svga->pipe.bind_depth_stencil_alpha_state = svga_bind_depth_stencil_state;
+   svga->pipe.delete_depth_stencil_alpha_state = svga_delete_depth_stencil_state;
+}
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
new file mode 100644
index 0000000000..71a552862e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -0,0 +1,261 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_time.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+#include "svga_debug.h"
+
+
+/* Indexed hwtnl draw; any error flushes the context and, if do_retry, reruns once. */
+static enum pipe_error
+retry_draw_range_elements( struct svga_context *svga,
+                           struct pipe_buffer *index_buffer,
+                           unsigned index_size,
+                           unsigned min_index,
+                           unsigned max_index,
+                           unsigned prim, 
+                           unsigned start, 
+                           unsigned count,
+                           boolean do_retry )
+{
+   enum pipe_error ret = 0;
+
+   svga_hwtnl_set_unfilled( svga->hwtnl,
+                            svga->curr.rast->hw_unfilled );
+
+   svga_hwtnl_set_flatshade( svga->hwtnl,
+                             svga->curr.rast->templ.flatshade,
+                             svga->curr.rast->templ.flatshade_first );
+
+   /* Every failure from here on jumps to the 'retry' label below. */
+   ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+   if (ret)
+      goto retry;
+
+   ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
+                                         index_buffer, index_size,
+                                         min_index, max_index,
+                                         prim, start, count, 0 );
+   if (ret)
+      goto retry;
+
+   if (svga->curr.any_user_vertex_buffers) {
+      ret = svga_hwtnl_flush( svga->hwtnl );
+      if (ret)
+         goto retry;
+   }
+
+   return PIPE_OK;
+
+retry:
+   svga_context_flush( svga, NULL );
+   /* Only one extra attempt: the nested call passes do_retry = FALSE. */
+   if (do_retry)
+   {
+      return retry_draw_range_elements( svga,
+                                        index_buffer, index_size,
+                                        min_index, max_index,
+                                        prim, start, count,
+                                        FALSE );
+   }
+
+   return ret;
+}
+
+/* Non-indexed hwtnl draw; only an out-of-memory failure is retried, once, after a flush. */
+static enum pipe_error
+retry_draw_arrays( struct svga_context *svga,
+                   unsigned prim,
+                   unsigned start,
+                   unsigned count,
+                   boolean do_retry )
+{
+   enum pipe_error err;
+
+   /* Hand the rasterizer's fill mode and flat-shading setup to hwtnl. */
+   svga_hwtnl_set_unfilled( svga->hwtnl,
+                            svga->curr.rast->hw_unfilled );
+   svga_hwtnl_set_flatshade( svga->hwtnl,
+                             svga->curr.rast->templ.flatshade,
+                             svga->curr.rast->templ.flatshade_first );
+
+   /* Validate state, draw, and (for user vertex buffers) flush hwtnl.
+    * Each step runs only while everything before it succeeded.
+    */
+   err = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+
+   if (!err)
+      err = svga_hwtnl_draw_arrays( svga->hwtnl, prim, start, count );
+
+   if (!err && svga->curr.any_user_vertex_buffers)
+      err = svga_hwtnl_flush( svga->hwtnl );
+
+   if (!err)
+      return err;
+
+   /* Anything other than an out-of-memory error on the first attempt
+    * is reported to the caller unchanged. */
+   if (err != PIPE_ERROR_OUT_OF_MEMORY)
+      return err;
+
+   if (!do_retry)
+      return err;
+
+   /* Flush the context, then make exactly one more attempt. */
+   svga_context_flush( svga, NULL );
+
+   return retry_draw_arrays( svga,
+                             prim,
+                             start,
+                             count,
+                             FALSE );
+}
+
+
+
+
+/* Common draw entry point: swtnl or hwtnl, indexed or not; TRUE iff PIPE_OK. */
+static boolean
+svga_draw_range_elements( struct pipe_context *pipe,
+                          struct pipe_buffer *index_buffer,
+                          unsigned index_size,
+                          unsigned min_index,
+                          unsigned max_index,
+                          unsigned prim, unsigned start, unsigned count)
+{
+   struct svga_context *svga = svga_context( pipe );
+   unsigned reduced_prim = u_reduced_prim(prim);
+   enum pipe_error ret = 0;
+
+   if (!u_trim_pipe_prim( prim, &count ))
+      return TRUE;
+
+   /*
+    * Mark currently bound target surfaces as dirty;
+    * it doesn't really matter that this is done before drawing.
+    *
+    * TODO If we ever normally return something other than
+    * true, we should not mark them as dirty in that case.
+    */
+   svga_mark_surfaces_dirty(svga_context(pipe));
+
+   if (svga->curr.reduced_prim != reduced_prim) {
+      svga->curr.reduced_prim = reduced_prim;
+      svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
+   }
+   
+   svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
+   /* Debug aid: skip the draw when either bound shader is the disabled one. */
+#ifdef DEBUG
+   if (svga->curr.vs->base.id == svga->debug.disable_shader ||
+       svga->curr.fs->base.id == svga->debug.disable_shader)
+      return 0;
+#endif
+
+   if (svga->state.sw.need_swtnl)
+   {
+      ret = svga_swtnl_draw_range_elements( svga, 
+                                            index_buffer, 
+                                            index_size,
+                                            min_index, max_index,
+                                            prim,
+                                            start, count );
+   }
+   else {
+      if (index_buffer) {
+         ret = retry_draw_range_elements( svga,
+                                          index_buffer,
+                                          index_size,
+                                          min_index,
+                                          max_index,
+                                          prim,
+                                          start,
+                                          count,
+                                          TRUE );
+      }
+      else {
+         ret = retry_draw_arrays( svga, 
+                                  prim, 
+                                  start, 
+                                  count,
+                                  TRUE );
+      }
+   }
+   /* DEBUG_FLUSH: log each draw and flush right away (sleeping once id > 1300). */
+   if (SVGA_DEBUG & DEBUG_FLUSH) {
+      static unsigned id;
+      debug_printf("%s %d\n", __FUNCTION__, id++);
+      if (id > 1300)
+         util_time_sleep( 2000 );
+
+      svga_hwtnl_flush_retry( svga );
+      svga_context_flush(svga, NULL);
+   }
+
+   return ret == PIPE_OK;
+}
+
+/* Indexed draw without a known index range: forwards with [0, 0xffffffff]. */
+static boolean 
+svga_draw_elements( struct pipe_context *pipe,
+                    struct pipe_buffer *index_buffer,
+                    unsigned index_size,
+                    unsigned prim, unsigned start, unsigned count)
+{
+   return svga_draw_range_elements( pipe, index_buffer,
+                                    index_size,
+                                    0, 0xffffffff,
+                                    prim, start, count );
+}
+
+static boolean 
+svga_draw_arrays( struct pipe_context *pipe,
+                  unsigned prim, unsigned start, unsigned count)
+{
+   return svga_draw_range_elements(pipe, NULL, 0, /* no index buffer, index_size 0 */
+                                   start, start + count - 1, /* min_index, max_index */
+                                   prim, 
+                                   start, count);
+}
+
+/* Install the three draw callbacks of this file into svga->pipe. */
+void svga_init_draw_functions( struct svga_context *svga )
+{
+   svga->pipe.draw_arrays = svga_draw_arrays;
+   svga->pipe.draw_elements = svga_draw_elements;
+   svga->pipe.draw_range_elements = svga_draw_range_elements;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
new file mode 100644
index 0000000000..942366de72
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -0,0 +1,68 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_defines.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_flush( struct pipe_context *pipe,
+                        unsigned flags,
+                        struct pipe_fence_handle **fence )
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+   int buf;
+
+   /* Buffered drawing commands go out first. */
+   svga_hwtnl_flush_retry( svga );
+
+   /* Then back-copy every bound render target view (colour buffers,
+    * followed by depth/stencil) to its texture. */
+   for (buf = 0; buf < PIPE_MAX_COLOR_BUFS; buf++) {
+      if (fb->cbufs[buf])
+         svga_propagate_surface(pipe, fb->cbufs[buf]);
+   }
+   if (fb->zsbuf)
+      svga_propagate_surface(pipe, fb->zsbuf);
+
+   /* Last, flush the command queue; the caller's fence slot is handed
+    * straight to svga_context_flush(). */
+   svga_context_flush(svga, fence);
+}
+
+/* Install svga_flush as the context's flush entry point. */
+void svga_init_flush_functions( struct svga_context *svga )
+{
+   svga->pipe.flush = svga_flush;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
new file mode 100644
index 0000000000..e3be840d92
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -0,0 +1,124 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/***********************************************************************
+ * Fragment shaders 
+ */
+/* Create a fragment shader object for templ; NULL if an allocation fails. */
+static void *
+svga_create_fs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_fragment_shader *fs = CALLOC_STRUCT(svga_fragment_shader);
+
+   if (!fs)
+      return NULL;
+
+   /* Private copy of the tokens; give up cleanly if it cannot be made. */
+   fs->base.tokens = tgsi_dup_tokens(templ->tokens);
+   if (!fs->base.tokens) {
+      FREE(fs);
+      return NULL;
+   }
+
+   /* Collect basic info that we'll need later: */
+   tgsi_scan_shader(fs->base.tokens, &fs->base.info);
+   fs->base.id = svga->debug.shader_id++;
+   fs->base.use_sm30 = svgascreen->use_ps30;
+   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+                   __FUNCTION__, fs->base.id,
+                   fs->base.info.num_inputs, fs->base.info.num_outputs);
+   }
+   return fs;
+}
+
+static void
+svga_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   /* Record the shader as current and mark fragment-shader state dirty. */
+   svga->curr.fs = (struct svga_fragment_shader *) shader;
+   svga->dirty |= SVGA_NEW_FS;
+}
+
+static
+void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+   struct svga_shader_result *cur, *next;
+   enum pipe_error ret;
+
+   /* Emit buffered drawing commands before the shader results go away. */
+   svga_hwtnl_flush_retry( svga );
+
+   cur = fs->base.results;
+   while (cur) {
+      /* Read the link now: cur is destroyed at the bottom of the loop. */
+      next = cur->next;
+
+      /* Destroy the shader; on failure flush the context and retry once. */
+      ret = SVGA3D_DestroyShader(svga->swc, cur->id, SVGA3D_SHADERTYPE_PS);
+      if (ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_DestroyShader(svga->swc, cur->id, SVGA3D_SHADERTYPE_PS);
+         assert(ret == PIPE_OK);
+      }
+      svga_destroy_shader_result( cur );
+      cur = next;
+   }
+
+   FREE((void *)fs->base.tokens);
+   FREE(fs);
+}
+
+/* Install the fragment shader create/bind/delete entry points. */
+void svga_init_fs_functions(struct svga_context *svga)
+{
+   svga->pipe.delete_fs_state = svga_delete_fs_state;
+   svga->pipe.bind_fs_state = svga_bind_fs_state;
+   svga->pipe.create_fs_state = svga_create_fs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
new file mode 100644
index 0000000000..58cb1e6e23
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -0,0 +1,187 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_set_scissor_state(struct pipe_context *pipe,
+                                   const struct pipe_scissor_state *scissor)
+{
+   struct svga_context *const svga = svga_context(pipe);
+   /* Flag scissor state dirty and take a byte copy of the caller's state. */
+   svga->dirty |= SVGA_NEW_SCISSOR;
+   memcpy(&svga->curr.scissor, scissor, sizeof *scissor);
+}
+
+
+static void svga_set_polygon_stipple( struct pipe_context *pipe,
+                                      const struct pipe_poly_stipple *stipple )
+{
+   /* Intentionally empty: overridden by the draw module. */
+}
+
+
+void svga_cleanup_framebuffer(struct svga_context *svga)
+{
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   int slot;
+
+   /* Drop every surface reference held by the current and hw_clear state. */
+   for (slot = 0; slot < PIPE_MAX_COLOR_BUFS; ++slot) {
+      pipe_surface_reference(&curr->cbufs[slot], NULL);
+      pipe_surface_reference(&hw->cbufs[slot], NULL);
+   }
+   pipe_surface_reference(&curr->zsbuf, NULL);
+   pipe_surface_reference(&hw->zsbuf, NULL);
+}
+
+/* Depth-bias scale per depth format; unsigned shifts keep 1<<31 well defined. */
+#define DEPTH_BIAS_SCALE_FACTOR_D16    ((float)(1u<<15))
+#define DEPTH_BIAS_SCALE_FACTOR_D24S8  ((float)(1u<<23))
+#define DEPTH_BIAS_SCALE_FACTOR_D32    ((float)(1u<<31))
+
+/* Make fb the current framebuffer state and derive depthscale from its zsbuf. */
+static void svga_set_framebuffer_state(struct pipe_context *pipe,
+				       const struct pipe_framebuffer_state *fb)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct pipe_framebuffer_state *dst = &svga->curr.framebuffer;
+   boolean propagate = FALSE;
+   int i;
+
+   dst->width = fb->width;
+   dst->height = fb->height;
+   dst->nr_cbufs = fb->nr_cbufs;
+
+   /* check if we need to propagate any of the outgoing colour surfaces */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+         if (svga_surface_needs_propagation(dst->cbufs[i]))
+            propagate = TRUE;
+   }
+
+   if (propagate) {
+      /* make sure that drawing calls come before propagation calls */
+      svga_hwtnl_flush_retry( svga );
+   
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+         if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+            svga_propagate_surface(pipe, dst->cbufs[i]);
+   }
+
+   /* XXX: Actually the virtual hardware may support rendertargets with
+    * different size, depending on the host API and driver, but since we cannot
+    * know that make no such assumption here. */
+   for(i = 0; i < fb->nr_cbufs; ++i) {
+      if (fb->zsbuf && fb->cbufs[i]) {
+         assert(fb->zsbuf->width == fb->cbufs[i]->width); 
+         assert(fb->zsbuf->height == fb->cbufs[i]->height); 
+      }
+   }
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+      pipe_surface_reference(&dst->cbufs[i], fb->cbufs[i]);
+   pipe_surface_reference(&dst->zsbuf, fb->zsbuf);
+
+   /* depthscale is the reciprocal of a per-format factor; 0 if none applies. */
+   if (svga->curr.framebuffer.zsbuf)
+   {
+      switch (svga->curr.framebuffer.zsbuf->format) {
+      case PIPE_FORMAT_Z16_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16;
+         break;
+      case PIPE_FORMAT_S8Z24_UNORM:
+      case PIPE_FORMAT_X8Z24_UNORM:
+      case PIPE_FORMAT_Z24S8_UNORM:
+      case PIPE_FORMAT_Z24X8_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8;
+         break;
+      case PIPE_FORMAT_Z32_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D32;
+         break;
+      case PIPE_FORMAT_Z32_FLOAT:
+         svga->curr.depthscale = 1.0f / ((float)(1<<23));
+         break;
+      default:
+         svga->curr.depthscale = 0.0f;
+         break;
+      }
+   }
+   else {
+      svga->curr.depthscale = 0.0f;
+   }
+
+   svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+}
+
+
+
+static void svga_set_clip_state(struct pipe_context *pipe,
+                                const struct pipe_clip_state *clip)
+{
+   struct svga_context *const svga = svga_context(pipe);
+
+   /* Flag clip state dirty, then keep a by-value copy of the caller's. */
+   svga->dirty |= SVGA_NEW_CLIP;
+   svga->curr.clip = *clip;
+}
+
+
+
+/* Called when the state tracker notices a change to the viewport
+ * matrix; the new state is copied and flagged dirty.
+ */
+static void svga_set_viewport_state(struct pipe_context *pipe,
+                                    const struct pipe_viewport_state *viewport)
+{
+   struct svga_context *const svga = svga_context(pipe);
+
+   svga->dirty |= SVGA_NEW_VIEWPORT;
+
+   svga->curr.viewport = *viewport; /* whole-struct copy */
+}
+
+
+/* Install the scissor, stipple, framebuffer, clip and viewport setters. */
+void svga_init_misc_functions(struct svga_context *svga)
+{
+   svga->pipe.set_viewport_state = svga_set_viewport_state;
+   svga->pipe.set_clip_state = svga_set_clip_state;
+   svga->pipe.set_framebuffer_state = svga_set_framebuffer_state;
+   svga->pipe.set_polygon_stipple = svga_set_polygon_stipple;
+   svga->pipe.set_scissor_state = svga_set_scissor_state;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
new file mode 100644
index 0000000000..01336b0a2c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -0,0 +1,267 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/* Fixme: want a public base class for all pipe structs, even if there
+ * isn't much in them.
+ */
+struct pipe_query {
+   int dummy;   /* placeholder member; svga_query embeds this struct as base */
+};
+
+struct svga_query {
+   struct pipe_query base;      /* first member, so svga_query() can cast to it */
+   SVGA3dQueryType type;        /* type passed to the SVGA3D_*Query commands */
+   struct svga_winsys_buffer *hwbuf;           /* buffer holding the result */
+   volatile SVGA3dQueryResult *queryResult;    /* pointer from mapping hwbuf */
+   struct pipe_fence_handle *fence;   /* from the flush after WaitForQuery */
+};
+
+/***********************************************************************
+ * Inline conversion function: casts a pipe_query pointer to the
+ * svga_query that contains it.  Better-typed than the macros used previously.
+ */
+static INLINE struct svga_query *
+svga_query( struct pipe_query *q )
+{
+   return (struct svga_query *)q;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe, 
+                                     struct pipe_query *q,
+                                     boolean wait,
+                                     uint64_t *result);
+/* Allocate a query object backed by a pinned winsys buffer; NULL on failure. */
+static struct pipe_query *svga_create_query( struct pipe_context *pipe,
+                                             unsigned query_type )
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_query *sq;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   sq = CALLOC_STRUCT(svga_query);
+   if (!sq)
+      goto no_sq;
+   /* query_type is not consulted: every query is created as occlusion. */
+   sq->type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+   sq->hwbuf = svga_winsys_buffer_create(svgascreen, 
+                                         1,
+                                         SVGA_BUFFER_USAGE_PINNED,
+                                         sizeof *sq->queryResult);
+   if(!sq->hwbuf)
+      goto no_hwbuf;
+    
+   sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws, 
+                                                          sq->hwbuf, 
+                                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+   if(!sq->queryResult)
+      goto no_query_result;
+
+   sq->queryResult->totalSize = sizeof *sq->queryResult;
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+   /*
+    * We request the buffer to be pinned and assume it is always mapped.
+    * 
+    * The reason is that we don't want to wait for fences when checking the
+    * query status.
+    */
+   sws->buffer_unmap(sws, sq->hwbuf);
+
+   return &sq->base;
+
+no_query_result:
+   sws->buffer_destroy(sws, sq->hwbuf);
+no_hwbuf:
+   FREE(sq);
+no_sq:
+   return NULL;
+}
+
+static void svga_destroy_query(struct pipe_context *pipe,
+                               struct pipe_query *q)
+{
+   struct svga_winsys_screen *sws = svga_screen(pipe->screen)->sws;
+   struct svga_query *sq = svga_query( q );
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   /* Release the result buffer and any fence reference, then the object. */
+   sws->buffer_destroy(sws, sq->hwbuf);
+   sws->fence_reference(sws, &sq->fence, NULL);
+   FREE(sq);
+}
+
+/* Start query q on this context, first waiting out any still-pending result. */
+static void svga_begin_query(struct pipe_context *pipe, 
+                             struct pipe_query *q)
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_query *sq = svga_query( q );
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   
+   assert(!svga->sq);
+
+   /* Need to flush out buffered drawing commands so that they don't
+    * get counted in the query results.
+    */
+   svga_hwtnl_flush_retry(svga);
+   
+   if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+      /* The application doesn't care for the pending query result. We cannot
+       * let go the existing buffer and just get a new one because its storage
+       * may be reused for other purposes and clobbered by the host when it
+       * determines the query result. So the only option here is to wait for
+       * the existing query's result -- not a big deal, given that no sane
+       * application would do this.
+       */
+      uint64_t result;
+
+      svga_get_query_result(pipe, q, TRUE, &result);
+      assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+   }
+   /* Reset the result state and drop any fence left from a previous use. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+      assert(ret == PIPE_OK);
+   }
+
+   svga->sq = sq;
+}
+
+/* Finish query q: mark it pending, emit EndQuery and flush the context. */
+static void svga_end_query(struct pipe_context *pipe, 
+                           struct pipe_query *q)
+{
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_query *sq = svga_query( q );
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(svga->sq == sq);
+
+   svga_hwtnl_flush_retry(svga);
+   /* Set to PENDING before sending EndQuery. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+   ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+      assert(ret == PIPE_OK);
+   }
+   
+   /* TODO: Delay flushing. We don't really need to flush here, just ensure 
+    * that there is one flush before svga_get_query_result attempts to get the
+    * result */
+   svga_context_flush(svga, NULL);
+
+   svga->sq = NULL;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe, 
+                                     struct pipe_query *q,
+                                     boolean wait,
+                                     uint64_t *result)
+{
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_screen *svgascreen = svga_screen( pipe->screen );
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_query *sq = svga_query( q );
+   SVGA3dQueryState state;
+   /* Returns TRUE with *result set, or FALSE if still pending and !wait. */
+   SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__, wait);
+
+   /* The query status won't be updated by the host unless 
+    * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a 
+    * synchronous wait on the host */
+   if(!sq->fence) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+         assert(ret == PIPE_OK);
+      }
+      /* This flush also hands back the fence waited on further down. */
+      svga_context_flush(svga, &sq->fence);
+      
+      assert(sq->fence);
+   }
+
+   state = sq->queryResult->state;
+   if(state == SVGA3D_QUERYSTATE_PENDING) {
+      if(!wait)
+         return FALSE;
+      /* Block on the fence, then re-read the state from the result buffer. */
+      sws->fence_finish(sws, sq->fence, 0);
+      
+      state = sq->queryResult->state;
+   }
+
+   assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || 
+          state == SVGA3D_QUERYSTATE_FAILED);
+   
+   *result = (uint64_t)sq->queryResult->result32;
+
+   SVGA_DBG(DEBUG_QUERY, "%s result %u\n", __FUNCTION__, (unsigned)*result);
+
+   return TRUE;
+}
+
+
+/* Install the query create/destroy/begin/end/get-result entry points. */
+void svga_init_query_functions(struct svga_context *svga)
+{
+   svga->pipe.get_query_result = svga_get_query_result;
+   svga->pipe.end_query = svga_end_query;
+   svga->pipe.begin_query = svga_begin_query;
+   svga->pipe.destroy_query = svga_destroy_query;
+   svga->pipe.create_query = svga_create_query;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
new file mode 100644
index 0000000000..b03f8eb9cf
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -0,0 +1,250 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW;
+ * front_winding is accepted but not consulted here.
+ */
+static SVGA3dFace svga_translate_cullmode( unsigned mode,
+                                           unsigned front_winding )
+{
+   SVGA3dFace face = SVGA3D_FACE_NONE;
+
+   if (mode == PIPE_WINDING_CCW)
+      face = SVGA3D_FACE_BACK;
+   else if (mode == PIPE_WINDING_CW)
+      face = SVGA3D_FACE_FRONT;
+   else if (mode == PIPE_WINDING_BOTH)
+      face = SVGA3D_FACE_FRONT_BACK;
+   else if (mode != PIPE_WINDING_NONE)
+      assert(0);   /* unknown mode: still yields SVGA3D_FACE_NONE */
+
+   return face;
+}
+
+static SVGA3dShadeMode svga_translate_flatshade(unsigned mode)
+{
+   /* Any non-zero flatshade flag selects flat shading. */
+   return (mode != 0) ? SVGA3D_SHADEMODE_FLAT : SVGA3D_SHADEMODE_SMOOTH;
+}
+
+/* Build the svga rasterizer state for templ; returns NULL if allocation fails. */
+static void *
+svga_create_rasterizer_state(struct pipe_context *pipe,
+                             const struct pipe_rasterizer_state *templ)
+{
+   struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
+   if (!rast)
+      return NULL;
+   /* need this for draw module. */
+   rast->templ = *templ;
+
+   /* light_twoside          - XXX: need fragment shader variant */
+   /* poly_smooth            - XXX: no fallback available */
+   /* poly_stipple_enable    - draw module */
+   /* point_sprite           - ? */
+   /* point_size_per_vertex  - ? */
+   /* sprite_coord_mode      - ??? */
+   /* bypass_vs_viewport_and_clip        - handled by viewport setup */
+   /* flatshade_first        - handled by index translation */
+   /* gl_rasterization_rules - XXX - viewport code */
+   /* line_width             - draw module */
+   /* fill_cw, fill_ccw      - draw module or index translation */
+
+   rast->shademode = svga_translate_flatshade( templ->flatshade );
+   rast->cullmode = svga_translate_cullmode( templ->cull_mode, 
+                                             templ->front_winding );
+   rast->scissortestenable = templ->scissor;
+   rast->multisampleantialias = templ->multisample;
+   rast->antialiasedlineenable = templ->line_smooth;
+   rast->lastpixel = templ->line_last_pixel;
+   rast->pointsize = templ->point_size;
+   rast->pointsize_min = templ->point_size_min;
+   rast->pointsize_max = templ->point_size_max;
+   rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+   /* Use swtnl + decomposition to implement these:
+    */
+   if (templ->poly_stipple_enable)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+
+   if (templ->line_width != 1.0 &&
+       templ->line_width != 0.0)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+
+   if (templ->line_stipple_enable) {
+      /* LinePattern not implemented on all backends. 
+       */
+      if (0) {
+         SVGA3dLinePattern lp;
+         lp.repeat = templ->line_stipple_factor + 1;
+         lp.pattern = templ->line_stipple_pattern;
+         rast->linepattern = lp.uintValue;
+      }
+      else {
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+      }
+   } 
+
+   if (templ->point_smooth)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
+
+   {
+      boolean offset_cw = templ->offset_cw;
+      boolean offset_ccw = templ->offset_ccw;
+      boolean offset  = 0;
+      int fill_cw = templ->fill_cw;
+      int fill_ccw = templ->fill_ccw;
+      int fill = PIPE_POLYGON_MODE_FILL;
+
+      switch (templ->cull_mode) {
+      case PIPE_WINDING_BOTH:
+         offset = 0;
+         fill = PIPE_POLYGON_MODE_FILL;
+         break;
+
+      case PIPE_WINDING_CW:
+         offset = offset_ccw;
+         fill = fill_ccw;
+         break;
+
+      case PIPE_WINDING_CCW:
+         offset = offset_cw;
+         fill = fill_cw;
+         break;
+
+      case PIPE_WINDING_NONE:
+         if (fill_cw != fill_ccw || offset_cw != offset_ccw) 
+         {
+            /* Always need the draw module to work out different
+             * front/back fill modes:
+             */
+            rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+         }
+         else {
+            offset = offset_ccw;
+            fill = fill_ccw;
+         }
+         break;
+
+      default:
+         assert(0);
+         break;
+      }
+
+      /* Unfilled primitive modes aren't implemented on all virtual
+       * hardware.  We can do some unfilled processing with index
+       * translation, but otherwise need the draw module:
+       */
+      if (fill != PIPE_POLYGON_MODE_FILL &&
+          (templ->flatshade ||
+           templ->light_twoside ||
+           offset ||
+           templ->cull_mode != PIPE_WINDING_NONE)) 
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      /* If we are decomposing to lines, and lines need the pipeline,
+       * then we also need the pipeline for tris.
+       */
+      if (fill == PIPE_POLYGON_MODE_LINE &&
+          (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES))
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      /* Similarly for points:
+       */
+      if (fill == PIPE_POLYGON_MODE_POINT &&
+          (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS))
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      if (offset) {
+         rast->slopescaledepthbias = templ->offset_scale;
+         rast->depthbias = templ->offset_units;
+      }
+
+      rast->hw_unfilled = fill;
+   }
+
+   if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
+      /* Turn off stuff which will get done in the draw module:
+       */
+      rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+      rast->slopescaledepthbias = 0;
+      rast->depthbias = 0;
+   }
+
+   return rast;
+}
+
+static void svga_bind_rasterizer_state(struct pipe_context *pipe,
+                                       void *state)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
+   struct pipe_rasterizer_state *draw_templ = raster ? &raster->templ : NULL;
+
+   /* Record the binding, pass the template (or NULL) to the draw module. */
+   svga->curr.rast = raster;
+   draw_set_rasterizer_state(svga->swtnl.draw, draw_templ);
+   svga->dirty |= SVGA_NEW_RAST;
+}
+
+static void svga_delete_rasterizer_state(struct pipe_context *pipe,
+                                         void *raster)
+{
+   FREE(raster);   /* frees the state object itself; pipe is not used */
+}
+
+/* Install the rasterizer state create/bind/delete entry points. */
+void svga_init_rasterizer_functions(struct svga_context *svga)
+{
+   svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state;
+   svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state;
+   svga->pipe.create_rasterizer_state = svga_create_rasterizer_state;
+}
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
new file mode 100644
index 0000000000..3eeca6b784
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -0,0 +1,243 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+#include "svga_debug.h"
+
+static INLINE unsigned
+translate_wrap_mode(unsigned wrap)
+{
+   /* PIPE_TEX_WRAP_x -> SVGA3D_TEX_ADDRESS_x; unknown modes assert. */
+   unsigned address = SVGA3D_TEX_ADDRESS_WRAP;
+
+   switch (wrap) {
+   case PIPE_TEX_WRAP_REPEAT:
+      break;
+   case PIPE_TEX_WRAP_CLAMP:
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      /* CLAMP_TO_EDGE shares CLAMP: unfortunately
+       * SVGA3D_TEX_ADDRESS_EDGE is not respected by hardware. */
+      address = SVGA3D_TEX_ADDRESS_CLAMP;
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      address = SVGA3D_TEX_ADDRESS_BORDER;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      address = SVGA3D_TEX_ADDRESS_MIRROR;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+      address = SVGA3D_TEX_ADDRESS_MIRRORONCE;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+   return address;
+}
+
+static INLINE unsigned translate_img_filter( unsigned filter )
+{
+   if (filter == PIPE_TEX_FILTER_NEAREST)   /* PIPE -> SVGA3D filter */
+      return SVGA3D_TEX_FILTER_NEAREST;
+   if (filter == PIPE_TEX_FILTER_LINEAR)
+      return SVGA3D_TEX_FILTER_LINEAR;
+   if (filter == PIPE_TEX_FILTER_ANISO)
+      return SVGA3D_TEX_FILTER_ANISOTROPIC;
+   assert(0);                               /* unknown: use NEAREST */
+   return SVGA3D_TEX_FILTER_NEAREST;
+}
+
+static INLINE unsigned translate_mip_filter( unsigned filter )
+{
+   if (filter == PIPE_TEX_MIPFILTER_NONE)   /* PIPE -> SVGA3D mip filter */
+      return SVGA3D_TEX_FILTER_NONE;
+   if (filter == PIPE_TEX_MIPFILTER_NEAREST)
+      return SVGA3D_TEX_FILTER_NEAREST;
+   if (filter == PIPE_TEX_MIPFILTER_LINEAR)
+      return SVGA3D_TEX_FILTER_LINEAR;
+   assert(0);                               /* unknown: no mip filtering */
+   return SVGA3D_TEX_FILTER_NONE;
+}
+
+static void *
+svga_create_sampler_state(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *sampler)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );
+
+   /* Translate 'sampler' into an SVGA CSO; NULL if we are out of memory. */
+   if (!cso)
+      return NULL;
+
+   cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
+   cso->magfilter = translate_img_filter( sampler->mag_img_filter );
+   cso->minfilter = translate_img_filter( sampler->min_img_filter );
+   cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 );
+   cso->lod_bias = sampler->lod_bias;
+   cso->addressu = translate_wrap_mode(sampler->wrap_s);
+   cso->addressv = translate_wrap_mode(sampler->wrap_t);
+   cso->addressw = translate_wrap_mode(sampler->wrap_r);
+   cso->normalized_coords = sampler->normalized_coords;
+   cso->compare_mode = sampler->compare_mode;
+   cso->compare_func = sampler->compare_func;
+
+   {
+      ubyte r = float_to_ubyte(sampler->border_color[0]);
+      ubyte g = float_to_ubyte(sampler->border_color[1]);
+      ubyte b = float_to_ubyte(sampler->border_color[2]);
+      ubyte a = float_to_ubyte(sampler->border_color[3]);
+      util_pack_color_ub( r, g, b, a,
+                          PIPE_FORMAT_B8G8R8A8_UNORM, &cso->bordercolor );
+   }
+
+   /* No SVGA3D support for min/max LOD clamping. */
+   cso->min_lod = 0;
+   cso->view_min_lod = MAX2(sampler->min_lod, 0);
+   cso->view_max_lod = MAX2(sampler->max_lod, 0);
+
+   /* Use min_mipmap: pin a single-level LOD range via min_lod instead. */
+   if (svga->debug.use_min_mipmap) {
+      if (cso->view_min_lod == cso->view_max_lod) {
+         cso->min_lod = cso->view_min_lod;
+         cso->view_min_lod = 0;
+         cso->view_max_lod = 1000; /* Just a high number */
+         cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
+      }
+   }
+
+   SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
+            cso->min_lod, cso->view_min_lod, cso->view_max_lod,
+            cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
+
+   return cso;
+}
+
+static void svga_bind_sampler_states(struct pipe_context *pipe,
+                                     unsigned num, void **sampler)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned i;
+
+   assert(num <= PIPE_MAX_SAMPLERS);
+
+   /* Re-binding the identical sampler set is a no-op; stay quiet about it */
+   if (num == svga->curr.num_samplers &&
+       !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) {
+      if (0) debug_printf("sampler noop\n");
+      return;
+   }
+
+   for (i = 0; i < num; i++)
+      svga->curr.sampler[i] = sampler[i];
+   /* Clear slots past 'num' that are left over from the previous binding */
+   for (i = num; i < svga->curr.num_samplers; i++)
+      svga->curr.sampler[i] = NULL;
+
+   svga->curr.num_samplers = num;
+   svga->dirty |= SVGA_NEW_SAMPLER;
+}
+
+/* Free a sampler CSO; the context argument is not used. */
+static void svga_delete_sampler_state(struct pipe_context *pipe, void *sampler)
+{
+   FREE(sampler);
+}
+
+
+static void svga_set_sampler_textures(struct pipe_context *pipe,
+                                      unsigned num,
+                                      struct pipe_texture **texture)
+{
+   /* Bind 'num' sampler textures, keeping the references and the
+    * per-unit sRGB/1D flag masks held in svga->curr up to date.
+    */
+   struct svga_context *svga = svga_context(pipe);
+   unsigned srgb_mask = 0, one_d_mask = 0;   /* one bit per sampler unit */
+   uint unit;
+
+   assert(num <= PIPE_MAX_SAMPLERS);
+
+   /* Same textures as currently bound: nothing to update. */
+   if (num == svga->curr.num_textures &&
+       memcmp(svga->curr.texture, texture,
+              num * sizeof(struct pipe_texture *)) == 0) {
+      if (0) debug_printf("texture noop\n");
+      return;
+   }
+
+   /* Reference the new textures and note which units hold sRGB / 1D ones. */
+   for (unit = 0; unit < num; unit++) {
+      struct pipe_texture *tex = texture[unit];
+
+      pipe_texture_reference(&svga->curr.texture[unit], tex);
+      if (tex && tex->format == PIPE_FORMAT_A8R8G8B8_SRGB)
+         srgb_mask |= 1 << unit;
+      if (tex && tex->target == PIPE_TEXTURE_1D)
+         one_d_mask |= 1 << unit;
+   }
+
+   /* Drop references held by units at or beyond the new count. */
+   for (unit = num; unit < svga->curr.num_textures; unit++)
+      pipe_texture_reference(&svga->curr.texture[unit], NULL);
+
+   svga->curr.num_textures = num;
+   svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
+
+   /* Only raise the flags-dirty bit when one of the masks changed. */
+   if (srgb_mask != svga->curr.tex_flags.flag_srgb ||
+       one_d_mask != svga->curr.tex_flags.flag_1d) {
+      svga->curr.tex_flags.flag_srgb = srgb_mask;
+      svga->curr.tex_flags.flag_1d = one_d_mask;
+      svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
+   }
+}
+
+
+/* Install the sampler-state and sampler-texture callbacks on svga->pipe. */
+void svga_init_sampler_functions( struct svga_context *svga )
+{
+   svga->pipe.create_sampler_state = svga_create_sampler_state;
+   svga->pipe.bind_sampler_states = svga_bind_sampler_states;
+   svga->pipe.delete_sampler_state = svga_delete_sampler_state;
+   svga->pipe.set_sampler_textures = svga_set_sampler_textures;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
new file mode 100644
index 0000000000..28e2787e0d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -0,0 +1,115 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+static void svga_set_vertex_buffers(struct pipe_context *pipe,
+                                    unsigned count,
+                                    const struct pipe_vertex_buffer *buffers)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned i;
+   boolean any_user_buffer = FALSE;
+
+   /* Check for no change */
+   if (count == svga->curr.num_vertex_buffers &&
+       memcmp(svga->curr.vb, buffers, count * sizeof buffers[0]) == 0)
+      return;
+
+   /* Adjust refcounts; a slot may carry a NULL buffer, so guard ->user */
+   for (i = 0; i < count; i++) {
+      pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer);
+      if (buffers[i].buffer && svga_buffer(buffers[i].buffer)->user)
+         any_user_buffer = TRUE;
+   }
+   /* Release buffers still bound to slots beyond the new count */
+   for ( ; i < svga->curr.num_vertex_buffers; i++)
+      pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL);
+
+   /* Copy remaining data */
+   memcpy(svga->curr.vb, buffers, count * sizeof buffers[0]);
+   svga->curr.num_vertex_buffers = count;
+   svga->curr.any_user_vertex_buffers = any_user_buffer;
+
+   svga->dirty |= SVGA_NEW_VBUFFER;
+}
+
+static void svga_set_vertex_elements(struct pipe_context *pipe,
+                                     unsigned count,
+                                     const struct pipe_vertex_element *elements)
+{
+   /* Copy the element descriptions into svga->curr and flag them dirty. */
+   struct svga_context *svga = svga_context(pipe);
+   unsigned n = count;
+
+   while (n--)
+      svga->curr.ve[n] = elements[n];
+   svga->curr.num_vertex_elements = count;
+   svga->dirty |= SVGA_NEW_VELEMENT;
+}
+
+
+static void svga_set_edgeflags(struct pipe_context *pipe,
+                               const unsigned *bitfield)
+{
+   /* NULL replacing NULL changes nothing; anything else is a new binding. */
+   struct svga_context *svga = svga_context(pipe);
+   if (!bitfield && !svga->curr.edgeflags)
+      return;
+   svga->curr.edgeflags = bitfield;
+   svga->dirty |= SVGA_NEW_EDGEFLAGS;
+}
+
+
+void svga_cleanup_vertex_state( struct svga_context *svga )
+{
+   unsigned slot;
+   /* Release the reference held on every currently bound vertex buffer. */
+   for (slot = 0; slot < svga->curr.num_vertex_buffers; slot++)
+      pipe_buffer_reference(&svga->curr.vb[slot].buffer, NULL);
+}
+
+/* Install the vertex buffer/element/edgeflag setters on svga->pipe. */
+void svga_init_vertex_functions( struct svga_context *svga )
+{
+   svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
+   svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+   svga->pipe.set_edgeflags = svga_set_edgeflags;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
new file mode 100644
index 0000000000..e5ffe668c3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -0,0 +1,189 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+
+static const struct tgsi_token *substitute_vs( 
+   unsigned shader_id,
+   const struct tgsi_token *old_tokens )
+{
+#if 0
+   if (shader_id == 12) {
+   static struct tgsi_token tokens[300];
+
+   const char *text = 
+      "VERT1.1\n"
+      "DCL IN[0]\n"
+      "DCL IN[1]\n"
+      "DCL IN[2]\n"
+      "DCL OUT[0], POSITION\n"
+      "DCL TEMP[0..4]\n"
+      "IMM FLT32 {     1.0000,     1.0000,     1.0000,     1.0000 }\n"
+      "IMM FLT32 {     0.45,     1.0000,     1.0000,     1.0000 }\n"
+      "IMM FLT32 { 1.297863, 0.039245, 0.035993, 0.035976}\n"
+      "IMM FLT32 { -0.019398, 1.696131, -0.202151, -0.202050  }\n"
+      "IMM FLT32 { 0.051711, -0.348713, -0.979204, -0.978714  }\n"
+      "IMM FLT32 { 0.000000, 0.000003, 139.491577, 141.421356 }\n"
+      "DCL CONST[0..7]\n"
+      "DCL CONST[9..16]\n"
+      "  MOV TEMP[2], IMM[0]\n"
+
+      "  MOV TEMP[2].xyz, IN[2]\n"
+      "  MOV TEMP[2].xyz, IN[0]\n"
+      "  MOV TEMP[2].xyz, IN[1]\n"
+
+      "  MUL TEMP[1], IMM[3], TEMP[2].yyyy\n"
+      "  MAD TEMP[3], IMM[2],  TEMP[2].xxxx, TEMP[1]\n"
+      "  MAD TEMP[1], IMM[4], TEMP[2].zzzz, TEMP[3]\n"
+      "  MAD TEMP[4], IMM[5], TEMP[2].wwww, TEMP[1]\n"
+
+      "  MOV OUT[0], TEMP[4]\n"
+      "  END\n";
+
+   if (!tgsi_text_translate( text,
+                             tokens,
+                             Elements(tokens) ))
+   {
+      assert(0);
+      return NULL;
+   }
+
+   return tokens;
+   }
+#endif
+   /* No substitute selected: hand back the shader's own tokens unchanged */
+   return old_tokens;
+}
+
+
+/***********************************************************************
+ * Vertex shaders 
+ */
+
+static void *
+svga_create_vs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+   if (!vs)
+      return NULL;
+
+   /* Duplicate the tokens (maybe a debug substitute); this can fail. */
+   vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id,
+                                                   templ->tokens));
+   if (!vs->base.tokens) {
+      FREE(vs);
+      return NULL;
+   }
+
+   /* Collect basic info that we'll need later: */
+   tgsi_scan_shader(vs->base.tokens, &vs->base.info);
+
+   {
+      /* Need to construct a new template in case we substituted a
+       * debug shader. */
+      struct pipe_shader_state tmp2 = *templ;
+      tmp2.tokens = vs->base.tokens;
+      vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2);
+   }
+
+   vs->base.id = svga->debug.shader_id++;
+   vs->base.use_sm30 = svgascreen->use_vs30;
+
+   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+                   __FUNCTION__, vs->base.id,
+                   vs->base.info.num_inputs, vs->base.info.num_outputs);
+   }
+
+   return vs;
+}
+
+static void svga_bind_vs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   /* Record the shader as current and mark SVGA_NEW_VS dirty. */
+   svga->curr.vs = shader;
+   svga->dirty |= SVGA_NEW_VS;
+}
+
+
+static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+   struct svga_shader_result *result, *tmp;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry( svga );
+
+   draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
+   /* Issue a DestroyShader command for every result, then release it */
+   for (result = vs->base.results; result; result = tmp ) {
+      tmp = result->next;
+      /* If the command fails, flush the context and try once more */
+      ret = SVGA3D_DestroyShader(svga->swc, 
+                                 result->id,
+                                 SVGA3D_SHADERTYPE_VS );
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_DestroyShader(svga->swc, 
+                                    result->id,
+                                    SVGA3D_SHADERTYPE_VS );
+         assert(ret == PIPE_OK);
+      }
+
+      svga_destroy_shader_result( result );
+   }
+
+   FREE((void *)vs->base.tokens);
+   FREE(vs);
+}
+
+/* Install the vertex-shader create/bind/delete callbacks on svga->pipe. */
+void svga_init_vs_functions( struct svga_context *svga )
+{
+   svga->pipe.create_vs_state = svga_create_vs_state;
+   svga->pipe.bind_vs_state = svga_bind_vs_state;
+   svga->pipe.delete_vs_state = svga_delete_vs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
new file mode 100644
index 0000000000..3afcaffff5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -0,0 +1,435 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "util/u_string.h"
+#include "util/u_math.h"
+
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+#ifdef DEBUG
+int SVGA_DEBUG = 0;
+/* Flag names accepted by the SVGA_DEBUG option and the bits they select */
+static const struct debug_named_value svga_debug_flags[] = {
+   { "dma",      DEBUG_DMA },
+   { "tgsi",     DEBUG_TGSI },
+   { "pipe",     DEBUG_PIPE },
+   { "state",    DEBUG_STATE },
+   { "screen",   DEBUG_SCREEN },
+   { "tex",      DEBUG_TEX },
+   { "swtnl",    DEBUG_SWTNL },
+   { "const",    DEBUG_CONSTS },
+   { "viewport", DEBUG_VIEWPORT },
+   { "views",    DEBUG_VIEWS },
+   { "perf",     DEBUG_PERF },
+   { "flush",    DEBUG_FLUSH },
+   { "sync",     DEBUG_SYNC },
+   {NULL, 0}
+};
+#endif
+
+static const char *
+svga_get_vendor( struct pipe_screen *pscreen )
+{
+   return "VMware, Inc.";   /* fixed string; pscreen is not used */
+}
+
+
+static const char *
+svga_get_name( struct pipe_screen *pscreen )
+{
+#ifdef DEBUG
+   /* Only return internal details (here the PIPE_ATOMIC string) in the
+    * DEBUG version; release builds return a fixed name: */
+   return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC;
+#else
+   return "SVGA3D; build: RELEASE; ";
+#endif
+}
+
+
+
+/* Report driver limits/capabilities as floats; unknown caps return 0. */
+static float
+svga_get_paramf(struct pipe_screen *screen, int param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   SVGA3dDevCapResult result;
+
+   switch (param) {
+   case PIPE_CAP_MAX_LINE_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      return 7.0;
+
+   case PIPE_CAP_MAX_POINT_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      /* Keep this to a reasonable size to avoid failures in
+       * conform/pntaa.c:
+       */
+      return 80.0;
+
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      return 4.0;
+
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      return 16.0;
+
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      return 16;
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_GLSL:
+      return svgascreen->use_ps30 && svgascreen->use_vs30;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 1;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_RENDER_TARGETS, &result))
+         return 1;
+      if(!result.u)
+         return 1;
+      return MIN2(result.u, PIPE_MAX_COLOR_BUFS);
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return 1;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return SVGA_MAX_TEXTURE_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 8;  /* max 128x128x128 */
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return SVGA_MAX_TEXTURE_LEVELS;
+
+   case PIPE_CAP_TEXTURE_MIRROR_REPEAT: /* req. for GL 1.4 */
+      return 1;
+
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
+      return 1;
+
+   default:
+      return 0;
+   }
+}
+
+
+/* This is a fairly pointless interface: it only forwards to
+ * svga_get_paramf() and casts the result to int. */
+static int
+svga_get_param(struct pipe_screen *screen, int param)
+{
+   return (int) svga_get_paramf( screen, param );
+}
+
+
+static INLINE SVGA3dDevCapIndex
+svga_translate_format_cap(enum pipe_format format)
+{
+   switch(format) {
+   /* Formats without a surface-format devcap yield SVGA3D_DEVCAP_MAX */
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4;
+
+   case PIPE_FORMAT_Z16_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D16;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_ALPHA8;
+   case PIPE_FORMAT_L8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8;
+
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT1;
+   case PIPE_FORMAT_DXT3_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT3;
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT5;
+
+   default:
+      return SVGA3D_DEVCAP_MAX;
+   }
+}
+
+
+static boolean
+svga_is_format_supported( struct pipe_screen *screen,
+                          enum pipe_format format,
+                          enum pipe_texture_target target,
+                          unsigned tex_usage,
+                          unsigned geom_flags )
+{
+   /* Decide whether 'format' can be used for 'tex_usage': driver
+    * overrides first, then the host's surface-format caps, and lastly
+    * the driver's own format translation functions.  'target' and
+    * 'geom_flags' are not consulted.
+    */
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   SVGA3dDevCapResult caps;
+   SVGA3dDevCapIndex cap_index;
+
+   assert(tex_usage);
+
+   /* Override host capabilities */
+   if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+      switch (format) {
+      case PIPE_FORMAT_A4R4G4B4_UNORM:
+      case PIPE_FORMAT_A1R5G5B5_UNORM:
+         /* Often unsupported/problematic. This means we end up with the
+          * same visuals for all virtual hardware implementations.
+          */
+         return FALSE;
+
+      case PIPE_FORMAT_DXT1_RGB:
+      case PIPE_FORMAT_DXT1_RGBA:
+      case PIPE_FORMAT_DXT3_RGBA:
+      case PIPE_FORMAT_DXT5_RGBA:
+         /* Simulate ability to render into compressed textures */
+         return TRUE;
+
+      default:
+         break;
+      }
+   }
+
+   /* Try to query the host */
+   cap_index = svga_translate_format_cap(format);
+   if (cap_index < SVGA3D_DEVCAP_MAX && sws->get_cap(sws, cap_index, &caps)) {
+      SVGA3dSurfaceFormatCaps wanted;
+
+      wanted.value = 0;
+      if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+         wanted.offscreenRenderTarget = 1;
+      if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+         wanted.zStencil = 1;
+      if (tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+         wanted.texture = 1;
+
+      /* Supported only if the host reports every requested cap bit. */
+      return (caps.u & wanted.value) == wanted.value;
+   }
+
+   /* Use our translate functions directly rather than relying on a
+    * duplicated list of supported formats which is prone to getting
+    * out of sync:
+    */
+   if (tex_usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL))
+      return svga_translate_format_render(format) != SVGA3D_FORMAT_INVALID;
+
+   return svga_translate_format(format) != SVGA3D_FORMAT_INVALID;
+}
+
+/* Forward the fence reference update to the winsys screen unchanged. */
+static void
+svga_fence_reference(struct pipe_screen *screen,
+                     struct pipe_fence_handle **ptr,
+                     struct pipe_fence_handle *fence)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   sws->fence_reference(sws, ptr, fence);
+}
+
+/* Forward the query to the winsys screen and return its result as is. */
+static int
+svga_fence_signalled(struct pipe_screen *screen,
+                     struct pipe_fence_handle *fence,
+                     unsigned flag)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   return sws->fence_signalled(sws, fence, flag);
+}
+
+/* Forward the request to the winsys screen and return its result as is. */
+static int
+svga_fence_finish(struct pipe_screen *screen,
+                  struct pipe_fence_handle *fence,
+                  unsigned flag)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   return sws->fence_finish(sws, fence, flag);
+}
+
+
+static void
+svga_destroy_screen( struct pipe_screen *screen )
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   /* Teardown order: cache, mutexes, context, winsys, then the screen */
+   svga_screen_cache_cleanup(svgascreen);
+
+   pipe_mutex_destroy(svgascreen->swc_mutex);
+   pipe_mutex_destroy(svgascreen->tex_mutex);
+
+   svgascreen->swc->destroy(svgascreen->swc);
+   /* The winsys screen goes too, after the context it created */
+   svgascreen->sws->destroy(svgascreen->sws);
+   
+   FREE(svgascreen);
+}
+
+
+/**
+ * Create a new svga_screen object.  Returns NULL on allocation failure,
+ * missing shader model 3.0 support or winsys context creation failure. */
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws)
+{
+   struct svga_screen *svgascreen;
+   struct pipe_screen *screen;
+   SVGA3dDevCapResult result;
+
+#ifdef DEBUG
+   SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
+#endif
+
+   svgascreen = CALLOC_STRUCT(svga_screen);
+   if (!svgascreen)
+      goto error1;
+
+   svgascreen->debug.force_level_surface_view =
+      debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE);
+   svgascreen->debug.force_surface_view =
+      debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE);
+   svgascreen->debug.force_sampler_view =
+      debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE);
+   svgascreen->debug.no_surface_view =
+      debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE);
+   svgascreen->debug.no_sampler_view =
+      debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE);
+
+   screen = &svgascreen->screen;
+
+   screen->destroy = svga_destroy_screen;
+   screen->get_name = svga_get_name;
+   screen->get_vendor = svga_get_vendor;
+   screen->get_param = svga_get_param;
+   screen->get_paramf = svga_get_paramf;
+   screen->is_format_supported = svga_is_format_supported;
+   screen->fence_reference = svga_fence_reference;
+   screen->fence_signalled = svga_fence_signalled;
+   screen->fence_finish = svga_fence_finish;
+   svgascreen->sws = sws;
+
+   svga_screen_init_texture_functions(screen);
+   svga_screen_init_buffer_functions(screen);
+
+   svgascreen->use_ps30 =
+      sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
+      result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
+
+   svgascreen->use_vs30 =
+      sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
+      result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
+
+#if 1
+   /* Shader model 2.0 is unsupported at the moment. */
+   if(!svgascreen->use_ps30 || !svgascreen->use_vs30)
+      goto error2;
+#else
+   if(debug_get_bool_option("SVGA_NO_SM30", FALSE))
+      svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE;
+#endif
+
+   svgascreen->swc = sws->context_create(sws);
+   if(!svgascreen->swc)
+      goto error2;
+
+   pipe_mutex_init(svgascreen->tex_mutex);
+   pipe_mutex_init(svgascreen->swc_mutex);
+
+   LIST_INITHEAD(&svgascreen->cached_buffers);
+   
+   svga_screen_cache_init(svgascreen);
+
+   return screen;
+error2:
+   FREE(svgascreen);
+error1:
+   return NULL;
+}
+
+void svga_screen_flush( struct svga_screen *svgascreen, 
+                        struct pipe_fence_handle **pfence )
+{
+   struct pipe_fence_handle *fence = NULL;
+
+   SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__);
+   /* Flush the screen's own winsys context while holding swc_mutex */
+   pipe_mutex_lock(svgascreen->swc_mutex);
+   svgascreen->swc->flush(svgascreen->swc, &fence);
+   pipe_mutex_unlock(svgascreen->swc_mutex);
+   
+   svga_screen_cache_flush(svgascreen, fence);
+   /* Hand the fence to the caller, or drop our reference to it */
+   if(pfence)
+      *pfence = fence;
+   else
+      svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL);
+}
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen)
+{
+   return svga_screen(screen)->sws;   /* winsys the screen was created on */
+}
+
+#ifdef DEBUG
+struct svga_screen *
+svga_screen(struct pipe_screen *screen)
+{
+   assert(screen);
+   assert(screen->destroy == svga_destroy_screen);   /* must be one of ours */
+   return (struct svga_screen *)screen;
+}
+#endif
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
new file mode 100644
index 0000000000..b94ca7fc1c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_H
+#define SVGA_SCREEN_H
+
+
+#include "pipe/p_screen.h"
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_context;
+struct SVGACmdMemory;
+
+#define SVGA_COMBINE_USERBUFFERS 1
+
+/**
+ * Subclass of pipe_screen
+ */
+struct svga_screen
+{
+   struct pipe_screen screen;   /* must stay first: svga_screen() casts to it */
+   struct svga_winsys_screen *sws;
+
+   unsigned use_ps30;   /**< host reports fragment shader version >= 3.0 */
+   unsigned use_vs30;   /**< host reports vertex shader version >= 3.0 */
+   
+   struct {
+      boolean force_level_surface_view;
+      boolean force_surface_view;
+      boolean no_surface_view;
+      boolean force_sampler_view;
+      boolean no_sampler_view;
+   } debug;
+
+   /* The screen needs its own context */
+   struct svga_winsys_context *swc;
+   struct SVGACmdMemory *fifo;
+
+   unsigned texture_timestamp;
+   pipe_mutex tex_mutex; 
+   pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */
+   
+   /** 
+    * List of buffers with cached GMR. Ordered from the most recently used to
+    * the least recently used, i.e. eviction starts from the tail of the list.
+    */
+   struct list_head cached_buffers;
+   
+   struct svga_host_surface_cache cache;
+};
+
+#ifndef DEBUG
+/** cast wrapper */
+static INLINE struct svga_screen *
+svga_screen(struct pipe_screen *pscreen)
+{
+   return (struct svga_screen *) pscreen;
+}
+#else
+struct svga_screen *
+svga_screen(struct pipe_screen *screen);
+#endif
+
+void svga_screen_flush( struct svga_screen *svga_screen, 
+                        struct pipe_fence_handle **pfence );
+
+#endif /* SVGA_SCREEN_H */
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
new file mode 100644
index 0000000000..3b7811734e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -0,0 +1,820 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+
+
+/**
+ * Vertex and index buffers have to be treated slightly differently from 
+ * regular guest memory regions because the SVGA device sees them as 
+ * surfaces, and the state tracker can create/destroy without the pipe 
+ * driver, therefore we must do the uploads from the vws.
+ */
+static INLINE boolean
+svga_buffer_needs_hw_storage(unsigned usage)
+{
+   return usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX);
+}
+
+/** Create the host surface for sbuf (possibly a recycled one) if it has none. */
+static INLINE enum pipe_error
+svga_buffer_create_host_surface(struct svga_screen *ss,
+                                struct svga_buffer *sbuf)
+{
+   if(!sbuf->handle) {
+      sbuf->key.flags = 0;
+      
+      sbuf->key.format = SVGA3D_BUFFER;
+      if(sbuf->base.usage & PIPE_BUFFER_USAGE_VERTEX)
+         sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
+      if(sbuf->base.usage & PIPE_BUFFER_USAGE_INDEX)
+         sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+      /* A buffer is keyed as a surface of base.size x 1 x 1, one face/level. */
+      sbuf->key.size.width = sbuf->base.size;
+      sbuf->key.size.height = 1;
+      sbuf->key.size.depth = 1;
+      
+      sbuf->key.numFaces = 1;
+      sbuf->key.numMipLevels = 1;
+      
+      sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+      if(!sbuf->handle)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   
+      /* Always set the discard flag on the first time the buffer is written
+       * as svga_screen_surface_create might have passed a recycled host
+       * buffer.
+       */
+      sbuf->hw.flags.discard = TRUE;
+
+      SVGA_DBG(DEBUG_DMA, "   grab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+   }
+   /* Also PIPE_OK when the handle already existed. */
+   return PIPE_OK;
+}   
+
+/** Hand sbuf's host surface, if any, to svga_screen_surface_destroy. */
+static INLINE void
+svga_buffer_destroy_host_surface(struct svga_screen *ss,
+                                 struct svga_buffer *sbuf)
+{
+   if(sbuf->handle) {
+      SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+      svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+   }
+}   
+
+/** Destroy sbuf's winsys (GMR) buffer and unlink sbuf from the list it is on. */
+static INLINE void
+svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   struct svga_winsys_screen *sws = ss->sws;
+
+   assert(!sbuf->map.count);
+   assert(sbuf->hw.buf);
+   if(sbuf->hw.buf) {
+      sws->buffer_destroy(sws, sbuf->hw.buf);
+      sbuf->hw.buf = NULL;
+      assert(sbuf->head.prev && sbuf->head.next);
+      LIST_DEL(&sbuf->head);
+#ifdef DEBUG
+      sbuf->head.next = sbuf->head.prev = NULL; /* for the head.prev/next asserts */
+#endif
+   }
+}
+
+static INLINE enum pipe_error
+svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   if (sbuf->hw.buf && sbuf->hw.num_ranges) {
+      void *src;
+      /* Only storage with ranges still queued for upload is saved to swbuf. */
+      if (!sbuf->swbuf)
+	 sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment);
+      if (!sbuf->swbuf)
+	 return PIPE_ERROR_OUT_OF_MEMORY;
+      /* Copy the whole contents of the hw storage, not just the ranges. */
+      src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf,
+				PIPE_BUFFER_USAGE_CPU_READ);
+      if (!src)
+	 return PIPE_ERROR;
+
+      memcpy(sbuf->swbuf, src, sbuf->base.size);
+      ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+   }
+   /* Nothing to back up also counts as success. */
+   return PIPE_OK;
+}
+
+/**
+ * Try to make GMR space available by freeing the hardware storage of 
+ * one unmapped cached buffer. Returns FALSE if no buffer could be freed.
+ */
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss)
+{
+   struct list_head *curr;
+   struct svga_buffer *sbuf;
+   enum pipe_error ret = PIPE_OK;
+
+   curr = ss->cached_buffers.prev;
+   
+   /* free the least recently used buffer's hw storage which is not mapped */
+   do {
+      if(curr == &ss->cached_buffers)
+         return FALSE;
+
+      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+      /* Step towards the list head now; sbuf is judged in the loop condition. */
+      curr = curr->prev;
+      if (sbuf->map.count == 0)
+	 ret = svga_buffer_backup(ss, sbuf);
+      /* Keep looking while sbuf is mapped or could not be backed up. */
+   } while(sbuf->map.count != 0 || ret != PIPE_OK);
+   
+   svga_buffer_destroy_hw_storage(ss, sbuf);
+   
+   return TRUE;
+}
+
+/** Create a winsys buffer; on failure flush, then evict, and retry (else NULL). */
+struct svga_winsys_buffer *
+svga_winsys_buffer_create( struct svga_screen *ss,
+                           unsigned alignment, 
+                           unsigned usage,
+                           unsigned size )
+{
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_winsys_buffer *buf;
+   
+   /* Just try */
+   buf = sws->buffer_create(sws, alignment, usage, size);
+   if(!buf) {
+      /* Try flushing all pending DMAs */
+      SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n", 
+               size); 
+      svga_screen_flush(ss, NULL);
+      buf = sws->buffer_create(sws, alignment, usage, size);
+   }
+
+   if(!buf) {
+      /* Try evicting cached buffer storage; only logged when really needed */
+      SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", 
+               size);
+      while(!buf && svga_buffer_free_cached_hw_storage(ss))
+         buf = sws->buffer_create(sws, alignment, usage, size);
+   }
+   
+   return buf;
+}
+
+
+/**
+ * Allocate DMA'ble storage for the buffer, unless it already has some.
+ * New storage is linked at the front of the screen's cached_buffers list.
+ * Called before mapping a buffer or uploading a user/malloc buffer.
+ */
+static INLINE enum pipe_error
+svga_buffer_create_hw_storage(struct svga_screen *ss,
+                              struct svga_buffer *sbuf)
+{
+   if(!sbuf->hw.buf) {
+      unsigned alignment = sbuf->base.alignment;
+      unsigned usage = 0;
+      unsigned size = sbuf->base.size;
+      
+      sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size);
+      if(!sbuf->hw.buf)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      
+      assert(!sbuf->needs_flush);
+      assert(!sbuf->head.prev && !sbuf->head.next);
+      LIST_ADD(&sbuf->head, &ss->cached_buffers);
+   }
+   
+   return PIPE_OK;
+}
+
+/**
+ * Variant of SVGA3D_BufferDMA which leaves the copy boxes temporarily blank;
+ * svga_buffer_upload_flush fills them in later through sbuf->hw.boxes.
+ */
+static enum pipe_error
+svga_buffer_upload_command(struct svga_context *svga,
+                           struct svga_buffer *sbuf)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_winsys_buffer *guest = sbuf->hw.buf;
+   struct svga_winsys_surface *host = sbuf->handle;
+   SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM;
+   SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags;
+   SVGA3dCmdSurfaceDMA *cmd;
+   uint32 numBoxes = sbuf->hw.num_ranges;
+   SVGA3dCopyBox *boxes;
+   SVGA3dCmdSurfaceDMASuffix *pSuffix;
+   unsigned region_flags;
+   unsigned surface_flags;
+   struct pipe_buffer *dummy;
+   /* transfer is fixed above, so only the first branch is taken today. */
+   if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+   }
+   else if(transfer == SVGA3D_READ_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+   }
+   else {
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   assert(numBoxes);
+   /* Reserve the command with room for numBoxes copy boxes plus the suffix. */
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *cmd + numBoxes * sizeof *boxes + sizeof *pSuffix,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
+   cmd->guest.pitch = 0;
+
+   swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+   cmd->host.face = 0;
+   cmd->host.mipmap = 0;
+
+   cmd->transfer = transfer;
+   /* The boxes follow the command header; remember where to patch them. */
+   sbuf->hw.boxes = (SVGA3dCopyBox *)&cmd[1];
+   sbuf->hw.svga = svga;
+
+   /* Increment reference count */
+   dummy = NULL;
+   pipe_buffer_reference(&dummy, &sbuf->base);
+   /* The suffix sits right after the (still unfilled) boxes. */
+   pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes);
+   pSuffix->suffixSize = sizeof *pSuffix;
+   pSuffix->maximumOffset = sbuf->base.size;
+   pSuffix->flags = flags;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Patch up the upload DMA command reserved by svga_buffer_upload_command
+ * with the final ranges.
+ */
+static void
+svga_buffer_upload_flush(struct svga_context *svga,
+                         struct svga_buffer *sbuf)
+{
+   struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   SVGA3dCopyBox *boxes;
+   unsigned i;
+
+   assert(sbuf->handle); 
+   assert(sbuf->hw.buf);
+   assert(sbuf->hw.num_ranges);
+   assert(sbuf->hw.svga == svga);
+   assert(sbuf->hw.boxes);
+   
+   /*
+    * Patch the DMA command with the final copy box.
+    */
+
+   SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);
+
+   boxes = sbuf->hw.boxes;
+   for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+      SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
+               sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end);
+      /* Same offset on both sides: source x == destination x. */
+      boxes[i].x = sbuf->hw.ranges[i].start;
+      boxes[i].y = 0;
+      boxes[i].z = 0;
+      boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start;
+      boxes[i].h = 1;
+      boxes[i].d = 1;
+      boxes[i].srcx = sbuf->hw.ranges[i].start;
+      boxes[i].srcy = 0;
+      boxes[i].srcz = 0;
+   }
+   /* All queued ranges are in the command now; start over with none. */
+   sbuf->hw.num_ranges = 0;
+   memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags);
+   /* Move the buffer from the dirty list back to the screen's cached list. */
+   assert(sbuf->head.prev && sbuf->head.next);
+   LIST_DEL(&sbuf->head);
+   sbuf->needs_flush = FALSE;
+   /* XXX: do we care about cached_buffers any more ?*/
+   LIST_ADD(&sbuf->head, &ss->cached_buffers);
+
+   sbuf->hw.svga = NULL;
+   sbuf->hw.boxes = NULL;
+
+   /* Decrement reference count (taken in svga_buffer_upload_command) */
+   pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL);
+}
+
+
+/**
+ * Queue a DMA upload of a range of this buffer to the host.
+ *
+ * This function only notes the range down. It doesn't actually emit a DMA
+ * upload command. That only happens when a context tries to refer to this
+ * buffer, and the DMA upload command is added to that context's command buffer.
+ * 
+ * We try to lump as many contiguous DMA transfers together as possible, and
+ * never track more than SVGA_BUFFER_MAX_RANGES ranges. [start, end) is in bytes.
+ */
+static void
+svga_buffer_upload_queue(struct svga_buffer *sbuf,
+                         unsigned start,
+                         unsigned end)
+{
+   unsigned i;
+
+   assert(sbuf->hw.buf);
+   assert(end > start);
+   
+   /* Try to grow one of the ranges.
+    *
+    * Note that it is not this function task to care about overlapping ranges,
+    * as the GMR was already given so it is too late to do anything. Situations
+    * where overlapping ranges may pose a problem should be detected via
+    * pipe_context::is_buffer_referenced and the context that refers to the
+    * buffer should be flushed. */
+   for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+      if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) {
+         sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start);
+         sbuf->hw.ranges[i].end   = MAX2(sbuf->hw.ranges[i].end,    end);
+         return;
+      }
+   }
+
+   /* We cannot add a new range to an existing DMA command, so patch-up the
+    * pending DMA upload and start clean. */
+   if(sbuf->needs_flush)
+      svga_buffer_upload_flush(sbuf->hw.svga, sbuf);
+
+   assert(!sbuf->needs_flush);
+   assert(!sbuf->hw.svga);
+   assert(!sbuf->hw.boxes);
+
+   /* Add a new range. If the array is full, widen the last range instead of
+    * writing past the end; this only makes the upload cover the gap too. */
+   if(sbuf->hw.num_ranges < SVGA_BUFFER_MAX_RANGES) {
+      sbuf->hw.ranges[sbuf->hw.num_ranges].start = start;
+      sbuf->hw.ranges[sbuf->hw.num_ranges].end = end;
+      ++sbuf->hw.num_ranges;
+   }
+   else {
+      i = SVGA_BUFFER_MAX_RANGES - 1;
+      sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start);
+      sbuf->hw.ranges[i].end   = MAX2(sbuf->hw.ranges[i].end,    end);
+   }
+}
+
+/** Map the buffer for CPU access; offset and length are not used by the body. */
+static void *
+svga_buffer_map_range( struct pipe_screen *screen,
+                       struct pipe_buffer *buf,
+                       unsigned offset, unsigned length,
+                       unsigned usage )
+{
+   struct svga_screen *ss = svga_screen(screen); 
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   void *map;
+
+   if(sbuf->swbuf) {
+      /* User/malloc buffer */
+      map = sbuf->swbuf;
+   }
+   else {
+      if(!sbuf->hw.buf) {
+         struct svga_winsys_surface *handle = sbuf->handle;
+
+         if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK)
+            return NULL;
+         
+         /* Populate the hardware storage if the host surface pre-existed */
+         if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) {
+            SVGA3dSurfaceDMAFlags flags;
+            enum pipe_error ret;
+            struct pipe_fence_handle *fence = NULL;
+            
+            SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p, bytes %u - %u\n", 
+                     sbuf->handle, 0, sbuf->base.size);
+
+            memset(&flags, 0, sizeof flags);
+            
+            ret = SVGA3D_BufferDMA(ss->swc,
+                                   sbuf->hw.buf,
+                                   sbuf->handle,
+                                   SVGA3D_READ_HOST_VRAM,
+                                   sbuf->base.size,
+                                   0,
+                                   flags);
+            if(ret != PIPE_OK) {
+               ss->swc->flush(ss->swc, NULL);
+               /* Retry once after the flush. */
+               ret = SVGA3D_BufferDMA(ss->swc,
+                                      sbuf->hw.buf,
+                                      sbuf->handle,
+                                      SVGA3D_READ_HOST_VRAM,
+                                      sbuf->base.size,
+                                      0,
+                                      flags);
+               assert(ret == PIPE_OK); /* NOTE(review): failure is otherwise ignored */
+            }
+            /* Wait for the download to finish before the CPU reads the data. */
+            ss->swc->flush(ss->swc, &fence);
+            sws->fence_finish(sws, fence, 0);
+            sws->fence_reference(sws, &fence, NULL);
+         }
+      }
+      else {
+         if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) {
+            /* We already had the hardware storage but we would have to issue
+             * a download if we hadn't, so move the buffer to the beginning
+             * of the LRU list.
+             */
+            assert(sbuf->head.prev && sbuf->head.next);
+            LIST_DEL(&sbuf->head);
+            LIST_ADD(&sbuf->head, &ss->cached_buffers);
+         }
+      }
+         
+      map = sws->buffer_map(sws, sbuf->hw.buf, usage);
+   }
+
+   if(map) {
+      pipe_mutex_lock(ss->swc_mutex);
+
+      ++sbuf->map.count;
+
+      if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
+         assert(sbuf->map.count <= 1);
+         sbuf->map.writing = TRUE;
+         if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT)
+            sbuf->map.flush_explicit = TRUE;
+      }
+      
+      pipe_mutex_unlock(ss->swc_mutex);
+   }
+   
+   return map;
+}
+
+static void 
+svga_buffer_flush_mapped_range( struct pipe_screen *screen,
+                                struct pipe_buffer *buf,
+                                unsigned offset, unsigned length)
+{
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   struct svga_screen *ss = svga_screen(screen);
+   /* Only notes [offset, offset + length) down; no DMA is emitted here. */
+   pipe_mutex_lock(ss->swc_mutex);
+   assert(sbuf->map.writing);
+   if(sbuf->map.writing) {
+      assert(sbuf->map.flush_explicit);
+      if(sbuf->hw.buf)
+         svga_buffer_upload_queue(sbuf, offset, offset + length);
+   }
+   pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void 
+svga_buffer_unmap( struct pipe_screen *screen,
+                   struct pipe_buffer *buf)
+{
+   struct svga_screen *ss = svga_screen(screen); 
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   /* Undo the map bookkeeping under the mutex svga_buffer_map_range used. */
+   pipe_mutex_lock(ss->swc_mutex);
+   
+   assert(sbuf->map.count);
+   if(sbuf->map.count)
+      --sbuf->map.count;
+
+   if(sbuf->hw.buf)
+      sws->buffer_unmap(sws, sbuf->hw.buf);
+
+   if(sbuf->map.writing) {
+      if(!sbuf->map.flush_explicit) {
+         /* Not an explicit-flush mapping -- queue the whole buffer for upload */
+         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
+   
+         if(sbuf->hw.buf)
+            svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+      }
+      /* Write mappings are exclusive, so the write state ends here. */
+      sbuf->map.writing = FALSE;
+      sbuf->map.flush_explicit = FALSE;
+   }
+
+   pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void
+svga_buffer_destroy( struct pipe_buffer *buf )
+{
+   struct svga_screen *ss = svga_screen(buf->screen); 
+   struct svga_buffer *sbuf = svga_buffer( buf );
+
+   assert(!p_atomic_read(&buf->reference.count));
+   /* A pending upload command holds a reference, so none can be pending. */
+   assert(!sbuf->needs_flush);
+
+   if(sbuf->handle) {
+      SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+      svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+   }
+   
+   if(sbuf->hw.buf)
+      svga_buffer_destroy_hw_storage(ss, sbuf);
+   /* Memory handed in by the user is not ours to free. */
+   if(sbuf->swbuf && !sbuf->user)
+      align_free(sbuf->swbuf);
+   
+   FREE(sbuf);
+}
+
+static struct pipe_buffer *
+svga_buffer_create(struct pipe_screen *screen,
+                   unsigned alignment,
+                   unsigned usage,
+                   unsigned size)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_buffer *sbuf;
+   
+   sbuf = CALLOC_STRUCT(svga_buffer);
+   if(!sbuf)
+      goto error1;
+      
+   sbuf->magic = SVGA_BUFFER_MAGIC;
+   
+   pipe_reference_init(&sbuf->base.reference, 1);
+   sbuf->base.screen = screen;
+   sbuf->base.alignment = alignment;
+   sbuf->base.usage = usage;
+   sbuf->base.size = size;
+   /* Vertex/index buffers get a host surface now; the rest get malloc memory. */
+   if(svga_buffer_needs_hw_storage(usage)) {
+      if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
+         goto error2;
+   }
+   else {
+      if(alignment < sizeof(void*))
+         alignment = sizeof(void*);
+      /* NOTE(review): local copy only; sbuf->base.usage was stored above. */
+      usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+      
+      sbuf->swbuf = align_malloc(size, alignment);
+      if(!sbuf->swbuf)
+         goto error2;
+   }
+      
+   return &sbuf->base; 
+
+error2:
+   FREE(sbuf);
+error1:
+   return NULL;
+}
+
+static struct pipe_buffer *
+svga_user_buffer_create(struct pipe_screen *screen,
+                        void *ptr,
+                        unsigned bytes)
+{
+   struct svga_buffer *sbuf;
+   
+   sbuf = CALLOC_STRUCT(svga_buffer);
+   if(!sbuf)
+      goto no_sbuf;
+      
+   sbuf->magic = SVGA_BUFFER_MAGIC;
+   /* Use the caller's memory in place (no copy); user marks it as not ours. */
+   sbuf->swbuf = ptr;
+   sbuf->user = TRUE;
+   
+   pipe_reference_init(&sbuf->base.reference, 1);
+   sbuf->base.screen = screen;
+   sbuf->base.alignment = 1;
+   sbuf->base.usage = 0;
+   sbuf->base.size = bytes;
+   
+   return &sbuf->base; 
+
+no_sbuf:
+   return NULL;
+}
+
+/** Plug the buffer functions of this file into the given pipe_screen. */
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen)
+{
+   screen->buffer_create = svga_buffer_create;
+   screen->user_buffer_create = svga_user_buffer_create;
+   screen->buffer_map_range = svga_buffer_map_range;
+   screen->buffer_flush_mapped_range = svga_buffer_flush_mapped_range;
+   screen->buffer_unmap = svga_buffer_unmap;
+   screen->buffer_destroy = svga_buffer_destroy;
+}
+
+
+/** 
+ * Copy the contents of the user buffer / malloc buffer to a hardware buffer.
+ * Does nothing, and touches no lock, if hardware storage already exists.
+ * Returns PIPE_OK in that case and on success, an error code otherwise.
+ */
+static INLINE enum pipe_error
+svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   if(!sbuf->hw.buf) {
+      enum pipe_error ret;
+      void *map;
+      
+      assert(sbuf->swbuf);
+      if(!sbuf->swbuf)
+         return PIPE_ERROR;
+      
+      ret = svga_buffer_create_hw_storage(ss, sbuf);
+      assert(ret == PIPE_OK);
+      if(ret != PIPE_OK)
+         return ret;
+
+      pipe_mutex_lock(ss->swc_mutex);
+      map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE);
+      assert(map);
+      if(!map) {
+         pipe_mutex_unlock(ss->swc_mutex);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      memcpy(map, sbuf->swbuf, sbuf->base.size);
+      ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+
+      /* This user/malloc buffer is now indistinguishable from a gpu buffer */
+      assert(!sbuf->map.count);
+      if(!sbuf->map.count) {
+         if(sbuf->user)
+            sbuf->user = FALSE;
+         else
+            align_free(sbuf->swbuf);
+         sbuf->swbuf = NULL;
+      }
+      svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+      pipe_mutex_unlock(ss->swc_mutex);   /* pairs with the lock above */
+   }
+   return PIPE_OK;
+}
+
+/** Return buf's host surface, creating it and emitting pending uploads first. */
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+                   struct pipe_buffer *buf)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_buffer *sbuf;
+   enum pipe_error ret;
+
+   if(!buf)
+      return NULL;
+
+   sbuf = svga_buffer(buf);
+   
+   assert(!sbuf->map.count);
+   /* No surface yet: create one and move the swbuf contents to hw storage. */
+   if(!sbuf->handle) {
+      ret = svga_buffer_create_host_surface(ss, sbuf);
+      if(ret != PIPE_OK)
+	 return NULL;
+
+      ret = svga_buffer_update_hw(ss, sbuf);
+      if(ret != PIPE_OK)
+	 return NULL;
+   }
+   /* Queued ranges without a reserved command need a new upload command. */
+   if(!sbuf->needs_flush && sbuf->hw.num_ranges) {
+      /* Queue the buffer for flushing */
+      ret = svga_buffer_upload_command(svga, sbuf);
+      if(ret != PIPE_OK)
+         /* XXX: Should probably have a richer return value */
+         return NULL;
+
+      assert(sbuf->hw.svga == svga);
+      /* Park the buffer on this context's dirty list until it is flushed. */
+      sbuf->needs_flush = TRUE;
+      assert(sbuf->head.prev && sbuf->head.next);
+      LIST_DEL(&sbuf->head);
+      LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers);
+   }
+
+   return sbuf->handle;
+}
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+				enum SVGA3dSurfaceFormat format,
+				struct svga_winsys_surface *srf)
+{
+   struct pipe_buffer *buf;
+   struct svga_buffer *sbuf;
+   struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+   /* Zero-sized buffer; it only serves as a holder for the surface handle. */
+   buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0);
+   if (!buf)
+      return NULL;
+
+   sbuf = svga_buffer(buf);
+
+   /*
+    * We are not the creator of this surface and therefore we must not
+    * cache it for reuse. The caching code only caches SVGA3D_BUFFER surfaces
+    * so make sure this isn't one of those.
+    */
+
+   assert(format != SVGA3D_BUFFER);
+   sbuf->key.format = format;
+   sws->surface_reference(sws, &sbuf->handle, srf);
+
+   return buf;
+}
+
+/** Return the buffer's host surface via surface_reference (NULL if it has none). */
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer)
+{
+   struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen);
+   struct svga_winsys_surface *vsurf = NULL;
+
+   sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle);
+   return vsurf;
+}
+
+void
+svga_context_flush_buffers(struct svga_context *svga)
+{
+   struct list_head *curr, *next;
+   struct svga_buffer *sbuf;
+   /* Patch the pending upload command of every buffer on the dirty list. */
+   curr = svga->dirty_buffers.next;
+   next = curr->next;
+   while(curr != &svga->dirty_buffers) {
+      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+
+      assert(p_atomic_read(&sbuf->base.reference.count) != 0);
+      assert(sbuf->needs_flush);
+      /* The flush relinks sbuf onto another list, hence the saved next. */
+      svga_buffer_upload_flush(svga, sbuf);
+
+      curr = next; 
+      next = curr->next;
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h
new file mode 100644
index 0000000000..5d7af5a7c5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.h
@@ -0,0 +1,190 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_BUFFER_H
+#define SVGA_BUFFER_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+#define SVGA_BUFFER_MAGIC 0x344f9005
+
+/**
+ * Maximum number of discontiguous ranges
+ */
+#define SVGA_BUFFER_MAX_RANGES 32
+
+
+struct svga_screen;
+struct svga_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+struct svga_buffer_range
+{
+   unsigned start;
+   unsigned end;
+};
+
+
+/**
+ * Describe a pending upload of buffer ranges from guest memory to the host.
+ *
+ * This holds the information to emit a SVGA3dCmdSurfaceDMA.
+ */
+struct svga_buffer_upload
+{
+   /**
+    * Guest memory region.
+    */
+   struct svga_winsys_buffer *buf;
+   /** Ranges queued for upload; only the first num_ranges entries are valid. */
+   struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES];
+   unsigned num_ranges;
+
+   SVGA3dSurfaceDMAFlags flags;
+
+   /**
+    * Pointer to the DMA copy box *inside* the command buffer.
+    */
+   SVGA3dCopyBox *boxes;
+
+   /**
+    * Context that has the pending DMA to this buffer.
+    */
+   struct svga_context *svga;
+};
+
+
+/**
+ * SVGA pipe buffer.
+ */
+struct svga_buffer 
+{
+   struct pipe_buffer base;
+
+   /** 
+    * Marker to detect bad casts in runtime.
+    */ 
+   uint32_t magic;
+
+   /**
+    * Regular (non DMA'able) memory.
+    * 
+    * Used for user buffers or for buffers which we know before hand that can
+    * never be used by the virtual hardware directly, such as constant buffers.
+    */
+   void *swbuf;
+   
+   /** 
+    * Whether swbuf was created by the user or not.
+    */
+   boolean user;
+   
+   /**
+    * DMA'ble memory.
+    * 
+    * A piece of GMR memory. It is created when mapping the buffer, and will be
+    * used to upload/download vertex data from the host.
+    */
+   struct svga_buffer_upload hw;
+
+   /**
+    * Creation key for the host surface handle.
+    * 
+    * This structure describes all the host surface characteristics so that it 
+    * can be looked up in cache, since creating a host surface is often a slow
+    * operation.
+    */
+   struct svga_host_surface_cache_key key;
+   
+   /**
+    * Host surface handle.
+    * 
+    * This is a platform independent abstraction for host SID. It is created
+    * at buffer creation for vertex/index buffers, else in svga_buffer_handle.
+    */
+   struct svga_winsys_surface *handle;
+   /** Mapping state: number of active maps and the write/flush mode in use. */
+   struct {
+      unsigned count;
+      boolean writing;
+      boolean flush_explicit;
+   } map;
+   /** TRUE while an upload command for this buffer still awaits its boxes. */
+   boolean needs_flush;
+   struct list_head head;
+};
+
+/** Cast wrapper: asserts the magic marker and passes NULL through unchanged. */
+static INLINE struct svga_buffer *
+svga_buffer(struct pipe_buffer *buffer)
+{
+   if (buffer) {
+      assert(((struct svga_buffer *)buffer)->magic == SVGA_BUFFER_MAGIC);
+      return (struct svga_buffer *)buffer;
+   }
+   return NULL;
+}
+
+
+/**
+ * Returns TRUE for user buffers.  We may
+ * decide to use an alternate upload path for these buffers.
+ */
+static INLINE boolean 
+svga_buffer_is_user_buffer( struct pipe_buffer *buffer )
+{
+   return svga_buffer(buffer)->user;
+}
+
+
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen);
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+                   struct pipe_buffer *buf);
+
+void
+svga_context_flush_buffers(struct svga_context *svga);
+
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss);
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create(struct svga_screen *ss,
+                          unsigned alignment, 
+                          unsigned usage,
+                          unsigned size);
+
+#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
new file mode 100644
index 0000000000..7360c1688b
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -0,0 +1,307 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+
+#include "svga_debug.h"
+#include "svga_winsys.h"
+#include "svga_screen.h"
+#include "svga_screen_cache.h"
+
+
+#define SVGA_SURFACE_CACHE_ENABLED 1
+
+
+/**
+ * Compute the bucket index for this key: the number of times size.width can be
+ * halved before reaching zero, minus one (i.e. floor(log2(width)), 0 for width 0),
+ * plus 32 when the SVGA3D_SURFACE_HINT_INDEXBUFFER flag is set.
+ */
+static INLINE unsigned
+svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
+{
+   unsigned bucket = 0;
+   unsigned size = key->size.width;
+   
+   while ((size >>= 1))
+      ++bucket;
+   
+   if(key->flags & SVGA3D_SURFACE_HINT_INDEXBUFFER)
+      bucket += 32;
+   
+   assert(bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS);
+   
+   return bucket;
+}
+
+/** Look for a cached surface whose key equals *key; a hit empties the entry and returns its handle. */
+static INLINE struct svga_winsys_surface *
+svga_screen_cache_lookup(struct svga_screen *svgascreen,
+                         const struct svga_host_surface_cache_key *key)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry;
+   struct svga_winsys_surface *handle = NULL; /* stays NULL on a cache miss */
+   struct list_head *curr, *next;
+   unsigned bucket;
+   unsigned tries = 0; /* entries examined; only used by the disabled debug print */
+
+   bucket = svga_screen_cache_bucket(key);
+
+   pipe_mutex_lock(cache->mutex);
+
+   curr = cache->bucket[bucket].next;
+   next = curr->next; /* remembered up front so curr may be unlinked inside the loop */
+   while(curr != &cache->bucket[bucket]) {
+      ++tries;
+      
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head);
+
+      assert(entry->handle);
+      
+      if(memcmp(&entry->key, key, sizeof *key) == 0 && /* bytewise key comparison */
+         sws->fence_signalled( sws, entry->fence, 0 ) == 0) { /* presumably 0 == signalled; TODO confirm */
+         assert(sws->surface_is_flushed(sws, entry->handle));
+         
+         handle = entry->handle; // Reference is transferred to the caller here.
+         entry->handle = NULL;
+         
+         LIST_DEL(&entry->bucket_head);
+
+         LIST_DEL(&entry->head);
+         
+         LIST_ADD(&entry->head, &cache->empty); /* the entry itself becomes reusable */
+
+         break;
+      }
+
+      curr = next; 
+      next = curr->next;
+   }
+
+   pipe_mutex_unlock(cache->mutex);
+   
+#if 0
+   _debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, handle ? "hit" : "miss", tries);
+#else
+   (void)tries;
+#endif
+   
+   return handle;
+}
+
+
+/*
+ * Hands the reference held in *p_handle over to the cache and clears *p_handle.
+ * If no cache entry can be obtained, the reference is dropped instead.
+ */
+static INLINE void
+svga_screen_cache_add(struct svga_screen *svgascreen,
+                      const struct svga_host_surface_cache_key *key, 
+                      struct svga_winsys_surface **p_handle)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry = NULL;
+   struct svga_winsys_surface *handle = *p_handle;
+   
+
+   assert(handle);
+   if(!handle)
+      return;
+   
+   *p_handle = NULL; /* the caller's reference now belongs to this function */
+   pipe_mutex_lock(cache->mutex);
+   
+   if(!LIST_IS_EMPTY(&cache->empty)) {
+        /* use the first empty entry */
+        entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head);
+        
+        LIST_DEL(&entry->head);
+     }
+   else if(!LIST_IS_EMPTY(&cache->unused)) {
+      /* free the buffer at the tail of the unused list and reuse its entry */
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", entry->handle);
+      sws->surface_reference(sws, &entry->handle, NULL);
+
+      LIST_DEL(&entry->bucket_head);
+
+      LIST_DEL(&entry->head);
+   }
+
+   if(entry) {
+      entry->handle = handle;
+      memcpy(&entry->key, key, sizeof entry->key);
+   
+      LIST_ADD(&entry->head, &cache->validated); /* moved on by svga_screen_cache_flush() */
+   }
+   else {
+      /* Couldn't cache the buffer -- this really shouldn't happen */
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", handle);
+      sws->surface_reference(sws, &handle, NULL);
+   }
+   
+   pipe_mutex_unlock(cache->mutex);
+}
+
+
+/**
+ * Called during the screen flush: each entry on the validated list whose surface
+ * reports as flushed takes a reference to fence and moves to the unused list and its bucket.
+ */
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+                        struct pipe_fence_handle *fence)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry;
+   struct list_head *curr, *next;
+   unsigned bucket;
+
+   pipe_mutex_lock(cache->mutex);
+
+   curr = cache->validated.next;
+   next = curr->next; /* remembered up front so curr may be unlinked inside the loop */
+   while(curr != &cache->validated) {
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head);
+
+      assert(entry->handle);
+
+      if(sws->surface_is_flushed(sws, entry->handle)) {
+         LIST_DEL(&entry->head);
+         
+         svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
+
+         LIST_ADD(&entry->head, &cache->unused); /* added at the head of the unused list */
+
+         bucket = svga_screen_cache_bucket(&entry->key);
+         LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]); /* now visible to lookups */
+      }
+
+      curr = next; 
+      next = curr->next;
+   }
+
+   pipe_mutex_unlock(cache->mutex);
+}
+
+/** Drop every surface and fence reference still held by the cache entries, then destroy the mutex. */
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   unsigned i;
+   
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) { /* walks the storage array, not the lists */
+      if(cache->entries[i].handle) {
+	 SVGA_DBG(DEBUG_DMA, "unref sid %p\n", cache->entries[i].handle);
+	 sws->surface_reference(sws, &cache->entries[i].handle, NULL);
+      }
+
+      if(cache->entries[i].fence)
+         svgascreen->sws->fence_reference(svgascreen->sws, &cache->entries[i].fence, NULL);
+   }
+   
+   pipe_mutex_destroy(cache->mutex);
+}
+
+/** Set up the cache mutex and list heads and put every entry on the empty list; returns PIPE_OK. */
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   unsigned i;
+
+   pipe_mutex_init(cache->mutex);
+   
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i)
+      LIST_INITHEAD(&cache->bucket[i]);
+
+   LIST_INITHEAD(&cache->unused);
+   
+   LIST_INITHEAD(&cache->validated);
+   
+   LIST_INITHEAD(&cache->empty);
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) /* all entries start out empty */
+      LIST_ADDTAIL(&cache->entries[i].head, &cache->empty);
+
+   return PIPE_OK;
+}
+
+/** Create a host surface from *key; SVGA3D_BUFFER keys are rounded up and tried in the cache first. */
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+                           struct svga_host_surface_cache_key *key)
+{
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_winsys_surface *handle = NULL; /* returned as NULL if creation fails */
+
+   if (SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
+      /* round the buffer size up to the nearest power of two to increase the
+       * probability of cache hits (this rewrites key->size.width in place) */
+      uint32_t size = 1;
+      while(size < key->size.width)
+         size <<= 1;
+      key->size.width = size;
+      
+      handle = svga_screen_cache_lookup(svgascreen, key);
+      if (handle)
+         SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %u\n", handle, size);
+   }
+
+   if (!handle) {
+      handle = sws->surface_create(sws,
+                                   key->flags,
+                                   key->format,
+                                   key->size, 
+                                   key->numFaces, 
+                                   key->numMipLevels);
+      if (handle) /* print the width: key->size is a struct and cannot feed an integer conversion */
+         SVGA_DBG(DEBUG_DMA, "create sid %p sz %u\n", handle, key->size.width);
+   }
+
+   return handle;
+}
+
+/** Release *p_handle: SVGA3D_BUFFER surfaces are handed to the cache, anything else is unreferenced. */
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+                            const struct svga_host_surface_cache_key *key,
+                            struct svga_winsys_surface **p_handle)
+{
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   
+   if(SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
+      svga_screen_cache_add(svgascreen, key, p_handle); /* mirrors the cached path in surface_create */
+   }
+   else {
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", *p_handle);
+      sws->surface_reference(sws, p_handle, NULL);
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
new file mode 100644
index 0000000000..1bbe987768
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -0,0 +1,135 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_CACHE_H_
+#define SVGA_SCREEN_CACHE_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+
+/* TODO: Reduce this once we don't allocate an index buffer per draw call */ 
+#define SVGA_HOST_SURFACE_CACHE_SIZE 1024
+
+#define SVGA_HOST_SURFACE_CACHE_BUCKETS 64
+
+
+struct svga_winsys_surface;
+struct svga_screen;
+
+/**
+ * Same parameters as svga_winsys_screen::surface_create; also used as cache key.
+ */
+struct svga_host_surface_cache_key
+{
+   SVGA3dSurfaceFlags flags;
+   SVGA3dSurfaceFormat format;
+   SVGA3dSize size;
+   uint32_t numFaces;
+   uint32_t numMipLevels;
+};
+
+
+struct svga_host_surface_cache_entry 
+{
+   /** 
+    * Head for the svga_host_surface_cache::unused (LRU),
+    * svga_host_surface_cache::validated and svga_host_surface_cache::empty lists
+    */
+   struct list_head head;
+   
+   /** Head for the bucket lists. */
+   struct list_head bucket_head;
+
+   struct svga_host_surface_cache_key key; /**< creation parameters of handle */
+   struct svga_winsys_surface *handle;     /**< cached surface; NULL while the entry is empty */
+   
+   struct pipe_fence_handle *fence;        /**< fence recorded by svga_screen_cache_flush() */
+};
+
+
+/**
+ * Cache of the host surfaces.
+ * 
+ * A cache entry can be in the following stages:
+ * 1. empty
+ * 2. holding a buffer in a validate list
+ * 3. holding a flushed buffer (not in any validate list) with an active fence
+ * 4. holding a flushed buffer with an expired fence
+ * 
+ * An entry progresses from 1 -> 2 -> 3 -> 4. When we need an entry to put a 
+ * buffer into we preferentially take from 1, or from the least recently used 
+ * buffer from 3/4.
+ */
+struct svga_host_surface_cache 
+{
+   pipe_mutex mutex;
+   
+   /* Unused buffers are put in buckets to speed up lookups */
+   struct list_head bucket[SVGA_HOST_SURFACE_CACHE_BUCKETS];
+   
+   /* Entries with unused buffers, ordered from most to least recently used 
+    * (3 and 4) */
+   struct list_head unused;
+   
+   /* Entries with buffers still in validate lists (2) */
+   struct list_head validated;
+   
+   /** Empty entries (1) */
+   struct list_head empty;
+
+   /** The actual storage for the entries */
+   struct svga_host_surface_cache_entry entries[SVGA_HOST_SURFACE_CACHE_SIZE];
+};
+
+
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen);
+
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+                        struct pipe_fence_handle *fence);
+
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen);
+
+
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+                           struct svga_host_surface_cache_key *key);
+
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+                            const struct svga_host_surface_cache_key *key,
+                            struct svga_winsys_surface **handle);
+
+
+#endif /* SVGA_SCREEN_CACHE_H_ */
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
new file mode 100644
index 0000000000..8472dea04d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -0,0 +1,1065 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+#include "svga_screen_buffer.h"
+
+#include <util/u_string.h>
+
+
+/* XXX: This isn't a real hardware flag, but just a hack for kernel to
+ * know about primary surfaces. Find a better way to accomplish this.
+ */
+#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
+
+
+/*
+ * Helper function and arrays
+ */
+/** Map a gallium pipe_format to an SVGA3D surface format; SVGA3D_FORMAT_INVALID if unsupported. */
+SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format)
+{
+   switch(format) {
+   
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return SVGA3D_A8R8G8B8;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      return SVGA3D_X8R8G8B8;
+
+      /* Required for GL2.1 (mapped to the same host format as the UNORM variant):
+       */
+   case PIPE_FORMAT_A8R8G8B8_SRGB:
+      return SVGA3D_A8R8G8B8;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return SVGA3D_R5G6B5;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return SVGA3D_A1R5G5B5;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return SVGA3D_A4R4G4B4;
+
+      
+   /* XXX: Doesn't seem to work properly.
+   case PIPE_FORMAT_Z32_UNORM:
+      return SVGA3D_Z_D32;
+    */
+   case PIPE_FORMAT_Z16_UNORM:
+      return SVGA3D_Z_D16;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      return SVGA3D_Z_D24S8;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return SVGA3D_Z_D24X8;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return SVGA3D_ALPHA8;
+   case PIPE_FORMAT_L8_UNORM:
+      return SVGA3D_LUMINANCE8;
+
+   case PIPE_FORMAT_DXT1_RGB: /* both DXT1 variants share one host format */
+   case PIPE_FORMAT_DXT1_RGBA:
+      return SVGA3D_DXT1;
+   case PIPE_FORMAT_DXT3_RGBA:
+      return SVGA3D_DXT3;
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_DXT5;
+
+   default:
+      return SVGA3D_FORMAT_INVALID;
+   }
+}
+
+/** Host format used when rendering to `format`; DXT formats map to uncompressed 32-bit ones. */
+SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format)
+{
+   switch(format) { 
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z32_UNORM: /* svga_translate_format() has this case disabled: yields INVALID */
+   case PIPE_FORMAT_Z16_UNORM:
+   case PIPE_FORMAT_L8_UNORM:
+      return svga_translate_format(format);
+
+#if 1
+   /* For on host conversion */
+   case PIPE_FORMAT_DXT1_RGB:
+      return SVGA3D_X8R8G8B8;
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_A8R8G8B8;
+#endif
+
+   default:
+      return SVGA3D_FORMAT_INVALID;
+   }
+}
+
+/** Emit a SurfaceDMA for rows [y, y+h) of the transfer; flushes and retries once on failure. */
+static INLINE void
+svga_transfer_dma_band(struct svga_transfer *st,
+                       SVGA3dTransferType transfer,
+                       unsigned y, unsigned h, unsigned srcy)
+{
+   struct svga_texture *texture = svga_texture(st->base.texture); 
+   struct svga_screen *screen = svga_screen(texture->base.screen);
+   SVGA3dCopyBox box;
+   enum pipe_error ret;
+   
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
+                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
+                texture->handle,
+                st->base.face,
+                st->base.x,
+                y,
+                st->base.zslice,
+                st->base.x + st->base.width,
+                y + h,
+                st->base.zslice + 1,
+                texture->base.block.size*8/(texture->base.block.width*texture->base.block.height));
+   
+   box.x = st->base.x;
+   box.y = y;
+   box.z = st->base.zslice;
+   box.w = st->base.width;
+   box.h = h;
+   box.d = 1; /* a single z slice per transfer */
+   box.srcx = 0;
+   box.srcy = srcy;
+   box.srcz = 0;
+
+   pipe_mutex_lock(screen->swc_mutex); /* the screen's command context is guarded by swc_mutex */
+   ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+   if(ret != PIPE_OK) {
+      screen->swc->flush(screen->swc, NULL); /* flush the context, then try once more */
+      ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+      assert(ret == PIPE_OK);
+   }
+   pipe_mutex_unlock(screen->swc_mutex);
+}
+
+/** DMA the whole transfer: in one go, or band by band staged through st->hwbuf when st->swbuf is set. */
+static INLINE void
+svga_transfer_dma(struct svga_transfer *st,
+                 SVGA3dTransferType transfer)
+{
+   struct svga_texture *texture = svga_texture(st->base.texture); 
+   struct svga_screen *screen = svga_screen(texture->base.screen);
+   struct svga_winsys_screen *sws = screen->sws;
+   struct pipe_fence_handle *fence = NULL;
+   
+   if (transfer == SVGA3D_READ_HOST_VRAM) {
+      SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
+   }
+
+
+   if(!st->swbuf) {
+      /* Do the DMA transfer in a single go */
+      
+      svga_transfer_dma_band(st, transfer, st->base.y, st->base.height, 0);
+
+      if(transfer == SVGA3D_READ_HOST_VRAM) {
+         svga_screen_flush(screen, &fence);
+         sws->fence_finish(sws, fence, 0);
+         //sws->fence_reference(sws, &fence, NULL);  NOTE(review): unref disabled -- fence may leak; confirm
+      }
+   }
+   else {
+      unsigned y, h, srcy;
+      h = st->hw_nblocksy * st->base.block.height; /* band height in pixel rows */
+      srcy = 0;
+      for(y = 0; y < st->base.height; y += h) {
+         unsigned offset, length;
+         void *hw, *sw;
+
+         if (y + h > st->base.height) /* the last band may be shorter */
+            h = st->base.height - y;
+
+         /* Transfer band must be aligned to pixel block boundaries */
+         assert(y % st->base.block.height == 0);
+         assert(h % st->base.block.height == 0);
+         
+         offset = y * st->base.stride / st->base.block.height;
+         length = h * st->base.stride / st->base.block.height;
+
+         sw = (uint8_t *)st->swbuf + offset;
+         
+         if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+            /* Wait for the previous DMAs to complete */
+            /* TODO: keep one DMA (at half the size) in the background */
+            if(y) {
+               svga_screen_flush(screen, &fence);
+               sws->fence_finish(sws, fence, 0);
+               //sws->fence_reference(sws, &fence, NULL);  NOTE(review): unref disabled -- fence may leak; confirm
+            }
+
+            hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE);
+            assert(hw);
+            if(hw) {
+               memcpy(hw, sw, length); /* stage this band into the hardware buffer */
+               sws->buffer_unmap(sws, st->hwbuf);
+            }
+         }
+         
+         svga_transfer_dma_band(st, transfer, y, h, srcy);
+         
+         if(transfer == SVGA3D_READ_HOST_VRAM) {
+            svga_screen_flush(screen, &fence);
+            sws->fence_finish(sws, fence, 0); /* wait before reading the band back */
+
+            hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_READ);
+            assert(hw);
+            if(hw) {
+               memcpy(sw, hw, length); /* copy the band out to the software buffer */
+               sws->buffer_unmap(sws, st->hwbuf);
+            }
+         }
+      }
+   }
+}
+
+/** Create a texture and its host surface from templat; returns NULL on any failure. */
+static struct pipe_texture *
+svga_texture_create(struct pipe_screen *screen,
+                    const struct pipe_texture *templat)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+   unsigned width, height, depth;
+   SVGA3dSurfaceFlags flags = 0;
+   SVGA3dSurfaceFormat format;
+   SVGA3dSize size;
+   uint32 numFaces;
+   uint32 numMipLevels;
+   unsigned level;
+   
+   if (!tex)
+      goto error1;
+
+   tex->base = *templat;
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   assert(templat->last_level < SVGA_MAX_TEXTURE_LEVELS);
+   if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS)
+      goto error2;
+   
+   /* Fill in the per-level dimensions, shrinking each one with minify() per level */
+   width = templat->width[0];
+   height = templat->height[0];
+   depth = templat->depth[0];
+   for(level = 0; level <= templat->last_level; ++level) {
+      tex->base.width[level] = width;
+      tex->base.height[level] = height;
+      tex->base.depth[level] = depth;
+      tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width);  
+      tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height);  
+      width  = minify(width);
+      height = minify(height);
+      depth = minify(depth);
+   }
+   
+   size.width = templat->width[0];
+   size.height = templat->height[0];
+   size.depth = templat->depth[0];
+   
+   if(templat->target == PIPE_TEXTURE_CUBE) {
+      flags |= SVGA3D_SURFACE_CUBEMAP;
+      numFaces = 6;
+   }
+   else {
+      numFaces = 1;
+   }
+
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+      flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+      flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+   
+   /* 
+    * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
+    * know beforehand whether a texture will be used as a rendertarget or not
+    * and it always requests PIPE_TEXTURE_USAGE_RENDER_TARGET, therefore
+    * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose.
+    */
+#if 0
+   if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) &&
+      !pf_is_compressed(templat->format))
+      flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
+#endif
+   
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+      flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+   
+   numMipLevels = templat->last_level + 1;
+   
+   format = svga_translate_format(templat->format);
+   if(format == SVGA3D_FORMAT_INVALID)
+      goto error2;
+   
+   tex->handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
+   if (!tex->handle)
+      goto error2; /* never hand out a texture that has no host surface behind it */
+   SVGA_DBG(DEBUG_DMA, "create sid %p (texture)\n", tex->handle);
+   return &tex->base;
+
+error2:
+   FREE(tex);
+error1:
+   return NULL;
+}
+
+/** Wrap the host surface of an existing buffer in a new 2D, single-level texture; NULL on failure. */
+static struct pipe_texture *
+svga_texture_blanket(struct pipe_screen * screen,
+                     const struct pipe_texture *base,
+                     const unsigned *stride,
+                     struct pipe_buffer *buffer)
+{
+   struct svga_texture *tex;
+   struct svga_buffer *sbuf = svga_buffer(buffer);
+   struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+   assert(screen);
+
+   /* Only supports one type */
+   if (base->target != PIPE_TEXTURE_2D ||
+       base->last_level != 0 ||
+       base->depth[0] != 1) {
+      return NULL;
+   }
+
+   /**
+    * We currently can't do texture blanket on
+    * SVGA3D_BUFFER. Need to blit to a temporary surface?
+    */
+
+   assert(sbuf && sbuf->handle);
+   if (!sbuf || !sbuf->handle) /* svga_buffer() returns NULL for a NULL buffer */
+      return NULL;
+
+   if (svga_translate_format(base->format) != sbuf->key.format) {
+      unsigned f1 = svga_translate_format(base->format);
+      unsigned f2 = sbuf->key.format;
+
+      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
+      if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+              (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) {
+         debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+         return NULL;
+      }
+   }
+
+   tex = CALLOC_STRUCT(svga_texture);
+   if (!tex)
+      return NULL;
+
+   tex->base = *base;
+
+   if (sbuf->key.format == SVGA3D_X8R8G8B8) /* the surface's own format wins over base's */
+      tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   else if (sbuf->key.format == SVGA3D_A8R8G8B8)
+      tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   sws->surface_reference(sws, &tex->handle, sbuf->handle); /* share the buffer's surface */
+
+   return &tex->base;
+}
+
+/** Destroy a texture: bump the screen's texture timestamp, drop the cached view and the surface. */
+static void
+svga_texture_destroy(struct pipe_texture *pt)
+{
+   struct svga_screen *ss = svga_screen(pt->screen);
+   struct svga_texture *tex = (struct svga_texture *)pt;
+
+   ss->texture_timestamp++;
+
+   svga_sampler_view_reference(&tex->cached_view, NULL);
+
+   /*
+     DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+   */
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
+   ss->sws->surface_reference(ss->sws, &tex->handle, NULL);
+
+   FREE(tex);
+}
+
+/** Emit a one-box surface copy from src to dst on svga's context, or on the screen's if svga is NULL. */
+static void
+svga_texture_copy_handle(struct svga_context *svga,
+                         struct svga_screen *ss,
+                         struct svga_winsys_surface *src_handle,
+                         unsigned src_x, unsigned src_y, unsigned src_z,
+                         unsigned src_level, unsigned src_face,
+                         struct svga_winsys_surface *dst_handle,
+                         unsigned dst_x, unsigned dst_y, unsigned dst_z,
+                         unsigned dst_level, unsigned dst_face,
+                         unsigned width, unsigned height, unsigned depth)
+{
+   struct svga_surface dst, src; /* temporaries; only the fields assigned below are set */
+   enum pipe_error ret;
+   SVGA3dCopyBox box, *boxes;
+
+   assert(svga || ss);
+
+   src.handle = src_handle;
+   src.real_level = src_level;
+   src.real_face = src_face;
+   src.real_zslice = 0;
+
+   dst.handle = dst_handle;
+   dst.real_level = dst_level;
+   dst.real_face = dst_face;
+   dst.real_zslice = 0;
+
+   box.x = dst_x;
+   box.y = dst_y;
+   box.z = dst_z;
+   box.w = width;
+   box.h = height;
+   box.d = depth;
+   box.srcx = src_x;
+   box.srcy = src_y;
+   box.srcz = src_z;
+
+/*
+   SVGA_DBG(DEBUG_VIEWS, "mipcopy src: %p %u (%ux%ux%u), dst: %p %u (%ux%ux%u)\n",
+            src_handle, src_level, src_x, src_y, src_z,
+            dst_handle, dst_level, dst_x, dst_y, dst_z);
+*/
+
+   if (svga) {
+      ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                    &src.base,
+                                    &dst.base,
+                                    &boxes, 1);
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL); /* flush, then try once more */
+         ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                       &src.base,
+                                       &dst.base,
+                                       &boxes, 1);
+         assert(ret == PIPE_OK);
+      }
+      *boxes = box;
+      SVGA_FIFOCommitAll(svga->swc);
+   } else {
+      pipe_mutex_lock(ss->swc_mutex); /* the screen's command context is guarded by swc_mutex */
+      ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+                                    &src.base,
+                                    &dst.base,
+                                    &boxes, 1);
+      if(ret != PIPE_OK) {
+         ss->swc->flush(ss->swc, NULL); /* flush, then try once more */
+         ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+                                       &src.base,
+                                       &dst.base,
+                                       &boxes, 1);
+         assert(ret == PIPE_OK);
+      }
+      *boxes = box;
+      SVGA_FIFOCommitAll(ss->swc);
+      pipe_mutex_unlock(ss->swc_mutex);
+   }
+}
+/** Create a surface holding the selected mips/face/zslice of tex and copy the defined levels in. */
+static struct svga_winsys_surface *
+svga_texture_view_surface(struct pipe_context *pipe,
+                          struct svga_texture *tex,
+                          SVGA3dSurfaceFormat format,
+                          unsigned start_mip,
+                          unsigned num_mip,
+                          int face_pick,
+                          int zslice_pick)
+{
+   struct svga_screen *ss = svga_screen(tex->base.screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_winsys_surface *handle;
+   int i, j;
+   SVGA3dSurfaceFlags flags = 0;
+   SVGA3dSize size;
+   uint32 numFaces;
+   uint32 numMipLevels = num_mip;
+   unsigned z_offset = 0;
+
+   SVGA_DBG(DEBUG_PERF, 
+            "svga: Create surface view: face %d zslice %d mips %d..%d\n",
+            face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+
+   size.width = tex->base.width[start_mip];
+   size.height = tex->base.height[start_mip];
+   size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1; /* negative pick: all slices */
+   assert(size.depth == 1);
+   
+   if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) { /* negative pick: all six faces */
+      flags |= SVGA3D_SURFACE_CUBEMAP;
+      numFaces = 6;
+   } else {
+      numFaces = 1;
+   }
+
+   if(format == SVGA3D_FORMAT_INVALID)
+      return NULL;
+
+   handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
+
+   if (!handle)
+      return NULL;
+
+   SVGA_DBG(DEBUG_DMA, "create sid %p (texture view)\n", handle);
+
+   if (face_pick < 0)
+      face_pick = 0;
+
+   if (zslice_pick >= 0)
+       z_offset = zslice_pick;
+
+   for (i = 0; i < num_mip; i++) {
+      for (j = 0; j < numFaces; j++) {
+         if(tex->defined[j + face_pick][i + start_mip]) { /* levels not marked defined are skipped */
+            unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1;
+            svga_texture_copy_handle(svga_context(pipe), ss,
+                                     tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick,
+                                     handle, 0, 0, 0, i, j,
+                                     tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth);
+         }
+      }
+   }
+
+   return handle;
+}
+
+
+/**
+ * Create a pipe_surface for one face/level/zslice of a texture.
+ *
+ * The surface either references the texture's own host surface, or, when
+ * a "view" is required, a separate host surface obtained from
+ * svga_texture_view_surface().  A view is used when forced by the debug
+ * options, when the render format differs from the sampling format, or
+ * for 3D textures; debug.no_surface_view overrides all of these.
+ *
+ * \param screen  screen used to look up debug options and the winsys
+ * \param pt      texture the surface refers to (a reference is taken)
+ * \param face    face index recorded in the surface
+ * \param level   mipmap level recorded in the surface
+ * \param zslice  depth slice recorded in the surface
+ * \param flags   usage flags; PIPE_BUFFER_USAGE_GPU_WRITE selects the
+ *                render format translation
+ * \return the new surface, or NULL if the surface struct could not be
+ *         allocated or the view surface could not be created
+ */
+static struct pipe_surface *
+svga_get_tex_surface(struct pipe_screen *screen,
+                     struct pipe_texture *pt,
+                     unsigned face, unsigned level, unsigned zslice,
+                     unsigned flags)
+{
+   struct svga_texture *tex = svga_texture(pt);
+   struct svga_surface *s;
+   struct pipe_surface *ps;
+   boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE;
+   boolean view = FALSE;
+   SVGA3dSurfaceFormat format;
+
+   /* Test the allocation itself rather than a pointer derived from it. */
+   s = CALLOC_STRUCT(svga_surface);
+   if (!s)
+      return NULL;
+
+   ps = &s->base;
+
+   pipe_reference_init(&ps->reference, 1);
+   pipe_texture_reference(&ps->texture, pt);
+   ps->format = pt->format;
+   ps->width = pt->width[level];
+   ps->height = pt->height[level];
+   ps->usage = flags;
+   ps->level = level;
+   ps->face = face;
+   ps->zslice = zslice;
+
+   if (!render)
+      format = svga_translate_format(pt->format);
+   else
+      format = svga_translate_format_render(pt->format);
+
+   assert(format != SVGA3D_FORMAT_INVALID);
+   assert(!(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
+
+
+   if (svga_screen(screen)->debug.force_surface_view)
+      view = TRUE;
+
+   /* Currently only used for compressed textures */
+   if (render && (format != svga_translate_format(pt->format))) {
+      view = TRUE;
+   }
+
+   if (level != 0 && svga_screen(screen)->debug.force_level_surface_view)
+      view = TRUE;
+
+   if (pt->target == PIPE_TEXTURE_3D)
+      view = TRUE;
+
+   /* The "no view" debug switch wins over every reason above. */
+   if (svga_screen(screen)->debug.no_surface_view)
+      view = FALSE;
+
+   if (view) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
+               pt, level, face, zslice, ps);
+
+      s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice);
+      if (!s->handle) {
+         /* Never hand out a surface that has no host surface behind it. */
+         pipe_texture_reference(&ps->texture, NULL);
+         FREE(s);
+         return NULL;
+      }
+
+      /* The view holds just the selected image, at its origin. */
+      s->real_face = 0;
+      s->real_level = 0;
+      s->real_zslice = 0;
+   } else {
+      struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
+               pt, level, face, zslice, ps);
+
+      sws->surface_reference(sws, &s->handle, tex->handle);
+      s->real_face = face;
+      s->real_level = level;
+      s->real_zslice = zslice;
+   }
+
+   return ps;
+}
+
+
+/**
+ * Destroy a texture surface: release its host surface reference and its
+ * texture reference, then free the surface structure.
+ */
+static void
+svga_tex_surface_destroy(struct pipe_surface *surf)
+{
+   struct svga_surface *ssurf = svga_surface(surf);
+   struct svga_winsys_screen *winsys = svga_screen(surf->texture->screen)->sws;
+
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", ssurf->handle);
+
+   /* Drop the host surface first, then the texture that owns it. */
+   winsys->surface_reference(winsys, &ssurf->handle, NULL);
+   pipe_texture_reference(&surf->texture, NULL);
+
+   FREE(surf);
+}
+
+
+/**
+ * Flag a surface as dirty.  When the surface shares the texture's own host
+ * surface the face/level is marked as defined right away; for a separate
+ * view surface that is left to svga_propagate_surface.
+ */
+static INLINE void
+svga_mark_surface_dirty(struct pipe_surface *surf)
+{
+   struct svga_surface *ssurf = svga_surface(surf);
+   struct svga_texture *stex;
+
+   /* Already dirty: nothing more to record. */
+   if (ssurf->dirty)
+      return;
+
+   stex = svga_texture(surf->texture);
+   ssurf->dirty = TRUE;
+
+   /* For view surfaces this will happen later in svga_propagate_surface */
+   if (ssurf->handle == stex->handle)
+      stex->defined[surf->face][surf->level] = TRUE;
+}
+
+
+/**
+ * Mark every surface bound in the current framebuffer state as dirty:
+ * each non-NULL colour buffer, followed by the depth/stencil buffer.
+ */
+void svga_mark_surfaces_dirty(struct svga_context *svga)
+{
+   unsigned buf = 0;
+
+   while (buf < PIPE_MAX_COLOR_BUFS) {
+      if (svga->curr.framebuffer.cbufs[buf] != NULL)
+         svga_mark_surface_dirty(svga->curr.framebuffer.cbufs[buf]);
+      buf++;
+   }
+
+   if (svga->curr.framebuffer.zsbuf != NULL)
+      svga_mark_surface_dirty(svga->curr.framebuffer.zsbuf);
+}
+
+/**
+ * Propagate any changes from a surface back to its texture.
+ *
+ * Does nothing unless the surface is marked dirty.  Otherwise the dirty
+ * flag is cleared, the screen's texture timestamp and the texture's age are
+ * bumped, and, if the surface uses a host surface other than the texture's
+ * own, the surface image is copied into the texture.
+ *
+ * pipe is optional context to inline the blit command in.
+ */
+void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+   struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *tex = svga_texture(surf->texture);
+   struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+   if (!s->dirty)
+      return;
+
+   s->dirty = FALSE;
+   /* svga_update_state() compares this timestamp to notice texture changes. */
+   ss->texture_timestamp++;
+   /* Stamp the level with a new age; svga_validate_sampler_view() compares
+    * it against each sampler view's age.
+    */
+   tex->view_age[surf->level] = ++(tex->age);
+
+   if (s->handle != tex->handle) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf);
+      /* Copy from where the image lives in the view (real_level/real_face)
+       * to the face/level/zslice it represents in the texture.
+       */
+      svga_texture_copy_handle(svga_context(pipe), ss,
+                               s->handle, 0, 0, 0, s->real_level, s->real_face,
+                               tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
+                               tex->base.width[surf->level], tex->base.height[surf->level], 1);
+      tex->defined[surf->face][surf->level] = TRUE;
+   }
+}
+
+/**
+ * Tell whether svga_propagate_surface has copying to do for this surface:
+ * true only for a dirty surface whose host surface is not the texture's own.
+ */
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf)
+{
+   struct svga_surface *ssurf = svga_surface(surf);
+   struct svga_texture *stex = svga_texture(surf->texture);
+
+   if (!ssurf->dirty)
+      return 0;
+
+   return ssurf->handle != stex->handle;
+}
+
+
+/**
+ * Create a transfer for a w x h block region of one texture image.
+ *
+ * A hardware buffer is allocated for the transfer; if the full size cannot
+ * be had, the height is halved until an allocation succeeds and a malloc'ed
+ * staging buffer of the full size is added.  For PIPE_TRANSFER_READ usage
+ * the texture contents are fetched with svga_transfer_dma before returning.
+ *
+ * \return the new transfer, or NULL if direct mapping was requested or an
+ *         allocation failed
+ */
+static struct pipe_transfer *
+svga_get_tex_transfer(struct pipe_screen *screen,
+                     struct pipe_texture *texture,
+                     unsigned face, unsigned level, unsigned zslice,
+                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                     unsigned w, unsigned h)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_transfer *st;
+
+   /* We can't map texture storage directly */
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+      return NULL;
+
+   st = CALLOC_STRUCT(svga_transfer);
+   if (st == NULL)
+      return NULL;
+
+   /* Where in the texture the transfer applies. */
+   st->base.face = face;
+   st->base.level = level;
+   st->base.zslice = zslice;
+   st->base.x = x;
+   st->base.y = y;
+   st->base.width = w;
+   st->base.height = h;
+   st->base.usage = usage;
+
+   /* Layout of the transferred data. */
+   st->base.format = texture->format;
+   st->base.block = texture->block;
+   st->base.nblocksx = pf_get_nblocksx(&texture->block, w);
+   st->base.nblocksy = pf_get_nblocksy(&texture->block, h);
+   st->base.stride = st->base.nblocksx*st->base.block.size;
+
+   /* Try the full height first, then keep halving until a hardware buffer
+    * can be allocated or the height reaches zero.
+    */
+   st->hw_nblocksy = st->base.nblocksy;
+   for (;;) {
+      st->hwbuf = svga_winsys_buffer_create(ss,
+                                            1,
+                                            0,
+                                            st->hw_nblocksy*st->base.stride);
+      if (st->hwbuf)
+         break;
+
+      st->hw_nblocksy /= 2;
+      if (st->hw_nblocksy == 0)
+         break;
+   }
+
+   if (!st->hwbuf) {
+      FREE(st);
+      return NULL;
+   }
+
+   if (st->hw_nblocksy < st->base.nblocksy) {
+      /* The hardware buffer is smaller than the transfer, so the whole
+       * image is staged in regular malloc memory instead.
+       */
+      debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
+                   __FUNCTION__,
+                   (st->base.nblocksy*st->base.stride + 1023)/1024,
+                   (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+                   (st->hw_nblocksy*st->base.stride + 1023)/1024);
+
+      st->swbuf = MALLOC(st->base.nblocksy*st->base.stride);
+      if (!st->swbuf) {
+         sws->buffer_destroy(sws, st->hwbuf);
+         FREE(st);
+         return NULL;
+      }
+   }
+
+   pipe_texture_reference(&st->base.texture, texture);
+
+   if (usage & PIPE_TRANSFER_READ)
+      svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM);
+
+   return &st->base;
+}
+
+
+/**
+ * Return a CPU pointer to the transfer's data: the malloc'ed staging buffer
+ * when there is one, otherwise a mapping of the hardware buffer.
+ */
+static void *
+svga_transfer_map( struct pipe_screen *screen,
+                   struct pipe_transfer *transfer )
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *winsys = ss->sws;
+   struct svga_transfer *xfer = svga_transfer(transfer);
+
+   if (!xfer->swbuf) {
+      /* The wait for read transfers already happened when svga_transfer_dma
+       * was called. */
+      return winsys->buffer_map(winsys, xfer->hwbuf,
+                                pipe_transfer_buffer_flags(transfer));
+   }
+
+   return xfer->swbuf;
+}
+
+
+/**
+ * Undo svga_transfer_map.  Only the hardware buffer needs unmapping; a
+ * malloc'ed staging buffer was never mapped.
+ */
+static void
+svga_transfer_unmap(struct pipe_screen *screen,
+                    struct pipe_transfer *transfer)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *winsys = ss->sws;
+   struct svga_transfer *xfer = svga_transfer(transfer);
+
+   if (xfer->swbuf)
+      return;
+
+   winsys->buffer_unmap(winsys, xfer->hwbuf);
+}
+
+
+/**
+ * Finish and free a transfer.  For write transfers the data is uploaded
+ * with svga_transfer_dma and the texture is stamped as modified before the
+ * buffers and the transfer structure are released.
+ */
+static void
+svga_tex_transfer_destroy(struct pipe_transfer *transfer)
+{
+   struct svga_texture *stex = svga_texture(transfer->texture);
+   struct svga_screen *sscreen = svga_screen(transfer->texture->screen);
+   struct svga_winsys_screen *winsys = sscreen->sws;
+   struct svga_transfer *xfer = svga_transfer(transfer);
+   const boolean wrote = (xfer->base.usage & PIPE_TRANSFER_WRITE) ? TRUE : FALSE;
+
+   if (wrote) {
+      svga_transfer_dma(xfer, SVGA3D_WRITE_HOST_VRAM);
+
+      /* Record that this face/level changed. */
+      sscreen->texture_timestamp++;
+      stex->view_age[transfer->level] = ++(stex->age);
+      stex->defined[transfer->face][transfer->level] = TRUE;
+   }
+
+   pipe_texture_reference(&xfer->base.texture, NULL);
+
+   FREE(xfer->swbuf);
+   winsys->buffer_destroy(winsys, xfer->hwbuf);
+   FREE(xfer);
+}
+
+/**
+ * Plug the texture, surface and transfer entry points of this file into
+ * the screen's function table.
+ */
+void
+svga_screen_init_texture_functions(struct pipe_screen *screen)
+{
+   /* Textures */
+   screen->texture_create = svga_texture_create;
+   screen->texture_blanket = svga_texture_blanket;
+   screen->texture_destroy = svga_texture_destroy;
+
+   /* Surfaces */
+   screen->get_tex_surface = svga_get_tex_surface;
+   screen->tex_surface_destroy = svga_tex_surface_destroy;
+
+   /* Transfers */
+   screen->get_tex_transfer = svga_get_tex_transfer;
+   screen->tex_transfer_destroy = svga_tex_transfer_destroy;
+   screen->transfer_map = svga_transfer_map;
+   screen->transfer_unmap = svga_transfer_unmap;
+}
+
+/*********************************************************************** 
+ */
+
+/**
+ * Get a sampler view of a texture covering mip levels min_lod..max_lod.
+ *
+ * When the requested range spans the whole mip chain (or views are disabled
+ * by debug option) the returned view simply references the texture's own
+ * host surface.  Otherwise a separate host surface is created with
+ * svga_texture_view_surface(); one such view is cached on the texture and
+ * reused when the same range is requested again.
+ *
+ * \param pipe     context passed on to the view creation / validation code
+ * \param pt       texture to view; must not be NULL
+ * \param min_lod  first mip level of the view
+ * \param max_lod  last mip level of the view (<= pt->last_level)
+ * \return the sampler view, or NULL if the view structure could not be
+ *         allocated
+ */
+struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
+                          unsigned min_lod, unsigned max_lod)
+{
+   struct svga_screen *ss;
+   struct svga_winsys_screen *sws;
+   struct svga_texture *tex;
+   struct svga_sampler_view *sv = NULL;
+   SVGA3dSurfaceFormat format;
+   boolean view = TRUE;
+
+   /* Check the arguments before pt is dereferenced.  min_lod is unsigned,
+    * so there is no lower bound to check.
+    */
+   assert(pt);
+   assert(min_lod <= max_lod);
+   assert(max_lod <= pt->last_level);
+
+   ss = svga_screen(pt->screen);
+   sws = ss->sws;
+   tex = svga_texture(pt);
+   format = svga_translate_format(pt->format);
+
+   /* Is a view needed */
+   {
+      /*
+       * Can't control max lod. For first level views and when we only
+       * look at one level we disable mip filtering to achieve the same
+       * results as a view.
+       */
+      if (min_lod == 0 && max_lod >= pt->last_level)
+         view = FALSE;
+
+      if (pf_is_compressed(pt->format) && view) {
+         format = svga_translate_format_render(pt->format);
+      }
+
+      if (ss->debug.no_sampler_view)
+         view = FALSE;
+
+      if (ss->debug.force_sampler_view)
+         view = TRUE;
+   }
+
+   /* First try the cache */
+   if (view) {
+      pipe_mutex_lock(ss->tex_mutex);
+      if (tex->cached_view &&
+          tex->cached_view->min_lod == min_lod &&
+          tex->cached_view->max_lod == max_lod) {
+         svga_sampler_view_reference(&sv, tex->cached_view);
+         pipe_mutex_unlock(ss->tex_mutex);
+         SVGA_DBG(DEBUG_VIEWS, "svga: Sampler view: reuse %p, %u %u, last %u\n",
+                              pt, min_lod, max_lod, pt->last_level);
+         /* Refresh the cached copy if the texture changed since. */
+         svga_validate_sampler_view(svga_context(pipe), sv);
+         return sv;
+      }
+      pipe_mutex_unlock(ss->tex_mutex);
+   }
+
+   sv = CALLOC_STRUCT(svga_sampler_view);
+   if (!sv)
+      return NULL;
+
+   pipe_reference_init(&sv->reference, 1);
+   sv->texture = tex;
+   sv->min_lod = min_lod;
+   sv->max_lod = max_lod;
+
+   /* No view needed just use the whole texture */
+   if (!view) {
+      SVGA_DBG(DEBUG_VIEWS,
+               "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+               pt, min_lod, max_lod,
+               max_lod - min_lod + 1,
+               pt->width[0],
+               pt->height[0],
+               pt->depth[0],
+               pt->last_level);
+      sws->surface_reference(sws, &sv->handle, tex->handle);
+      return sv;
+   }
+
+   SVGA_DBG(DEBUG_VIEWS,
+            "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+            pt, min_lod, max_lod,
+            max_lod - min_lod + 1,
+            pt->width[0],
+            pt->height[0],
+            pt->depth[0],
+            pt->last_level);
+
+   sv->age = tex->age;
+   sv->handle = svga_texture_view_surface(pipe, tex, format,
+                                          min_lod,
+                                          max_lod - min_lod + 1,
+                                          -1, -1);
+
+   if (!sv->handle) {
+      /* View creation failed: fall back to the texture's own surface. */
+      assert(0);
+      sws->surface_reference(sws, &sv->handle, tex->handle);
+      return sv;
+   }
+
+   /* Remember the new view on the texture for later reuse. */
+   pipe_mutex_lock(ss->tex_mutex);
+   svga_sampler_view_reference(&tex->cached_view, sv);
+   pipe_mutex_unlock(ss->tex_mutex);
+
+   return sv;
+}
+
+/**
+ * Bring a sampler view's private surface up to date with its texture.
+ *
+ * Views that use the texture's own host surface need no work.  For the
+ * others, every level whose view_age is newer than the view's age is
+ * re-copied from the texture (all six faces for cube maps), and the view
+ * then takes the texture age sampled before the copies.
+ */
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
+{
+   struct svga_texture *tex = v->texture;
+   unsigned numFaces;
+   unsigned tex_age;
+   int level, face;
+
+   assert(svga);
+
+   if (v->handle == v->texture->handle)
+      return;
+
+   tex_age = tex->age;
+   numFaces = (tex->base.target == PIPE_TEXTURE_CUBE) ? 6 : 1;
+
+   for (level = v->min_lod; level <= v->max_lod; level++) {
+      for (face = 0; face < numFaces; face++) {
+         /* Level unchanged since the view was last refreshed. */
+         if (v->age >= tex->view_age[level])
+            continue;
+
+         svga_texture_copy_handle(svga, NULL,
+                                  tex->handle, 0, 0, 0, level, face,
+                                  v->handle, 0, 0, 0, level - v->min_lod, face,
+                                  tex->base.width[level],
+                                  tex->base.height[level],
+                                  tex->base.depth[level]);
+      }
+   }
+
+   v->age = tex_age;
+}
+
+/**
+ * Free a sampler view: release its host surface reference and the
+ * structure itself.  Called by svga_sampler_view_reference.
+ */
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
+{
+   struct svga_winsys_screen *winsys = svga_screen(v->texture->base.screen)->sws;
+
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
+
+   winsys->surface_reference(winsys, &v->handle, NULL);
+   FREE(v);
+}
+
+/**
+ * Wrap a texture's host surface in a pipe_buffer.
+ *
+ * \param texture  texture whose surface is wrapped
+ * \param buffer   receives the wrapping buffer (NULL on failure)
+ * \param stride   receives the byte size of one row of blocks of level 0
+ * \return TRUE if a buffer was produced
+ */
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+				struct pipe_buffer **buffer,
+				unsigned *stride)
+{
+   struct svga_texture *stex = svga_texture(texture);
+   struct pipe_buffer *wrapped;
+
+   wrapped = svga_screen_buffer_wrap_surface(texture->screen,
+                                             svga_translate_format(texture->format),
+                                             stex->handle);
+   *buffer = wrapped;
+
+   *stride = pf_get_nblocksx(&texture->block, texture->width[0]) *
+      texture->block.size;
+
+   return wrapped != NULL;
+}
+
+
+/**
+ * Return a new reference to the host surface backing a texture.
+ */
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture)
+{
+   struct svga_winsys_screen *winsys = svga_winsys_screen(texture->screen);
+   struct svga_texture *stex = svga_texture(texture);
+   struct svga_winsys_surface *ref = NULL;
+
+   winsys->surface_reference(winsys, &ref, stex->handle);
+
+   return ref;
+}
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
new file mode 100644
index 0000000000..1e6fef59a3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -0,0 +1,177 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TEXTURE_H
+#define SVGA_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+
+struct pipe_context;
+struct pipe_screen;
+struct svga_context;
+struct svga_winsys_surface;
+enum SVGA3dSurfaceFormat;
+
+
+#define SVGA_MAX_TEXTURE_LEVELS 12 /* 2048x2048 */
+
+
+/**
+ * A sampler's view into a texture
+ *
+ * We currently cache one sampler view on the texture; the texture
+ * does this by holding a reference to the sampler view.
+ *
+ * Because of this we cannot hold a reference to the
+ * texture from the sampler view. So the user
+ * of the sampler views must make sure that the
+ * texture has a reference taken for as long as
+ * the sampler view is referenced.
+ *
+ * Just unreferencing the sampler_view before the
+ * texture is enough.
+ */
+struct svga_sampler_view
+{
+   struct pipe_reference reference;
+
+   /* Texture being viewed (not reference counted, see above) */
+   struct svga_texture *texture;
+
+   /* First and last mip level of the texture covered by the view */
+   int min_lod;
+   int max_lod;
+
+   /* Texture age at which the view contents were last brought up to date */
+   unsigned age;
+
+   /* Host surface sampled from; the texture's own handle when no separate
+    * view surface is used */
+   struct svga_winsys_surface *handle;
+};
+
+
+/**
+ * Driver-side texture: the generic pipe_texture plus the host surface that
+ * stores it and the bookkeeping used to keep views of it up to date.
+ */
+struct svga_texture 
+{
+   struct pipe_texture base;
+
+   /* Host surface holding the texture contents */
+   struct svga_winsys_surface *handle;
+
+   /* Per face/level flag, set once the image has been written through a
+    * transfer or a surface */
+   boolean defined[6][PIPE_MAX_TEXTURE_LEVELS];
+   
+   /* The single cached sampler view, if any */
+   struct svga_sampler_view *cached_view;
+
+   /* Per level: value of `age` when the level was last modified.
+    * NOTE(review): sized with SVGA_MAX_TEXTURE_LEVELS while `defined` uses
+    * PIPE_MAX_TEXTURE_LEVELS -- confirm both are indexed within bounds. */
+   unsigned view_age[SVGA_MAX_TEXTURE_LEVELS];
+   /* Incremented on every modification of the texture */
+   unsigned age;
+
+   boolean views_modified;
+};
+
+
+/**
+ * Driver-side surface: the generic pipe_surface plus the host surface it
+ * actually uses, which is either the texture's own or a separate view.
+ */
+struct svga_surface
+{
+   struct pipe_surface base;
+
+   /* Host surface used by this surface */
+   struct svga_winsys_surface *handle;
+
+   /* Location of the image inside `handle`: the surface's own
+    * face/level/zslice when `handle` is the texture's surface, all zero
+    * when it is a separate view surface */
+   unsigned real_face;
+   unsigned real_level;
+   unsigned real_zslice;
+
+   /* Set by svga_mark_surface_dirty, cleared by svga_propagate_surface */
+   boolean dirty;
+};
+
+
+/**
+ * Driver-side transfer: the generic pipe_transfer plus the buffers used to
+ * move the data between the CPU and the host surface.
+ */
+struct svga_transfer
+{
+   struct pipe_transfer base;
+
+   /* Hardware buffer the data is moved through */
+   struct svga_winsys_buffer *hwbuf;
+
+   /* Height of the hardware buffer in pixel blocks; smaller than
+    * base.nblocksy when a full-size buffer could not be allocated */
+   unsigned hw_nblocksy;
+
+   /* Temporary malloc buffer when we can't allocate a hardware buffer
+    * big enough */
+   void *swbuf;
+};
+
+
+/** Cast a pipe_texture to the svga_texture it is the base of. */
+static INLINE struct svga_texture *
+svga_texture(struct pipe_texture *texture)
+{
+   struct svga_texture *stex = (struct svga_texture *)texture;
+
+   return stex;
+}
+
+/** Cast a non-NULL pipe_surface to the svga_surface it is the base of. */
+static INLINE struct svga_surface *
+svga_surface(struct pipe_surface *surface)
+{
+   struct svga_surface *ssurf;
+
+   assert(surface);
+   ssurf = (struct svga_surface *)surface;
+
+   return ssurf;
+}
+
+/** Cast a non-NULL pipe_transfer to the svga_transfer it is the base of. */
+static INLINE struct svga_transfer *
+svga_transfer(struct pipe_transfer *transfer)
+{
+   struct svga_transfer *xfer;
+
+   assert(transfer);
+   xfer = (struct svga_transfer *)transfer;
+
+   return xfer;
+}
+
+extern struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe,
+                          struct pipe_texture *pt,
+                          unsigned min_lod, unsigned max_lod);
+
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v);
+
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
+
+/**
+ * Make *ptr refer to v instead of the view it referred to before.
+ *
+ * The reference swap itself is done by pipe_reference(); when it returns
+ * true the previously referenced view is destroyed with
+ * svga_destroy_sampler_view_priv().
+ *
+ * NOTE(review): &v->reference is formed even if v is NULL; presumably this
+ * relies on `reference` being the first member of svga_sampler_view --
+ * confirm against pipe_reference().
+ */
+static INLINE void
+svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
+{
+   /* Remember the old view; *ptr is overwritten by pipe_reference(). */
+   struct svga_sampler_view *old = *ptr;
+
+   if (pipe_reference((struct pipe_reference **)ptr, &v->reference))
+      svga_destroy_sampler_view_priv(old);
+}
+
+extern void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf);
+
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf);
+
+extern void
+svga_screen_init_texture_functions(struct pipe_screen *screen);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format);
+
+
+#endif /* SVGA_TEXTURE_H */
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
new file mode 100644
index 0000000000..1c21d3acfe
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -0,0 +1,278 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_cmd.h"
+#include "svga_hw_reg.h"
+
+/* This is just enough to decide whether we need to use the draw
+ * module (swtnl) or not.
+ *
+ * Like the other atom lists below it is NULL-terminated; update_state()
+ * walks a list until it reaches the NULL entry.
+ */
+static const struct svga_tracked_state *need_swtnl_state[] =
+{
+   &svga_update_need_swvfetch,
+   &svga_update_need_pipeline,
+   &svga_update_need_swtnl,
+   NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a clear or draw
+ * packet.
+ */
+static const struct svga_tracked_state *hw_clear_state[] =
+{
+   &svga_hw_scissor,
+   &svga_hw_viewport,
+   &svga_hw_framebuffer,
+   NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a draw packet.
+ */
+static const struct svga_tracked_state *hw_draw_state[] =
+{
+   &svga_hw_update_zero_stride,
+   &svga_hw_fs,
+   &svga_hw_vs,
+   &svga_hw_rss,
+   &svga_hw_tss,
+   &svga_hw_tss_binding,
+   &svga_hw_clip_planes,
+   &svga_hw_vdecl,
+   &svga_hw_fs_parameters,
+   &svga_hw_vs_parameters,
+   NULL
+};
+
+
+/* Atoms for the last level in state_levels (software tnl drawing).
+ */
+static const struct svga_tracked_state *swtnl_draw_state[] =
+{
+   &svga_update_swtnl_draw,
+   &svga_update_swtnl_vdecl,
+   NULL
+};
+
+/* Flattens the graph of state dependencies.  Could swap the positions
+ * of hw_clear_state and need_swtnl_state without breaking anything.
+ *
+ * svga_update_state() indexes this table with the level number, so the
+ * entries are presumably meant to stay in SVGA_STATE_* order.
+ */
+static const struct svga_tracked_state **state_levels[] = 
+{
+   need_swtnl_state,
+   hw_clear_state,
+   hw_draw_state,
+   swtnl_draw_state
+};
+
+
+
+/* Return the flags set in both a and b; non-zero when they overlap. */
+static unsigned check_state( unsigned a,
+                             unsigned b )
+{
+   const unsigned common = b & a;
+
+   return common;
+}
+
+/* Add the flags in b to the flag set pointed to by a. */
+static void accumulate_state( unsigned *a,
+                              unsigned b )
+{
+   const unsigned merged = *a | b;
+
+   *a = merged;
+}
+
+
+/* Store in *result the flags that differ between a and b. */
+static void xor_states( unsigned *result,
+                        unsigned a,
+                        unsigned b )
+{
+   const unsigned differing = b ^ a;
+
+   *result = differing;
+}
+
+
+
+/**
+ * Run one NULL-terminated list of state atoms against the dirty flags.
+ *
+ * The hwtnl module is flushed first.  Each atom whose `dirty` mask
+ * intersects *state then has its update() callback called with the current
+ * flags.  In the (currently always enabled) debug variant it is also
+ * checked that no atom changes flags which an earlier atom in the list has
+ * already examined, which would mean the list is ordered wrongly.
+ *
+ * \return 0 on success, otherwise the first non-zero value returned by the
+ *         flush or by an atom
+ */
+static int update_state( struct svga_context *svga,
+                         const struct svga_tracked_state *atoms[],
+                         unsigned *state )
+{
+   boolean debug = TRUE;
+   enum pipe_error ret = 0;
+   unsigned i;
+
+   ret = svga_hwtnl_flush( svga->hwtnl );
+   if (ret != 0)
+      return ret;
+
+   if (!debug) {
+      /* Plain version: run every atom that has dirty flags pending. */
+      for (i = 0; atoms[i] != NULL; i++) {
+         if (!check_state(*state, atoms[i]->dirty))
+            continue;
+
+         ret = atoms[i]->update( svga, *state );
+         if (ret != 0)
+            return ret;
+      }
+   }
+   else {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      unsigned seen = 0;
+      unsigned before = *state;
+
+      for (i = 0; atoms[i] != NULL; i++) {
+         const struct svga_tracked_state *atom = atoms[i];
+         unsigned produced;
+
+         assert(atom->dirty);
+         assert(atom->update);
+
+         if (check_state(*state, atom->dirty)) {
+            if (0)
+               debug_printf("update: %s\n", atom->name);
+            ret = atom->update( svga, *state );
+            if (ret != 0)
+               return ret;
+         }
+
+         /* Flags changed by this atom must not have been examined by any
+          * atom that ran before it.
+          */
+         xor_states(&produced, before, *state);
+         if (check_state(seen, produced)) {
+            debug_printf("state atom %s generated state already examined\n", 
+                         atom->name);
+            assert(0);
+         }
+
+         before = *state;
+         accumulate_state(&seen, atom->dirty);
+      }
+   }
+
+   return 0;
+}
+
+
+
+/**
+ * Bring the tracked state up to date for levels 0..max_level.
+ *
+ * For each level, the dirty flags saved for that level are merged into
+ * svga->dirty and, if anything is dirty, the level's atoms are run through
+ * update_state().  Flags still pending afterwards are saved for the levels
+ * above max_level, and svga->dirty is cleared.
+ *
+ * The return type matches the prototype in svga_state.h.
+ *
+ * \param svga       context to update
+ * \param max_level  highest SVGA_STATE_* level to update, inclusive; must
+ *                   be below SVGA_STATE_MAX
+ * \return 0 on success, otherwise the first error reported by
+ *         update_state(); in that case the remaining levels are not run
+ */
+enum pipe_error svga_update_state( struct svga_context *svga,
+                                   unsigned max_level )
+{
+   struct svga_screen *screen = svga_screen(svga->pipe.screen);
+   enum pipe_error ret = 0;
+   unsigned i;
+
+   /* The level indexes both state_levels[] and svga->state.dirty[]. */
+   assert(max_level < SVGA_STATE_MAX);
+
+   /* Check for updates to bound textures.  This can't be done in an
+    * atom as there is no flag which could provoke this test, and we
+    * cannot create one.
+    */
+   if (svga->state.texture_timestamp != screen->texture_timestamp) {
+      svga->state.texture_timestamp = screen->texture_timestamp;
+      svga->dirty |= SVGA_NEW_TEXTURE;
+   }
+
+   for (i = 0; i <= max_level; i++) {
+      svga->dirty |= svga->state.dirty[i];
+
+      if (svga->dirty) {
+         ret = update_state( svga, 
+                             state_levels[i], 
+                             &svga->dirty );
+         if (ret != 0)
+            return ret;
+
+         svga->state.dirty[i] = 0;
+      }
+   }
+   
+   /* Levels that were not run still have to see these flags later. */
+   for (; i < SVGA_STATE_MAX; i++) 
+      svga->state.dirty[i] |= svga->dirty;
+
+   svga->dirty = 0;
+   return 0;
+}
+
+
+
+
+/**
+ * Like svga_update_state(), but if the first attempt fails with
+ * PIPE_ERROR_OUT_OF_MEMORY the context is flushed and the update is tried
+ * once more.  The final result is asserted to be zero.
+ */
+void svga_update_state_retry( struct svga_context *svga,
+                              unsigned max_level )
+{
+   int ret = svga_update_state( svga, max_level );
+
+   if (ret != PIPE_ERROR_OUT_OF_MEMORY) {
+      assert( ret == 0 );
+      return;
+   }
+
+   /* Flush the context and give it a second attempt. */
+   svga_context_flush(svga, NULL);
+   ret = svga_update_state( svga, max_level );
+
+   assert( ret == 0 );
+}
+
+
+
+/* Store one render state (_name, _value) in slot _count of the array _rs
+ * and advance _count.  _count must be a modifiable lvalue; it is evaluated
+ * more than once.  All arguments are parenthesized so that expressions can
+ * be passed safely.
+ */
+#define EMIT_RS(_rs, _count, _name, _value)     \
+do {                                            \
+   (_rs)[(_count)].state = (_name);             \
+   (_rs)[(_count)].uintValue = (_value);        \
+   (_count)++;                                  \
+} while (0)
+
+
+/* Emit the render states which stay constant through the life of a
+ * context.  Returns the error from SVGA3D_BeginSetRenderState() if the
+ * command could not be started, 0 otherwise.
+ */
+enum pipe_error svga_emit_initial_state( struct svga_context *svga )
+{
+   const unsigned COUNT = 2;
+   SVGA3dRenderState *rs;
+   unsigned count = 0;
+   enum pipe_error ret;
+
+   ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+   if (ret != 0)
+      return ret;
+
+   /* Always use D3D style coordinate space as this is the only one
+    * which is implemented on all backends.
+    */
+   EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
+   EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+
+   /* Exactly the number of states reserved above must have been written. */
+   assert( COUNT == count );
+
+   SVGA_FIFOCommitAll( svga->swc );
+   return 0;
+}
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
new file mode 100644
index 0000000000..22d5a6d552
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STATE_H
+#define SVGA_STATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+struct svga_context;
+
+
+void svga_init_state( struct svga_context *svga );
+void svga_destroy_state( struct svga_context *svga );
+
+
+/**
+ * A state atom: one unit of derived or hardware state together with the
+ * dirty flags that require it to be recomputed.
+ */
+struct svga_tracked_state {
+   /* Name printed in debug messages */
+   const char *name;
+   /* Mask of dirty flags which trigger update(); must be non-zero */
+   unsigned dirty;
+   /* Recompute the state; called with the current dirty flags, returns 0
+    * on success and non-zero on error */
+   int (*update)( struct svga_context *svga, unsigned dirty );
+};
+
+/* NEED_SWTNL
+ */
+extern struct svga_tracked_state svga_update_need_swvfetch;
+extern struct svga_tracked_state svga_update_need_pipeline;
+extern struct svga_tracked_state svga_update_need_swtnl;
+
+/* HW_CLEAR
+ */
+extern struct svga_tracked_state svga_hw_viewport;
+extern struct svga_tracked_state svga_hw_scissor;
+extern struct svga_tracked_state svga_hw_framebuffer;
+
+/* HW_DRAW
+ */
+extern struct svga_tracked_state svga_hw_vs;
+extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_tss;
+extern struct svga_tracked_state svga_hw_tss_binding;
+extern struct svga_tracked_state svga_hw_clip_planes;
+extern struct svga_tracked_state svga_hw_vdecl;
+extern struct svga_tracked_state svga_hw_fs_parameters;
+extern struct svga_tracked_state svga_hw_vs_parameters;
+extern struct svga_tracked_state svga_hw_update_zero_stride;
+
+/* SWTNL_DRAW
+ */
+extern struct svga_tracked_state svga_update_swtnl_draw;
+extern struct svga_tracked_state svga_update_swtnl_vdecl;
+
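+/* State update levels, lowest first.  svga_update_state() brings every
+ * level up to and including the requested one up-to-date, so that we can
+ * emit clear or draw commands.
+ */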
+#define SVGA_STATE_NEED_SWTNL        0
+#define SVGA_STATE_HW_CLEAR          1
+#define SVGA_STATE_HW_DRAW           2
+#define SVGA_STATE_SWTNL_DRAW        3
+#define SVGA_STATE_MAX               4
+
+
+enum pipe_error svga_update_state( struct svga_context *svga,
+                                   unsigned level );
+
+void svga_update_state_retry( struct svga_context *svga,
+                              unsigned level );
+
+
+enum pipe_error svga_emit_initial_state( struct svga_context *svga );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
new file mode 100644
index 0000000000..18cce7dde1
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -0,0 +1,239 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ * Hardware update 
+ */
+
+/* Map a PIPE_SHADER_* index to the corresponding SVGA3D_SHADERTYPE_* value. */
+static int svga_shader_type( int unit )
+{
+   const int svga_type = 1 + unit;
+   return svga_type;
+}
+
+
+/* Send one vec4 constant to the host unless the shadow copy in
+ * state.hw_draw.cb already matches.  Returns PIPE_OK or the
+ * SVGA3D_SetShaderConst() error. */
+static int emit_const( struct svga_context *svga,
+                       int unit,
+                       int i,
+                       const float *value )
+{
+   void *shadow = svga->state.hw_draw.cb[unit][i];
+   int ret;
+
+   /* Redundant update: nothing to emit. */
+   if (memcmp(shadow, value, 4 * sizeof(float)) == 0)
+      return PIPE_OK;
+
+   if (SVGA_DEBUG & DEBUG_CONSTS)
+      debug_printf("%s %s %d: %f %f %f %f\n",
+                   __FUNCTION__,
+                   unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG",
+                   i,
+                   value[0], value[1], value[2], value[3]);
+
+   ret = SVGA3D_SetShaderConst( svga->swc, i, svga_shader_type(unit),
+                                SVGA3D_CONST_TYPE_FLOAT, value );
+   if (ret)
+      return ret;
+
+   /* Update the shadow copy only after the emit succeeded. */
+   memcpy(shadow, value, 4 * sizeof(float));
+   return ret;
+}
+
+/* Map the constant buffer bound for 'unit' and pass each vec4 it holds
+ * to emit_const(), starting at constant index 'offset'.  A missing
+ * buffer is not an error.  Returns PIPE_OK or the first failure.
+ */
+static int emit_consts( struct svga_context *svga,
+                        int offset,
+                        int unit )
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   const float (*vec4)[4];
+   unsigned nr_vec4;
+   unsigned idx;
+   int ret = PIPE_OK;
+
+   if (svga->curr.cb[unit] == NULL)
+      return PIPE_OK;
+
+   nr_vec4 = svga->curr.cb[unit]->size / (4 * sizeof(float));
+
+   vec4 = (const float (*)[4])pipe_buffer_map(screen,
+                                              svga->curr.cb[unit],
+                                              PIPE_BUFFER_USAGE_CPU_READ);
+   if (vec4 == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   for (idx = 0; idx < nr_vec4; idx++) {
+      ret = emit_const( svga, unit, offset + idx, vec4[idx] );
+      if (ret)
+         break;
+   }
+
+   /* The mapping is released on both the success and the error path. */
+   pipe_buffer_unmap(screen, svga->curr.cb[unit]);
+   return ret;
+}
+   
+/* Upload the fragment constant buffer, then one (1/w, 1/h, 1, 1) vector
+ * per texture sampled with unnormalized coordinates.  Returns 0 or the
+ * first error from emit_consts()/emit_const(). */
+static int emit_fs_consts( struct svga_context *svga,
+                           unsigned dirty )
+{
+   const struct svga_shader_result *result = svga->state.hw_draw.fs;
+   const struct svga_fs_compile_key *key;
+   unsigned offset;
+   int ret, i;
+
+   ret = emit_consts( svga, 0, PIPE_SHADER_FRAGMENT );
+   if (ret)
+      return ret;
+
+   /* The internally generated fragment shader for xor blending has no
+    * 'result' struct.  Form the key pointer only after the NULL check:
+    * &result->key.fkey on a NULL result is undefined behaviour.
+    */
+   if (result == NULL)
+      return 0;
+
+   key = &result->key.fkey;
+   if (!key->num_unnormalized_coords)
+      return 0;
+
+   /* The scale vectors follow the shader's own constants. */
+   offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+   for (i = 0; i < key->num_textures; i++) {
+      if (key->tex[i].unnormalized) {
+         struct pipe_texture *tex = svga->curr.texture[i];
+         float data[4];
+
+         data[0] = 1.0 / (float)tex->width[0];
+         data[1] = 1.0 / (float)tex->height[0];
+         data[2] = 1.0;
+         data[3] = 1.0;
+         ret = emit_const( svga, PIPE_SHADER_FRAGMENT,
+                           key->tex[i].width_height_idx + offset, data );
+         if (ret)
+            return ret;
+      }
+   }
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_fs_parameters = 
+{
+   "hw fs params",                 /* name */
+   (SVGA_NEW_FS_CONST_BUFFER |     /* dirty: user constant buffer, */
+    SVGA_NEW_FS_RESULT |           /*   bound shader result (raised by emit_hw_fs), */
+    SVGA_NEW_TEXTURE_BINDING),     /*   textures whose sizes feed the 1/w,1/h constants */
+   emit_fs_consts                  /* update */
+};
+
+/* Upload vertex shader constants: the user constant buffer, then the
+ * prescale scale/translate pair and one vec4 per zero-stride vertex
+ * element when the compile key asks for them.  Returns 0 or an error. */
+static int emit_vs_consts( struct svga_context *svga,
+                           unsigned dirty )
+{
+   const struct svga_shader_result *result = svga->state.hw_draw.vs;
+   const struct svga_vs_compile_key *key;
+   int ret = 0;
+   unsigned offset;
+
+   /* SVGA_NEW_VS_RESULT */
+   if (result == NULL) 
+      return 0;
+
+   /* Only derive the key once result is known to be non-NULL. */
+   key = &result->key.vkey;
+
+   /* SVGA_NEW_VS_CONST_BUFFER */
+   ret = emit_consts( svga, 0, PIPE_SHADER_VERTEX );
+   if (ret)
+      return ret;
+
+   offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+   /* SVGA_NEW_VS_RESULT; values come from hw_clear.prescale (emit_viewport) */
+   if (key->need_prescale) {
+      ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                        svga->state.hw_clear.prescale.scale );
+      if (ret)
+         return ret;
+
+      ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                        svga->state.hw_clear.prescale.translate );
+      if (ret)
+         return ret;
+   }
+
+   /* SVGA_NEW_ZERO_STRIDE */
+   if (key->zero_stride_vertex_elements) {
+      unsigned i, curr_zero_stride = 0;
+      for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
+         /* Unsigned 1: a signed 1 << 31 would be undefined behaviour. */
+         if (key->zero_stride_vertex_elements & (1u << i)) {
+            ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                              svga->curr.zero_stride_constants +
+                              4 * curr_zero_stride );
+            if (ret)
+               return ret;
+            ++curr_zero_stride;
+         }
+      }
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_vs_parameters = 
+{
+   "hw vs params",
+   (SVGA_NEW_VS_CONST_BUFFER | SVGA_NEW_ZERO_STRIDE |
+    SVGA_NEW_PRESCALE |   /* emit_vs_consts uploads hw_clear.prescale; emit_viewport raises this flag */
+    SVGA_NEW_VS_RESULT),
+   emit_vs_consts
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
new file mode 100644
index 0000000000..7d7f93d8e3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -0,0 +1,455 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
+
+/* Emit render-target bindings (color, depth, stencil) that differ from
+ * the shadow copy in state.hw_clear.framebuffer.  Returns 0 or an error. */
+static int emit_framebuffer( struct svga_context *svga,
+                             unsigned dirty )
+{
+   const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   enum pipe_error ret;
+   unsigned buf;
+
+   /* Bind every color buffer that differs from what was last sent,
+    * remembering the new binding in the hw_clear shadow copy.
+    */
+   for (buf = 0; buf < PIPE_MAX_COLOR_BUFS; buf++) {
+      if (curr->cbufs[buf] == hw->cbufs[buf])
+         continue;
+
+      ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + buf,
+                                   curr->cbufs[buf]);
+      if (ret != PIPE_OK)
+         return ret;
+
+      pipe_surface_reference(&hw->cbufs[buf], curr->cbufs[buf]);
+   }
+
+   /* Nothing more to do when the depth/stencil surface is unchanged. */
+   if (curr->zsbuf == hw->zsbuf)
+      return 0;
+
+   ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
+   if (ret != PIPE_OK)
+      return ret;
+
+   /* The surface doubles as the stencil target only for Z24S8;
+    * otherwise the stencil target is set to NULL.
+    */
+   if (curr->zsbuf && curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM)
+      ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf);
+   else
+      ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL);
+   if (ret != PIPE_OK)
+      return ret;
+
+   pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_framebuffer = 
+{
+   "hw framebuffer state",     /* name */
+   SVGA_NEW_FRAME_BUFFER,      /* dirty: emit_framebuffer reads curr.framebuffer */
+   emit_framebuffer            /* update */
+};
+
+
+
+
+/* Derive the SVGA3D viewport rect, Z range and vertex prescale from
+ * gallium state.  Rect and Z range are emitted when they differ from
+ * state.hw_clear; a changed prescale raises SVGA_NEW_PRESCALE. */
+static int emit_viewport( struct svga_context *svga,
+                          unsigned dirty )
+{
+   const struct pipe_viewport_state *viewport = &svga->curr.viewport;
+   struct svga_prescale prescale;
+   SVGA3dRect rect;
+   /* Not sure if this state is relevant with POSITIONT.  Probably
+    * not, but setting to 0,1 avoids some state pingponging.
+    */
+   float range_min = 0.0;
+   float range_max = 1.0;
+   float flip = -1.0;   /* sign applied to the gallium y scale below */
+   boolean degenerate = FALSE;
+   enum pipe_error ret;
+
+   float fb_width = svga->curr.framebuffer.width;
+   float fb_height = svga->curr.framebuffer.height;
+   /* Zero the whole struct: it is compared with memcmp() at the end. */
+   memset( &prescale, 0, sizeof(prescale) );
+
+   if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) {
+
+      /* Avoid POSITIONT as it has a non trivial implementation outside the D3D
+       * API. Always generate a vertex shader.
+       */
+      rect.x = 0;
+      rect.y = 0;
+      rect.w = svga->curr.framebuffer.width;
+      rect.h = svga->curr.framebuffer.height;
+
+      prescale.scale[0] = 2.0 / (float)rect.w;
+      prescale.scale[1] = - 2.0 / (float)rect.h;
+      prescale.scale[2] = 1.0;
+      prescale.scale[3] = 1.0;
+      prescale.translate[0] = -1.0f;
+      prescale.translate[1] = 1.0f;
+      prescale.translate[2] = 0;
+      prescale.translate[3] = 0;
+      prescale.enabled = TRUE;
+   } else {
+
+      /* Examine gallium viewport transformation and produce a screen
+       * rectangle and possibly vertex shader pre-transformation to
+       * get the same results.
+       */
+      float fx =        viewport->scale[0] * -1.0 + viewport->translate[0];
+      float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1];
+      float fw =        viewport->scale[0] * 2; 
+      float fh = flip * viewport->scale[1] * 2; 
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "\ninitial %f,%f %fx%f\n",
+               fx,
+               fy,
+               fw,
+               fh);
+
+      prescale.scale[0] = 1.0;
+      prescale.scale[1] = 1.0;
+      prescale.scale[2] = 1.0;
+      prescale.scale[3] = 1.0;
+      prescale.translate[0] = 0;
+      prescale.translate[1] = 0;
+      prescale.translate[2] = 0;
+      prescale.translate[3] = 0;
+      prescale.enabled = TRUE;
+
+      /* Make width/height positive, then clip the rectangle to the
+       * framebuffer, folding each adjustment into prescale. */
+      if (fw < 0) {
+         prescale.scale[0] *= -1.0;
+         prescale.translate[0] += -fw;
+         fw = -fw;
+         fx =        viewport->scale[0] * 1.0 + viewport->translate[0];
+      }
+
+      if (fh < 0) {
+         prescale.scale[1] *= -1.0;
+         prescale.translate[1] += -fh;
+         fh = -fh;
+         fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1];
+      }
+
+      if (fx < 0) {
+         prescale.translate[0] += fx;
+         prescale.scale[0] *= fw / (fw + fx); 
+         fw += fx;
+         fx = 0;
+      }
+
+      if (fy < 0) {
+         prescale.translate[1] += fy;
+         prescale.scale[1] *= fh / (fh + fy); 
+         fh += fy;
+         fy = 0;
+      }
+
+      if (fx + fw > fb_width) {
+         prescale.scale[0] *= fw / (fb_width - fx); 
+         prescale.translate[0] -= fx * (fw / (fb_width - fx));
+         prescale.translate[0] += fx;
+         fw = fb_width - fx;
+         
+      }
+
+      if (fy + fh > fb_height) {
+         prescale.scale[1] *= fh / (fb_height - fy);
+         prescale.translate[1] -= fy * (fh / (fb_height - fy));
+         prescale.translate[1] += fy;
+         fh = fb_height - fy;
+      }
+
+      if (fw < 0 || fh < 0) {
+         fw = fh = fx = fy = 0;
+         degenerate = TRUE;
+         goto out;
+      }
+      /* NOTE(review): fw == 0 / fh == 0 pass the check above, and H[] below (rect.w/2, rect.h/2) is used as a divisor -- confirm this is intended. */
+
+      /* D3D viewport is integer space.  Convert fx,fy,etc. to
+       * integers.
+       *
+       * TODO: adjust pretranslate correct for any subpixel error
+       * introduced converting to integers.
+       */
+      rect.x = fx;
+      rect.y = fy;
+      rect.w = fw;
+      rect.h = fh;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "viewport error %f,%f %fx%f\n",
+               fabs((float)rect.x - fx),
+               fabs((float)rect.y - fy),
+               fabs((float)rect.w - fw),
+               fabs((float)rect.h - fh));
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "viewport %d,%d %dx%d\n",
+               rect.x,
+               rect.y,
+               rect.w,
+               rect.h);
+      
+
+      /* Finally, to get GL rasterization rules, need to tweak the
+       * screen-space coordinates slightly relative to D3D which is
+       * what hardware implements natively.
+       */
+      if (svga->curr.rast->templ.gl_rasterization_rules) {
+         float adjust_x = 0.0;
+         float adjust_y = 0.0;
+
+         switch (svga->curr.reduced_prim) {
+         case PIPE_PRIM_LINES:
+            adjust_x = -0.5;
+            adjust_y = 0;
+            break;
+         case PIPE_PRIM_POINTS:
+         case PIPE_PRIM_TRIANGLES:
+            adjust_x = -0.375;
+            adjust_y = -0.5;
+            break;
+         }
+
+         prescale.translate[0] += adjust_x;
+         prescale.translate[1] += adjust_y;
+         prescale.translate[2] = 0.5; /* D3D clip space */
+         prescale.scale[2]     = 0.5; /* D3D clip space */
+      }
+
+
+      range_min = viewport->scale[2] * -1.0 + viewport->translate[2];
+      range_max = viewport->scale[2] *  1.0 + viewport->translate[2];
+
+      /* D3D (and by implication SVGA) doesn't like dealing with zmax
+       * less than zmin.  Detect that case, flip the depth range and
+       * invert our z-scale factor to achieve the same effect.
+       */
+      if (range_min > range_max) {
+         float range_tmp;
+         range_tmp = range_min; 
+         range_min = range_max; 
+         range_max = range_tmp;
+         prescale.scale[2]     = -prescale.scale[2];
+      }
+   }
+
+   if (prescale.enabled) {
+      float H[2];
+      float J[2];
+      int i;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "prescale %f,%f %fx%f\n",
+               prescale.translate[0],
+               prescale.translate[1],
+               prescale.scale[0],
+               prescale.scale[1]);
+
+      H[0] = (float)rect.w / 2.0;
+      H[1] = -(float)rect.h / 2.0;
+      J[0] = (float)rect.x + (float)rect.w / 2.0;
+      J[1] = (float)rect.y + (float)rect.h / 2.0;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "H %f,%f\n"
+               "J %fx%f\n",
+               H[0],
+               H[1],
+               J[0],
+               J[1]);
+
+      /* Adjust prescale to take into account the fact that it is
+       * going to be applied prior to the perspective divide and
+       * viewport transformation.
+       * 
+       * Vwin = H(Vc/Vc.w) + J
+       *
+       * We want to tweak Vwin with scale and translation from above,
+       * as in:
+       *
+       * Vwin' = S Vwin + T
+       *
+       * But we can only modify the values at Vc.  Plugging all the
+       * above together, and rearranging, eventually we get:
+       *
+       *   Vwin' = H(Vc'/Vc'.w) + J
+       * where:
+       *   Vc' = SVc + KVc.w
+       *   K = (T + (S-1)J) / H
+       *
+       * Overwrite prescale.translate with values for K:
+       */
+      for (i = 0; i < 2; i++) {
+         prescale.translate[i] = ((prescale.translate[i] +
+                                   (prescale.scale[i] - 1.0) * J[i]) / H[i]);
+      }
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "clipspace %f,%f %fx%f\n",
+               prescale.translate[0],
+               prescale.translate[1],
+               prescale.scale[0],
+               prescale.scale[1]);
+   }
+
+out:
+   if (degenerate) {
+      rect.x = 0;
+      rect.y = 0;
+      rect.w = 1;
+      rect.h = 1;
+      prescale.enabled = FALSE;
+   }
+
+   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      if(ret != PIPE_OK)
+         return ret;
+      /* NOTE(review): the sizeof assert below runs only after memcmp/memcpy have already relied on it. */
+      memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
+      assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+   }
+
+   if (svga->state.hw_clear.depthrange.zmin != range_min ||
+       svga->state.hw_clear.depthrange.zmax != range_max) 
+   {
+      ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+      if(ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_clear.depthrange.zmin = range_min;
+      svga->state.hw_clear.depthrange.zmax = range_max;
+   }
+
+   if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) {
+      svga->dirty |= SVGA_NEW_PRESCALE;
+      svga->state.hw_clear.prescale = prescale;
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_viewport = 
+{
+   "hw viewport state",
+   ( SVGA_NEW_FRAME_BUFFER |          /* emit_viewport reads framebuffer width/height */
+     SVGA_NEW_VIEWPORT |              /* curr.viewport scale/translate */
+     SVGA_NEW_RAST |                  /* bypass_vs_clip_and_viewport, gl_rasterization_rules */
+     SVGA_NEW_REDUCED_PRIMITIVE ),    /* curr.reduced_prim selects the GL raster adjustment */
+   emit_viewport
+};
+
+
+/***********************************************************************
+ * Scissor state
+ */
+static int emit_scissor_rect( struct svga_context *svga,
+                              unsigned dirty )
+{
+   const struct pipe_scissor_state *sc = &svga->curr.scissor;
+   SVGA3dRect box;
+
+   /* Gallium stores min/max corners; SVGA3dRect takes origin and size. */
+   box.x = sc->minx;
+   box.y = sc->miny;
+   box.w = sc->maxx - sc->minx; /* + 1 ?? */
+   box.h = sc->maxy - sc->miny; /* + 1 ?? */
+   return SVGA3D_SetScissorRect(svga->swc, &box);
+}
+
+
+struct svga_tracked_state svga_hw_scissor = 
+{
+   "hw scissor state",    /* name */
+   SVGA_NEW_SCISSOR,      /* dirty: emit_scissor_rect reads curr.scissor */
+   emit_scissor_rect      /* update */
+};
+
+
+/***********************************************************************
+ * Userclip state
+ */
+
+static int emit_clip_planes( struct svga_context *svga,
+                             unsigned dirty )
+{
+   unsigned plane = 0;
+
+   /* Emit each of the curr.clip.nr user clip planes in index order,
+    * giving up at the first failure.
+    * TODO: just emit directly from svga_set_clip_state()?
+    */
+   while (plane < svga->curr.clip.nr) {
+      enum pipe_error ret = SVGA3D_SetClipPlane( svga->swc, plane,
+                                                 svga->curr.clip.ucp[plane] );
+      if (ret != PIPE_OK)
+         return ret;
+      plane++;
+   }
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_clip_planes = 
+{
+   "hw clip planes",   /* was "hw viewport state", a copy/paste of svga_hw_viewport's name */
+   SVGA_NEW_CLIP,
+   emit_clip_planes
+};
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
new file mode 100644
index 0000000000..6ec38ed3e4
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -0,0 +1,282 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+
+
+static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
+                                   const struct svga_fs_compile_key *b )
+{
+   const unsigned nbytes = svga_fs_key_size( a );   /* number of key bytes compared */
+   return memcmp( a, b, nbytes );
+}
+
+
+static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs,
+                                                 const struct svga_fs_compile_key *key )
+{
+   struct svga_shader_result *cand = fs->base.results;
+
+   assert(key);
+
+   /* Linear scan of the results already compiled for this shader. */
+   while (cand != NULL && compare_fs_keys( key, &cand->key.fkey ) != 0)
+      cand = cand->next;
+
+   /* Either the matching result, or NULL when the list was exhausted. */
+   return cand;
+}
+
+
+static enum pipe_error compile_fs( struct svga_context *svga,
+                                   struct svga_fragment_shader *fs,
+                                   const struct svga_fs_compile_key *key,
+                                   struct svga_shader_result **out_result )
+{
+   struct svga_shader_result *variant;
+   enum pipe_error err;
+
+   /* A NULL translation is reported as out-of-memory. */
+   variant = svga_translate_fragment_program( fs, key );
+   if (variant == NULL)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   err = SVGA3D_DefineShader(svga->swc, 
+                             svga->state.next_fs_id,
+                             SVGA3D_SHADERTYPE_PS,
+                             variant->tokens, 
+                             variant->nr_tokens * sizeof variant->tokens[0]);
+   if (err) {
+      /* The result was never linked into the shader; free it here. */
+      svga_destroy_shader_result( variant );
+      return err;
+   }
+
+   /* Success: assign the id just defined and push the result onto
+    * the front of the shader's result list.
+    */
+   variant->id = svga->state.next_fs_id++;
+   variant->next = fs->base.results;
+   fs->base.results = variant;
+   *out_result = variant;
+
+   return PIPE_OK;
+}
+
+/* The blend workaround for simulating logicop xor behaviour requires
+ * that the incoming fragment color be white.  This change achieves
+ * that by hooking up a hard-wired fragment shader that just emits
+ * color 1,1,1,1
+ *   
+ * This is a slightly incomplete solution as it assumes that the
+ * actual bound shader has no other effects beyond generating a
+ * fragment color.  In particular shaders containing TEXKIL and/or
+ * depth-write will not have the correct behaviour, nor will those
+ * expecting to use alphatest.
+ *   
+ * These are avoidable issues, but they are not much worse than the
+ * unavoidable ones associated with this technique, so it's not clear
+ * how much effort should be expended trying to resolve them - the
+ * ultimate result will still not be correct in most cases.
+ *
+ * Shader below was generated with:
+ *   SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt
+ */
+static int emit_white_fs( struct svga_context *svga )
+{
+   /* Hand-assembled token stream for:
+    *
+    *    ps_3_0
+    *    def c0, 1.000000, 0.000000, 0.000000, 1.000000
+    *    mov oC0, c0.x
+    *    end
+    */
+   static const unsigned white_ps[] = {
+      0xffff0300,
+      0x05000051,
+      0xa00f0000,
+      0x3f800000, 0x00000000, 0x00000000, 0x3f800000,
+      0x02000001,
+      0x800f0800,
+      0xa0000000,
+      0x0000ffff,
+   };
+   int err;
+
+   err = SVGA3D_DefineShader(svga->swc,
+                             svga->state.next_fs_id,
+                             SVGA3D_SHADERTYPE_PS,
+                             white_ps,
+                             sizeof white_ps);
+   if (err != 0)
+      return err;
+
+   /* Record the id; emit_hw_fs checks it before defining the shader again. */
+   svga->state.white_fs_id = svga->state.next_fs_id++;
+
+   return 0;
+}
+
+
+/* Build the fragment shader compile key from current state.
+ * Depends on: SVGA_NEW_TEXTURE_BINDING, SVGA_NEW_RAST,
+ * SVGA_NEW_NEED_SWTNL, SVGA_NEW_SAMPLER.
+ * The key is zeroed first; compare_fs_keys() compares keys with memcmp().
+ * Always returns 0.
+ */
+static int make_fs_key( const struct svga_context *svga,
+                        struct svga_fs_compile_key *key )
+{
+   int i;
+   int idx = 0;   /* next width_height_idx to hand out */
+
+   memset(key, 0, sizeof *key);
+
+   /* Only need fragment shader fixup for twoside lighting if doing
+    * hwtnl.  Otherwise the draw module does the whole job for us.
+    *
+    * SVGA_NEW_NEED_SWTNL
+    */
+   if (!svga->state.sw.need_swtnl) {
+      /* SVGA_NEW_RAST
+       */
+      key->light_twoside = svga->curr.rast->templ.light_twoside;
+      key->front_cw = (svga->curr.rast->templ.front_winding == 
+                       PIPE_WINDING_CW);
+   }
+
+   /* XXX: want to limit this to the textures that the shader actually
+    * refers to.
+    *
+    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+    */
+   for (i = 0; i < svga->curr.num_textures; i++) {
+      if (svga->curr.texture[i]) {
+         assert(svga->curr.sampler[i]);
+         key->tex[i].texture_target = svga->curr.texture[i]->target;
+         if (!svga->curr.sampler[i]->normalized_coords) {
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+         }
+      }
+   }
+   key->num_textures = svga->curr.num_textures;
+
+   /* SVGA_NEW_SAMPLER: copy the compare mode/func of each bound sampler. */
+   for (i = 0; i < svga->curr.num_samplers; ++i) {
+      if (svga->curr.sampler[i]) {
+         key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
+         key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
+      }
+   }
+
+   return 0;
+}
+
+
+
+static int emit_hw_fs( struct svga_context *svga,
+                       unsigned dirty )
+{
+   struct svga_shader_result *result = NULL;
+   unsigned id = SVGA3D_INVALID_ID;
+   int ret = 0;
+
+   if (!svga->curr.blend->need_white_fragments) {
+      struct svga_fragment_shader *fs = svga->curr.fs;
+      struct svga_fs_compile_key key;
+
+      /* SVGA_NEW_TEXTURE_BINDING
+       * SVGA_NEW_RAST
+       * SVGA_NEW_NEED_SWTNL
+       * SVGA_NEW_SAMPLER
+       */
+      ret = make_fs_key( svga, &key );
+      if (ret)
+         return ret;
+
+      /* Reuse a result compiled earlier for the same key, if any. */
+      result = search_fs_key( fs, &key );
+      if (result == NULL) {
+         ret = compile_fs( svga, fs, &key, &result );
+         if (ret)
+            return ret;
+      }
+
+      assert (result);
+      id = result->id;
+   }
+   else {
+      /* SVGA_NEW_BLEND: constant-white shader, defined on first use;
+       * 'result' stays NULL on this path. */
+      if (svga->state.white_fs_id == SVGA3D_INVALID_ID) {
+         ret = emit_white_fs( svga );
+         if (ret)
+            return ret;
+      }
+      id = svga->state.white_fs_id;
+   }
+
+   assert(id != SVGA3D_INVALID_ID);
+
+   /* Nothing to emit when this shader id is already bound. */
+   if (id == svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT])
+      return 0;
+
+   ret = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, id );
+   if (ret)
+      return ret;
+
+   svga->dirty |= SVGA_NEW_FS_RESULT;
+   svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
+   svga->state.hw_draw.fs = result;
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_fs = 
+{
+   "fragment shader (hwtnl)",
+   (SVGA_NEW_FS |                 /* presumably a change of curr.fs (inferred from the flag name) */
+    SVGA_NEW_TEXTURE_BINDING |    /* make_fs_key: bound textures */
+    SVGA_NEW_NEED_SWTNL |         /* make_fs_key: state.sw.need_swtnl */
+    SVGA_NEW_RAST |               /* make_fs_key: light_twoside, front_winding */
+    SVGA_NEW_SAMPLER |            /* make_fs_key: normalized_coords, compare mode/func */
+    SVGA_NEW_BLEND),              /* emit_hw_fs: blend->need_white_fragments */
+   emit_hw_fs
+};
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
new file mode 100644
index 0000000000..00201b8091
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -0,0 +1,200 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_debug.h"
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+/* Translate a gallium vertex format to an SVGA3D declaration type. */
+static INLINE SVGA3dDeclType
+svga_translate_vertex_format(enum pipe_format format)
+{
+   SVGA3dDeclType decl_type;
+   switch (format) {
+   case PIPE_FORMAT_R32_FLOAT:            decl_type = SVGA3D_DECLTYPE_FLOAT1;   break;
+   case PIPE_FORMAT_R32G32_FLOAT:         decl_type = SVGA3D_DECLTYPE_FLOAT2;   break;
+   case PIPE_FORMAT_R32G32B32_FLOAT:      decl_type = SVGA3D_DECLTYPE_FLOAT3;   break;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:   decl_type = SVGA3D_DECLTYPE_FLOAT4;   break;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:       decl_type = SVGA3D_DECLTYPE_D3DCOLOR; break;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:     decl_type = SVGA3D_DECLTYPE_UBYTE4;   break;
+   case PIPE_FORMAT_R16G16_SSCALED:       decl_type = SVGA3D_DECLTYPE_SHORT2;   break;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED: decl_type = SVGA3D_DECLTYPE_SHORT4;   break;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:       decl_type = SVGA3D_DECLTYPE_UBYTE4N;  break;
+   case PIPE_FORMAT_R16G16_SNORM:         decl_type = SVGA3D_DECLTYPE_SHORT2N;  break;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:   decl_type = SVGA3D_DECLTYPE_SHORT4N;  break;
+   case PIPE_FORMAT_R16G16_UNORM:         decl_type = SVGA3D_DECLTYPE_USHORT2N; break;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:   decl_type = SVGA3D_DECLTYPE_USHORT4N; break;
+   /* These formats don't exist yet:
+   case PIPE_FORMAT_R10G10B10_USCALED:    return SVGA3D_DECLTYPE_UDEC3;
+   case PIPE_FORMAT_R10G10B10_SNORM:      return SVGA3D_DECLTYPE_DEC3N;
+   case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
+   case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
+   */
+   default:
+      /* Many formats have no hardware support, so this case is hit
+       * regularly; the MAX value means swvfetch will be needed. */
+      decl_type = SVGA3D_DECLTYPE_MAX;
+      break;
+   }
+   return decl_type;
+}
+
+
+/* Translate the vertex element formats, stopping at the first unsupported
+ * one, and raise SVGA_NEW_NEED_SWVFETCH when the swvfetch need changes. */
+static int update_need_swvfetch( struct svga_context *svga, unsigned dirty )
+{
+   boolean unsupported = FALSE;
+   unsigned elem;
+
+   for (elem = 0; !unsupported && elem < svga->curr.num_vertex_elements; elem++) {
+      svga->state.sw.ve_format[elem] =
+         svga_translate_vertex_format( svga->curr.ve[elem].src_format );
+      if (svga->state.sw.ve_format[elem] == SVGA3D_DECLTYPE_MAX)
+         unsupported = TRUE;
+   }
+
+   /* Only touch the dirty bits when the answer actually changes. */
+   if (svga->state.sw.need_swvfetch != unsupported) {
+      svga->state.sw.need_swvfetch = unsupported;
+      svga->dirty |= SVGA_NEW_NEED_SWVFETCH;
+   }
+   return 0;
+}
+
+/* Tracked state entry for update_need_swvfetch (SVGA_NEW_VELEMENT). */
+struct svga_tracked_state svga_update_need_swvfetch = {
+   "update need_swvfetch",
+   SVGA_NEW_VELEMENT,
+   update_need_swvfetch
+};
+
+
+/*********************************************************************** 
+ */
+
+/* Recompute state.sw.need_pipeline from the rasterizer, edgeflag and
+ * user clip state; SVGA_NEW_NEED_PIPELINE is raised when it changes. */
+static int update_need_pipeline( struct svga_context *svga, unsigned dirty )
+{
+   const int prim_bit = 1 << svga->curr.reduced_prim;
+   boolean wanted = FALSE;
+
+   /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
+    */
+   if (svga->curr.rast->need_pipeline & prim_bit) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n",
+               __FUNCTION__, svga->curr.rast->need_pipeline, prim_bit );
+      wanted = TRUE;
+   }
+
+   /* SVGA_NEW_EDGEFLAGS: non-FILL hw_unfilled, triangles, edgeflags set.
+    */
+   if (svga->curr.edgeflags != NULL &&
+       svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES &&
+       svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
+      wanted = TRUE;
+   }
+
+   /* SVGA_NEW_CLIP: clip.nr is non-zero and the bypass flag is off.
+    */
+   if (svga->curr.clip.nr &&
+       !svga->curr.rast->templ.bypass_vs_clip_and_viewport) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__);
+      wanted = TRUE;
+   }
+
+   /* Only touch the dirty bits when the value changes. */
+   if (svga->state.sw.need_pipeline != wanted) {
+      svga->state.sw.need_pipeline = wanted;
+      svga->dirty |= SVGA_NEW_NEED_PIPELINE;
+   }
+
+   return 0;
+}
+
+
+/* Tracked state entry for update_need_pipeline. */
+struct svga_tracked_state svga_update_need_pipeline = {
+   "need pipeline",
+   (SVGA_NEW_RAST | SVGA_NEW_CLIP |
+    SVGA_NEW_EDGEFLAGS |  /* update_need_pipeline reads curr.edgeflags */
+    SVGA_NEW_REDUCED_PRIMITIVE),
+   update_need_pipeline
+};
+
+
+/*********************************************************************** 
+ */
+
+/* Combine the swvfetch and pipeline requirements, together with the
+ * debug overrides, into state.sw.need_swtnl. */
+static int update_need_swtnl( struct svga_context *svga, unsigned dirty )
+{
+   boolean swtnl;
+
+   /* debug.no_swtnl clears both software requirements. */
+   if (svga->debug.no_swtnl) {
+      svga->state.sw.need_pipeline = 0;
+      svga->state.sw.need_swvfetch = 0;
+   }
+
+   /* debug.force_swtnl wins over whatever the two flags say. */
+   if (svga->debug.force_swtnl)
+      swtnl = 1;
+   else
+      swtnl = (svga->state.sw.need_swvfetch || svga->state.sw.need_pipeline);
+
+   if (svga->state.sw.need_swtnl == swtnl)
+      return 0;
+
+   SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
+            "%s need_swvfetch: %s, need_pipeline %s\n",
+            __FUNCTION__,
+            svga->state.sw.need_swvfetch ? "true" : "false",
+            svga->state.sw.need_pipeline ? "true" : "false");
+   svga->state.sw.need_swtnl = swtnl;
+   svga->dirty |= SVGA_NEW_NEED_SWTNL;
+   svga->swtnl.new_vdecl = TRUE;
+   return 0;
+}
+
+
+/* Tracked state entry for update_need_swtnl. */
+struct svga_tracked_state svga_update_need_swtnl = {
+   "need swtnl",
+   ( SVGA_NEW_NEED_SWVFETCH |
+     SVGA_NEW_NEED_PIPELINE ),
+   update_need_swtnl
+};
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
new file mode 100644
index 0000000000..8b6803a285
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -0,0 +1,268 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+
+struct rs_queue {
+   unsigned rs_count;                     /* number of entries used in rs[] */
+   SVGA3dRenderState rs[SVGA3D_RS_MAX];   /* queued (state, value) pairs */
+};
+
+/* Queue a render state in the caller's `queue` if it differs from the cached value. */
+#define EMIT_RS(svga, value, token, fail)                         \
+do {                                                              \
+   if ((svga)->state.hw_draw.rs[SVGA3D_RS_##token] != (value)) {  \
+      svga_queue_rs( &queue, SVGA3D_RS_##token, (value) );        \
+      (svga)->state.hw_draw.rs[SVGA3D_RS_##token] = (value);      \
+   }                                                              \
+} while (0)
+/* Same as EMIT_RS for a float, compared and stored as its fui() bits. */
+#define EMIT_RS_FLOAT(svga, fvalue, token, fail)                  \
+do {                                                              \
+   unsigned value = fui(fvalue);                                  \
+   if ((svga)->state.hw_draw.rs[SVGA3D_RS_##token] != value) {    \
+      svga_queue_rs( &queue, SVGA3D_RS_##token, value );          \
+      (svga)->state.hw_draw.rs[SVGA3D_RS_##token] = value;        \
+   }                                                              \
+} while (0)
+
+
+/* Append one (state, value) pair to the render state queue. */
+static INLINE void
+svga_queue_rs( struct rs_queue *q, unsigned rss, unsigned value )
+{
+   assert(q->rs_count < sizeof(q->rs)/sizeof(q->rs[0])); /* stay inside rs[] */
+   q->rs[q->rs_count].state = rss;
+   q->rs[q->rs_count].uintValue = value;
+   q->rs_count++;
+}
+
+
+/* Compare the cached hardware render states against the current state,
+ * queue the ones that differ and emit them in one SetRenderState command
+ * (the simplest alternative would be to emit the whole state each time).
+ * Returns 0, or PIPE_ERROR_OUT_OF_MEMORY if the command cannot be begun. */
+static int emit_rss( struct svga_context *svga,
+                     unsigned dirty )
+{
+   struct rs_queue queue;
+   /* EMIT_RS and EMIT_RS_FLOAT append to this local by name. */
+   queue.rs_count = 0;
+
+   if (dirty & SVGA_NEW_BLEND) {
+      const struct svga_blend_state *curr = svga->curr.blend;
+
+      EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
+      EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail );
+
+      if (curr->rt[0].blend_enable) {
+         EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail );
+         EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail );
+         EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail );
+
+         EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, 
+                  SEPARATEALPHABLENDENABLE, fail );
+
+         if (curr->rt[0].separate_alpha_blend_enable) {
+            EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail );
+            EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail );
+            EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail );
+         }
+      }
+   }
+
+   /* Also redone on SVGA_NEW_RAST: two-sided stencil reads front_winding. */
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+      const struct svga_depth_stencil_state *curr = svga->curr.depth; 
+      const struct svga_rasterizer_state *rast = svga->curr.rast; 
+
+      if (!curr->stencil[0].enabled) 
+      {
+         /* Stencil disabled
+          */
+         EMIT_RS( svga, FALSE, STENCILENABLE, fail );
+         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+      }
+      else if (curr->stencil[0].enabled && !curr->stencil[1].enabled)
+      {
+         /* Regular stencil: only stencil[0] is enabled
+          */
+         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+
+         EMIT_RS( svga, curr->stencil[0].func,  STENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[0].fail,  STENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[0].pass,  STENCILPASS, fail );
+         
+         EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+      }
+      else 
+      {
+         int cw, ccw;
+
+         /* Hardware frontwinding is always CW, so if ours is also CW,
+          * then our definition of front face agrees with hardware.
+          * Otherwise need to flip.  cw/ccw index curr->stencil[].
+          */
+         if (rast->templ.front_winding == PIPE_WINDING_CW) {
+            cw = 0;
+            ccw = 1;
+         }
+         else {
+            cw = 1;
+            ccw = 0;
+         }
+
+         /* Twoside stencil
+          */
+         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+         EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail );
+
+         EMIT_RS( svga, curr->stencil[cw].func,  STENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[cw].fail,  STENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[cw].pass,  STENCILPASS, fail );
+
+         EMIT_RS( svga, curr->stencil[ccw].func,  CCWSTENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[ccw].fail,  CCWSTENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[ccw].pass,  CCWSTENCILPASS, fail );
+
+         EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+      }
+
+      EMIT_RS( svga, curr->zenable, ZENABLE, fail );
+      if (curr->zenable) {
+         EMIT_RS( svga, curr->zfunc, ZFUNC, fail );
+         EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail );
+      }
+
+      EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail );
+      if (curr->alphatestenable) {
+         EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail );
+         EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail );
+      }
+   }
+
+
+   if (dirty & SVGA_NEW_RAST)
+   {
+      const struct svga_rasterizer_state *curr = svga->curr.rast; 
+
+      /* Shademode: still need to rearrange index list to move
+       * flat-shading PV first vertex.
+       */
+      EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
+      EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+      EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
+      EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
+      EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
+      EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail );
+   }
+
+   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE))
+   {
+      const struct svga_rasterizer_state *curr = svga->curr.rast; 
+      float slope = 0.0;
+      float bias  = 0.0;
+
+      /* Need to modify depth bias according to bound depthbuffer
+       * format.  Don't do hardware depthbias while the software
+       * pipeline is active; slope and bias stay 0.0 in that case.
+       */
+      if (!svga->state.sw.need_pipeline &&
+          svga->curr.framebuffer.zsbuf)
+      {
+         slope = curr->slopescaledepthbias;
+         bias  = svga->curr.depthscale * curr->depthbias;
+      }
+
+      EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail );
+      EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail );
+   }
+
+   /* Emit everything queued above in a single command. */
+   if (queue.rs_count) {
+      SVGA3dRenderState *rs;
+
+      if (SVGA3D_BeginSetRenderState( svga->swc,
+                                      &rs,
+                                      queue.rs_count ) != PIPE_OK)
+         goto fail;
+
+      memcpy( rs,
+              queue.rs,
+              queue.rs_count * sizeof queue.rs[0]);
+      
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   /* NOTE(review): "also blend color" -- nothing is emitted for the blend
+    * color in this function; presumably a TODO left by the author. */
+
+   return 0;
+
+fail:
+   /* XXX: need to poison cached hardware state on failure to ensure
+    * dirty state gets re-emitted.  Fix this by re-instating partial
+    * FIFOCommit command and only updating cached hw state once the
+    * initial allocation has succeeded.
+    */
+   memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs));
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/* Tracked state entry: name, SVGA_NEW_* flag mask, emit_rss callback. */
+struct svga_tracked_state svga_hw_rss = {
+   "hw rss state",
+
+   ( SVGA_NEW_RAST |
+     SVGA_NEW_BLEND |
+     SVGA_NEW_DEPTH_STENCIL |
+     SVGA_NEW_NEED_PIPELINE |
+     SVGA_NEW_FRAME_BUFFER ),
+
+   emit_rss
+};
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
new file mode 100644
index 0000000000..b313794520
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -0,0 +1,279 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_screen_texture.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+/* Drop the sampler view and texture references of every unit covered by
+ * the current texture count or the hw view count; views become dirty. */
+void svga_cleanup_tss_binding(struct svga_context *svga)
+{
+   const unsigned count = MAX2( svga->curr.num_textures,
+                                svga->state.hw_draw.num_views );
+   int unit;
+
+   for (unit = 0; unit < count; unit++) {
+      struct svga_hw_view_state *hw_view = &svga->state.hw_draw.views[unit];
+      svga_sampler_view_reference( &hw_view->v, NULL );
+      pipe_texture_reference( &svga->curr.texture[unit], NULL );
+      pipe_texture_reference( &hw_view->texture, NULL );
+      hw_view->dirty = 1;
+   }
+}
+
+
+/* Rebuild the sampler view of each texture unit whose texture or lod
+ * range changed and emit SVGA3D_TS_BIND_TEXTURE for every dirty unit.
+ * Returns 0, or PIPE_ERROR_OUT_OF_MEMORY if the command cannot begin. */
+static int
+update_tss_binding(struct svga_context *svga, unsigned dirty )
+{
+   unsigned i;
+   unsigned count = MAX2( svga->curr.num_textures,
+                          svga->state.hw_draw.num_views );
+   unsigned min_lod;
+   unsigned max_lod;
+
+   struct {
+      struct {
+         unsigned unit;
+         struct svga_hw_view_state *view;
+      } bind[PIPE_MAX_SAMPLERS];
+
+      unsigned bind_count;
+   } queue;
+
+   queue.bind_count = 0;
+
+   for (i = 0; i < count; i++) {
+      const struct svga_sampler_state *s = svga->curr.sampler[i];
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+      /* get min max lod.  A unit may have a texture but no sampler
+       * state bound; s is NULL then and must not be dereferenced. */
+      if (svga->curr.texture[i] && s) {
+         min_lod = MAX2(s->view_min_lod, 0);
+         max_lod = MIN2(s->view_max_lod, svga->curr.texture[i]->last_level);
+      } else {
+         min_lod = 0;
+         max_lod = 0;
+      }
+
+      if (view->texture != svga->curr.texture[i] ||
+          view->min_lod != min_lod ||
+          view->max_lod != max_lod) {
+         svga_sampler_view_reference(&view->v, NULL);
+         pipe_texture_reference( &view->texture, svga->curr.texture[i] );
+
+         view->dirty = TRUE;
+         view->min_lod = min_lod;
+         view->max_lod = max_lod;
+
+         if (svga->curr.texture[i])
+            view->v = svga_get_tex_sampler_view(&svga->pipe,
+                                                svga->curr.texture[i],
+                                                min_lod, max_lod);
+      }
+
+      if (view->dirty) {
+         queue.bind[queue.bind_count].unit = i;
+         queue.bind[queue.bind_count].view = view;
+         queue.bind_count++;
+      }
+      else if (view->v) {
+         svga_validate_sampler_view(svga, view->v);
+      }
+   }
+
+   svga->state.hw_draw.num_views = svga->curr.num_textures;
+
+   if (queue.bind_count) {
+      SVGA3dTextureState *ts;
+
+      if (SVGA3D_BeginSetTextureState( svga->swc,
+                                       &ts,
+                                       queue.bind_count ) != PIPE_OK)
+         goto fail;
+
+      for (i = 0; i < queue.bind_count; i++) {
+         ts[i].stage = queue.bind[i].unit;
+         ts[i].name = SVGA3D_TS_BIND_TEXTURE;
+
+         if (queue.bind[i].view->v) {
+            svga->swc->surface_relocation(svga->swc,
+                                          &ts[i].value,
+                                          queue.bind[i].view->v->handle,
+                                          PIPE_BUFFER_USAGE_GPU_READ);
+         }
+         else {
+            ts[i].value = SVGA3D_INVALID_ID;
+         }
+
+         queue.bind[i].view->dirty = FALSE;
+      }
+
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   return 0;
+
+fail:
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/* Tracked state entry for update_tss_binding. */
+struct svga_tracked_state svga_hw_tss_binding = {
+   "texture binding emit",
+   ( SVGA_NEW_SAMPLER | SVGA_NEW_TEXTURE_BINDING ),
+   update_tss_binding
+};
+
+
+/***********************************************************************
+ */
+
+struct ts_queue {
+   unsigned ts_count;   /* number of entries used in ts[] */
+   SVGA3dTextureState ts[PIPE_MAX_SAMPLERS*SVGA3D_TS_MAX];   /* queued (stage, name, value) entries */
+};
+
+/* Queue a texture state in the caller's `queue` if it differs from the cached value. */
+#define EMIT_TS(svga, unit, val, token, fail)                             \
+do {                                                                      \
+   if ((svga)->state.hw_draw.ts[unit][SVGA3D_TS_##token] != (val)) {      \
+      svga_queue_tss( &queue, (unit), SVGA3D_TS_##token, (val) );         \
+      (svga)->state.hw_draw.ts[unit][SVGA3D_TS_##token] = (val);          \
+   }                                                                      \
+} while (0)
+/* Same as EMIT_TS for a float, compared and stored as its fui() bits. */
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail)                    \
+do {                                                                      \
+   unsigned val = fui(fvalue);                                            \
+   if ((svga)->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
+      svga_queue_tss( &queue, (unit), SVGA3D_TS_##token, val );           \
+      (svga)->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
+   }                                                                      \
+} while (0)
+
+
+/* Append one (stage, name, value) entry to the texture state queue. */
+static INLINE void
+svga_queue_tss( struct ts_queue *q, unsigned unit, unsigned tss, unsigned value )
+{
+   SVGA3dTextureState *slot;
+
+   assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
+   slot = &q->ts[q->ts_count++];
+   slot->stage = unit;
+   slot->name = tss;
+   slot->value = value;
+}
+
+
+/* Queue each sampler's texture state values that differ from the cached
+ * hardware values and emit them in a single SetTextureState command.
+ * Returns 0, or PIPE_ERROR_OUT_OF_MEMORY when the command fails. */
+static int
+update_tss(struct svga_context *svga, unsigned dirty )
+{
+   struct ts_queue queue;   /* referenced by name from the EMIT_TS macros */
+   unsigned i;
+
+   queue.ts_count = 0;
+
+   for (i = 0; i < svga->curr.num_samplers; i++) {
+      const struct svga_sampler_state *curr = svga->curr.sampler[i];
+
+      if (curr == NULL)
+         continue;
+
+      EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
+      EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
+      EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
+      EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
+      EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
+      EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
+      EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
+      EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
+      EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
+      /* TEXCOORDINDEX -- hopefully not needed */
+
+      /* Units flagged 1d are forced to WRAP in V. */
+      if (svga->curr.tex_flags.flag_1d & (1 << i)) {
+         debug_printf("wrap 1d tex %d\n", i);
+         EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
+      }
+      else
+         EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
+
+      /* Units flagged srgb get a gamma of 2.2, all others 1.0. */
+      if (svga->curr.tex_flags.flag_srgb & (1 << i))
+         EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
+      else
+         EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+   }
+
+   if (queue.ts_count) {
+      SVGA3dTextureState *ts;
+
+      if (SVGA3D_BeginSetTextureState( svga->swc, &ts,
+                                       queue.ts_count ) != PIPE_OK)
+         goto fail;
+
+      memcpy( ts, queue.ts, queue.ts_count * sizeof queue.ts[0] );
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   return 0;
+
+fail:
+   /* XXX: the cached hardware state was already updated above, so it is
+    * poisoned here to make sure the dirty state gets re-emitted.  The
+    * proper fix is to re-instate the partial FIFOCommit command and only
+    * update the cached hw state once the allocation has succeeded.
+    */
+   memset(svga->state.hw_draw.ts, 0xcd, sizeof(svga->state.hw_draw.ts));
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+/* Tracked state entry for update_tss. */
+struct svga_tracked_state svga_hw_tss = {
+   "texture state emit",
+   ( SVGA_NEW_TEXTURE_FLAGS | SVGA_NEW_SAMPLER ),
+   update_tss
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
new file mode 100644
index 0000000000..c534308f50
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -0,0 +1,182 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_tgsi.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+
+#include "svga_hw_reg.h"
+
+
+/* Pass every user vertex buffer through u_upload_buffer (using
+ * svga->upload_vb) and point the vertex buffer slot at the buffer and
+ * offset it hands back.
+ * Returns PIPE_OK, or the error code reported by u_upload_buffer. */
+static int
+upload_user_buffers( struct svga_context *svga )
+{
+   const int nr = svga->curr.num_vertex_buffers;
+   int i;
+
+   /* Disabled debug trace. */
+   if (0)
+      debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
+
+   for (i = 0; i < nr; i++) {
+      struct pipe_buffer *upload_buffer = NULL;
+      unsigned upload_offset;
+      enum pipe_error ret;
+      unsigned size;
+
+      if (!svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
+         continue;
+
+      /* The whole user buffer is uploaded, starting at offset 0; the
+       * slot's previous buffer_offset is not taken into account. */
+      size = svga->curr.vb[i].buffer->size;
+      ret = u_upload_buffer( svga->upload_vb,
+                             0, size,
+                             svga->curr.vb[i].buffer,
+                             &upload_offset, &upload_buffer );
+      if (ret)
+         return ret;
+
+      if (0)
+         debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n",
+                      __FUNCTION__, i, svga->curr.vb[i].buffer,
+                      upload_buffer, upload_offset, size);
+
+      /* Release the old buffer, then store the uploaded one by plain
+       * assignment so that its refcount ends up correct.
+       */
+      pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL );
+      svga->curr.vb[i].buffer = upload_buffer;
+      svga->curr.vb[i].buffer_offset = upload_offset;
+   }
+
+   if (0)
+      debug_printf("%s: DONE\n", __FUNCTION__);
+
+   return PIPE_OK;
+}
+
+
+/***********************************************************************
+ */
+
+
+/* Build one SVGA3dVertexDecl per vertex element and hand each of them,
+ * with its vertex buffer, to the hwtnl module.  Always returns 0. */
+static int emit_hw_vs_vdecl( struct svga_context *svga,
+                             unsigned dirty )
+{
+   const struct pipe_vertex_element *ve = svga->curr.ve;
+   SVGA3dVertexDecl decl;
+   unsigned i;
+
+   assert(svga->curr.num_vertex_elements >=
+          svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
+
+   /* The loop assigns decl member by member; clear it first so that any
+    * member it does not set has a defined value in svga_hwtnl_vdecl. */
+   memset( &decl, 0, sizeof decl );
+
+   svga_hwtnl_reset_vdecl( svga->hwtnl, svga->curr.num_vertex_elements );
+
+   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+      const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+      unsigned usage, index;
+
+      svga_generate_vdecl_semantics( i, &usage, &index );
+
+      /* SVGA_NEW_VELEMENT
+       */
+      decl.identity.type = svga->state.sw.ve_format[i];
+      decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+      decl.identity.usage = usage;
+      decl.identity.usageIndex = index;
+      decl.array.stride = vb->stride;
+      decl.array.offset = (vb->buffer_offset + ve[i].src_offset);
+
+      svga_hwtnl_vdecl( svga->hwtnl, i, &decl, vb->buffer );
+   }
+
+   return 0;
+}
+
+
+/* Emit the hardware vertex declarations, uploading user vertex buffers
+ * first when there are any.  Nothing is emitted while swtnl is needed.
+ * Returns 0 or the error from upload_user_buffers/emit_hw_vs_vdecl. */
+static int emit_hw_vdecl( struct svga_context *svga, unsigned dirty )
+{
+   /* SVGA_NEW_NEED_SWTNL: do not emit during swtnl
+    */
+   if (svga->state.sw.need_swtnl)
+      return 0;
+
+   /* Reaching this point means a draw is going to happen.  Upload the
+    * userbuffers now, trying to combine userbuffers from multiple draw
+    * calls into a single host buffer for performance.
+    */
+   if (svga->curr.any_user_vertex_buffers && SVGA_COMBINE_USERBUFFERS) {
+      const int err = upload_user_buffers( svga );
+
+      if (err)
+         return err;
+
+      /* upload_user_buffers replaced every user buffer it found. */
+      svga->curr.any_user_vertex_buffers = FALSE;
+   }
+
+   return emit_hw_vs_vdecl( svga, dirty );
+}
+
+
+/* Tracked state entry: name, SVGA_NEW_* flag mask, emit_hw_vdecl callback. */
+struct svga_tracked_state svga_hw_vdecl = {
+   "hw vertex decl state (hwtnl version)",
+   ( SVGA_NEW_VS |
+     SVGA_NEW_FS |
+     SVGA_NEW_RAST |
+     SVGA_NEW_VBUFFER |
+     SVGA_NEW_VELEMENT |
+     SVGA_NEW_NEED_SWTNL ),
+   emit_hw_vdecl
+};
+
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
new file mode 100644
index 0000000000..a947745732
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -0,0 +1,239 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "translate/translate.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+
+static INLINE int compare_vs_keys( const struct svga_vs_compile_key *a,
+                                   const struct svga_vs_compile_key *b )
+{
+   /* Byte-wise compare over the size svga_vs_key_size() reports for 'a'. */
+   return memcmp( a, b, svga_vs_key_size( a ) );
+}
+
+
+static struct svga_shader_result *search_vs_key( struct svga_vertex_shader *vs,
+                                                 const struct svga_vs_compile_key *key )
+{
+   struct svga_shader_result *candidate = vs->base.results;
+
+   assert(key);
+
+   /* Walk the list of results until a matching key turns up. */
+   while (candidate && compare_vs_keys( key, &candidate->key.vkey ) != 0)
+      candidate = candidate->next;
+
+   /* NULL when the list was exhausted without a match. */
+   return candidate;
+}
+
+
+static enum pipe_error compile_vs( struct svga_context *svga,
+                                   struct svga_vertex_shader *vs,
+                                   const struct svga_vs_compile_key *key,
+                                   struct svga_shader_result **out_result )
+{
+   struct svga_shader_result *compiled;
+   enum pipe_error status;
+
+   /* Translate the shader for this key; a NULL result is reported
+    * to the caller as an out-of-memory error.
+    */
+   compiled = svga_translate_vertex_program( vs, key );
+   if (!compiled)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   status = SVGA3D_DefineShader(svga->swc,
+                                svga->state.next_vs_id,
+                                SVGA3D_SHADERTYPE_VS,
+                                compiled->tokens,
+                                compiled->nr_tokens * sizeof compiled->tokens[0]);
+   if (status != PIPE_OK) {
+      /* The id was not consumed; only the translation is dropped. */
+      svga_destroy_shader_result( compiled );
+      return status;
+   }
+
+   /* Success: take the id and push the result on the shader's list. */
+   compiled->id = svga->state.next_vs_id++;
+   compiled->next = vs->base.results;
+   vs->base.results = compiled;
+   *out_result = compiled;
+   return PIPE_OK;
+}
+
+/* Reads state tracked by SVGA_NEW_PRESCALE, SVGA_NEW_RAST and
+ * SVGA_NEW_ZERO_STRIDE.  Always returns 0. */
+static int make_vs_key( struct svga_context *svga,
+                        struct svga_vs_compile_key *key )
+{
+   /* Start from all-zero so that bytes not assigned below are defined. */
+   memset(key, 0, sizeof *key);
+   key->num_zero_stride_vertex_elements =
+      svga->curr.num_zero_stride_vertex_elements;
+   key->zero_stride_vertex_elements = svga->curr.zero_stride_vertex_elements;
+   key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+   key->need_prescale = svga->state.hw_clear.prescale.enabled;
+   return 0;
+}
+
+
+
+static int emit_hw_vs( struct svga_context *svga,
+                       unsigned dirty )
+{
+   struct svga_shader_result *variant = NULL;
+   unsigned shader_id = SVGA3D_INVALID_ID;
+   int err;
+
+   /* SVGA_NEW_NEED_SWTNL: under swtnl the invalid id is selected. */
+   if (!svga->state.sw.need_swtnl) {
+      struct svga_vs_compile_key key;
+      struct svga_vertex_shader *vs = svga->curr.vs;
+
+      err = make_vs_key( svga, &key );
+      if (err)
+         return err;
+
+      /* Reuse a result already compiled for this key, else compile. */
+      variant = search_vs_key( vs, &key );
+      if (variant == NULL) {
+         err = compile_vs( svga, vs, &key, &variant );
+         if (err)
+            return err;
+      }
+
+      assert (variant);
+      shader_id = variant->id;
+   }
+
+   /* Nothing to do when the tracked hardware id already matches. */
+   if (shader_id == svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX])
+      return 0;
+
+   err = SVGA3D_SetShader(svga->swc, SVGA3D_SHADERTYPE_VS, shader_id );
+   if (err)
+      return err;
+
+   svga->state.hw_draw.vs = variant;
+   svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = shader_id;
+   svga->dirty |= SVGA_NEW_VS_RESULT;
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_vs = 
+{
+   "vertex shader (hwtnl)",
+   (SVGA_NEW_VS |
+    SVGA_NEW_RAST |          /* make_vs_key() reads curr.rast (allow_psiz) */
+    SVGA_NEW_PRESCALE |      /* make_vs_key() reads prescale.enabled */
+    SVGA_NEW_NEED_SWTNL |    /* emit_hw_vs() reads state.sw.need_swtnl */
+    SVGA_NEW_ZERO_STRIDE),   /* make_vs_key() reads the zero-stride mask/count */
+   emit_hw_vs
+};
+
+/***********************************************************************
+ * Record vertex elements whose buffer has stride 0; store each as 4 floats. */
+static int update_zero_stride( struct svga_context *svga,
+                               unsigned dirty )
+{
+   unsigned i;
+   /* The mask and count are rebuilt from scratch on every call. */
+   svga->curr.zero_stride_vertex_elements = 0;
+   svga->curr.num_zero_stride_vertex_elements = 0;
+
+   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+      const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+      const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
+         vel->vertex_buffer_index];
+      if (vbuffer->stride == 0) {
+         unsigned const_idx =
+            svga->curr.num_zero_stride_vertex_elements;
+         struct translate *translate;
+         struct translate_key key;
+         void *mapped_buffer;
+         /* Bit i marks this element.  NOTE(review): (1 << i) is a signed int shift. */
+         svga->curr.zero_stride_vertex_elements |= (1 << i);
+         ++svga->curr.num_zero_stride_vertex_elements;
+         /* One input element converted to R32G32B32A32_FLOAT at slot const_idx. */
+         key.output_stride = 4 * sizeof(float);
+         key.nr_elements = 1;
+         key.element[0].input_format = vel->src_format;
+         key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         key.element[0].input_buffer = vel->vertex_buffer_index;
+         key.element[0].input_offset = vel->src_offset;
+         key.element[0].output_offset = const_idx * 4 * sizeof(float);
+
+         translate_key_sanitize(&key);
+         /* translate_generic_create is technically private but
+          * we don't want to code-generate, just want generic
+          * translation */
+         translate = translate_generic_create(&key);
+
+         assert(vel->src_offset == 0);
+         /* NOTE(review): neither 'translate' nor the mapping is checked for NULL. */
+         mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, 
+                                               vbuffer->buffer,
+                                               vel->src_offset,
+                                               pf_get_size(vel->src_format),
+                                               PIPE_BUFFER_USAGE_CPU_READ);
+         translate->set_buffer(translate, vel->vertex_buffer_index,
+                               mapped_buffer,
+                               vbuffer->stride);
+         translate->run(translate, 0, 1,
+                        svga->curr.zero_stride_constants);
+
+         pipe_buffer_unmap(svga->pipe.screen,
+                           vbuffer->buffer);
+         translate->release(translate);
+      }
+   }
+   /* The dirty flag is raised only when at least one such element was found. */
+   if (svga->curr.num_zero_stride_vertex_elements)
+      svga->dirty |= SVGA_NEW_ZERO_STRIDE;
+
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_update_zero_stride =
+{
+   "update zero_stride",          /* name string of this entry */
+   ( SVGA_NEW_VELEMENT |          /* update_zero_stride() reads curr.ve ... */
+     SVGA_NEW_VBUFFER ),          /* ... and curr.vb */
+   update_zero_stride
+};
diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h
new file mode 100644
index 0000000000..4882f26b17
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl.h
@@ -0,0 +1,52 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_H
+#define SVGA_SWTNL_H
+
+#include "pipe/p_compiler.h"
+
+struct svga_context;
+struct pipe_context;
+struct pipe_buffer;
+struct vbuf_render;
+
+/* Set up (TRUE on success) and tear down the swtnl draw context. */
+boolean svga_init_swtnl( struct svga_context *svga );
+void svga_destroy_swtnl( struct svga_context *svga );
+
+/* Draw through the draw module; indexBuffer may be NULL (no indices mapped). */
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+                               struct pipe_buffer *indexBuffer,
+                               unsigned indexSize,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, 
+                               unsigned start, 
+                               unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
new file mode 100644
index 0000000000..b4f757a47a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -0,0 +1,349 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_vbuf.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+#include "svga_draw.h"
+#include "svga_swtnl_private.h"
+
+
+static const struct vertex_info *
+svga_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *backend = svga_vbuf_render(render);
+
+   /* svga_swtnl_update_vdecl() runs first; afterwards the vertex_info
+    * stored in the render object is handed out. */
+   svga_swtnl_update_vdecl(backend->svga);
+   return &backend->vertex_info;
+}
+
+/* Reserve room for nr_vertices vertices of vertex_size bytes; returns TRUE. */
+static boolean
+svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
+                                    ushort vertex_size,
+                                    ushort nr_vertices )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+   size_t size = (size_t)nr_vertices * (size_t)vertex_size;
+   boolean new_vbuf = FALSE;
+   boolean new_ibuf = FALSE;
+   /* A different vertex size requires a new vertex declaration. */
+   if (svga_render->vertex_size != vertex_size)
+      svga->swtnl.new_vdecl = TRUE;
+   svga_render->vertex_size = (size_t)vertex_size;
+   /* A pending new_vbuf request replaces both the vertex and index buffer. */
+   if (svga->swtnl.new_vbuf)
+      new_ibuf = new_vbuf = TRUE;
+   svga->swtnl.new_vbuf = FALSE;
+   /* The vertex buffer is also replaced when the request no longer fits. */
+   if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size)
+      new_vbuf = TRUE;
+
+   if (new_vbuf)
+      pipe_buffer_reference(&svga_render->vbuf, NULL);
+   if (new_ibuf)
+      pipe_buffer_reference(&svga_render->ibuf, NULL);
+   /* (Re)create the buffer if needed; after a failure, flush and retry once. */
+   if (!svga_render->vbuf) {
+      svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
+      svga_render->vbuf = pipe_buffer_create(screen,
+                                             0,
+                                             PIPE_BUFFER_USAGE_VERTEX,
+                                             svga_render->vbuf_size);
+      if(!svga_render->vbuf) {
+         svga_context_flush(svga, NULL);
+         svga_render->vbuf = pipe_buffer_create(screen,
+                                                0,
+                                                PIPE_BUFFER_USAGE_VERTEX,
+                                                svga_render->vbuf_size);
+         assert(svga_render->vbuf);
+      }
+      /* NOTE(review): a second failure is only caught by the assert above. */
+      svga->swtnl.new_vdecl = TRUE;
+      svga_render->vbuf_offset = 0;
+   } else {
+      svga_render->vbuf_offset += svga_render->vbuf_used;
+   }
+
+   svga_render->vbuf_used = 0;
+   /* A new declaration is based at the start of this allocation. */
+   if (svga->swtnl.new_vdecl)
+      svga_render->vdecl_offset = svga_render->vbuf_offset;
+
+   return TRUE;
+}
+
+static void *
+svga_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+   /* Map the vertex buffer for CPU writes and return the address of the
+    * current write position (vbuf_offset), or NULL if the map fails. */
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct pipe_screen *screen = svga_render->svga->pipe.screen;
+   char *ptr = (char*)pipe_buffer_map(screen, svga_render->vbuf,
+                                      PIPE_BUFFER_USAGE_CPU_WRITE |
+                                      PIPE_BUFFER_USAGE_FLUSH_EXPLICIT);
+
+   return ptr ? ptr + svga_render->vbuf_offset : NULL;
+}
+
+static void
+svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
+                                 ushort min_index,
+                                 ushort max_index )
+{
+   struct svga_vbuf_render *backend = svga_vbuf_render(render);
+   struct pipe_screen *screen = backend->svga->pipe.screen;
+   /* Bytes from the start of this allocation through vertex max_index. */
+   size_t used = backend->vertex_size * ((size_t)max_index + 1);
+   /* Byte range [first, first + span) holding min_index..max_index. */
+   unsigned first = backend->vbuf_offset + backend->vertex_size * min_index;
+   unsigned span = backend->vertex_size * (max_index + 1 - min_index);
+
+   pipe_buffer_flush_mapped_range(screen, backend->vbuf, first, span);
+   pipe_buffer_unmap(screen, backend->vbuf);
+   backend->vbuf_used = MAX2(backend->vbuf_used, used);
+   backend->max_index = max_index;
+   backend->min_index = min_index;
+}
+
+static boolean
+svga_vbuf_render_set_primitive( struct vbuf_render *render,
+                                unsigned prim )
+{
+   /* Just record the primitive type for the draw calls; every type
+    * is accepted, so this always returns TRUE. */
+   svga_vbuf_render(render)->prim = prim;
+   return TRUE;
+}
+
+static void
+svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render )
+{
+   struct svga_context *svga = svga_render->svga;
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   int i;
+   /* Hand the swtnl vertex declarations and fill/shade modes to hwtnl. */
+   /* if the vdecl or vbuf hasn't changed do nothing */
+   if (!svga->swtnl.new_vdecl)
+      return;
+   /* Work on a copy so the stored declarations keep their own offsets. */
+   memcpy(vdecl, svga_render->vdecl, sizeof(vdecl));
+
+   /* flush the hw state */
+   ret = svga_hwtnl_flush(svga->hwtnl);
+   if (ret) {
+      svga_context_flush(svga, NULL);
+      ret = svga_hwtnl_flush(svga->hwtnl);
+      /* if we hit this path we might become synced with hw */
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == 0);
+   }
+
+   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
+   /* Rebase each declaration by vdecl_offset and pass it with the vbuf. */
+   for (i = 0; i < svga_render->vdecl_count; i++) {
+      vdecl[i].array.offset += svga_render->vdecl_offset;
+
+      svga_hwtnl_vdecl( svga->hwtnl,
+                        i,
+                        &vdecl[i],
+                        svga_render->vbuf );
+   }
+
+   /* We have already taken care of flatshading, so let the hwtnl
+    * module use whatever is most convenient:
+    */
+   if (svga->state.sw.need_pipeline) {
+      svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
+      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+   }
+   else {
+      svga_hwtnl_set_flatshade( svga->hwtnl,
+                                svga->curr.rast->templ.flatshade,
+                                svga->curr.rast->templ.flatshade_first );
+
+      svga_hwtnl_set_unfilled( svga->hwtnl,
+                               svga->curr.rast->hw_unfilled );
+   }
+   /* Later calls return early until new_vdecl is set again. */
+   svga->swtnl.new_vdecl = FALSE;
+}
+
+static void
+svga_vbuf_render_draw_arrays( struct vbuf_render *render,
+                              unsigned start,
+                              uint nr )
+{
+   struct svga_vbuf_render *backend = svga_vbuf_render(render);
+   struct svga_context *svga = backend->svga;
+   unsigned bias = (backend->vbuf_offset - backend->vdecl_offset) / backend->vertex_size;
+   enum pipe_error status;
+
+   svga_vbuf_sumbit_state(backend);
+
+   /* The draw module may have changed some of our state behind our
+    * backs, so update_state() runs again.  Testcase: redbook/polys.c
+    */
+   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+   status = svga_hwtnl_draw_arrays(svga->hwtnl, backend->prim, start + bias, nr);
+   if (status == PIPE_OK)
+      return;
+   /* Failed: flush the context and submit the same draw once more. */
+   svga_context_flush(svga, NULL);
+   status = svga_hwtnl_draw_arrays(svga->hwtnl, backend->prim, start + bias, nr);
+   svga->swtnl.new_vbuf = TRUE;
+   assert(status == PIPE_OK);
+}
+
+/* Write 16-bit indices into the index buffer and draw them through hwtnl. */
+static void
+svga_vbuf_render_draw( struct vbuf_render *render,
+                       const ushort *indices,
+                       uint nr_indices)
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
+   enum pipe_error ret;   /* compared with PIPE_OK below, so not a boolean */
+   size_t size = 2 * nr_indices;
+
+   assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
+   /* Start a new index buffer when these indices no longer fit. */
+   if (svga_render->ibuf_size < svga_render->ibuf_offset + size)
+      pipe_buffer_reference(&svga_render->ibuf, NULL);
+   /* NOTE(review): a failed pipe_buffer_create() below is not handled. */
+   if (!svga_render->ibuf) {
+      svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
+      svga_render->ibuf = pipe_buffer_create(screen,
+                                             0,
+                                             PIPE_BUFFER_USAGE_VERTEX,
+                                             svga_render->ibuf_size);
+      svga_render->ibuf_offset = 0;
+   }
+
+   pipe_buffer_write(screen, svga_render->ibuf,
+                     svga_render->ibuf_offset, size, indices);
+
+
+   /* off to hardware */
+   svga_vbuf_sumbit_state(svga_render);
+
+   /* Need to call update_state() again as the draw module may have
+    * altered some of our state behind our backs.  Testcase:
+    * redbook/polys.c
+    */
+   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+   ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+                                        svga_render->ibuf,
+                                        2,
+                                        svga_render->min_index,
+                                        svga_render->max_index,
+                                        svga_render->prim,
+                                        svga_render->ibuf_offset / 2, nr_indices, bias);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+                                           svga_render->ibuf,
+                                           2,
+                                           svga_render->min_index,
+                                           svga_render->max_index,
+                                           svga_render->prim,
+                                           svga_render->ibuf_offset / 2, nr_indices, bias);
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == PIPE_OK);
+   }
+   /* The next batch of indices is written after the ones just drawn. */
+   svga_render->ibuf_offset += size;
+}
+
+
+static void
+svga_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+   /* Empty: no work is done here when vertices are released. */
+}
+
+
+static void
+svga_vbuf_render_destroy( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *backend = svga_vbuf_render(render);
+   /* Drop the index and vertex buffer references, then free the object. */
+   pipe_buffer_reference(&backend->ibuf, NULL);
+   pipe_buffer_reference(&backend->vbuf, NULL);
+   FREE(backend);
+}
+
+
+/**
+ * Create a new primitive render (zero-filled); NULL if allocation fails.
+ */
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga )
+{
+   struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render);
+
+   if (!svga_render)
+      return NULL;   /* svga_init_swtnl() checks for this */
+   svga_render->svga = svga;   /* ibuf_size/vbuf_size stay 0 from the calloc */
+   svga_render->ibuf_alloc_size = 4*1024;
+   svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
+   svga_render->base.max_indices = 65536;
+   svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
+   svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices;
+   svga_render->base.map_vertices = svga_vbuf_render_map_vertices;
+   svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices;
+   svga_render->base.set_primitive = svga_vbuf_render_set_primitive;
+   svga_render->base.draw = svga_vbuf_render_draw;
+   svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays;
+   svga_render->base.release_vertices = svga_vbuf_render_release_vertices;
+   svga_render->base.destroy = svga_vbuf_render_destroy;
+
+   return &svga_render->base;
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
new file mode 100644
index 0000000000..8b14c913f7
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -0,0 +1,170 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+#include "svga_swtnl_private.h"
+
+
+/* Map the current buffers, run one draw through the draw module, unmap. */
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+                               struct pipe_buffer *indexBuffer,
+                               unsigned indexSize,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, unsigned start, unsigned count)
+{
+   struct draw_context *draw = svga->swtnl.draw;
+   unsigned i;
+   const void *map;
+   enum pipe_error ret;
+   /* Expected on entry: no dirty bits, swtnl selected, draw context set. */
+   assert(!svga->dirty);
+   assert(svga->state.sw.need_swtnl);
+   assert(draw);
+   /* Update swtnl draw state; on failure flush the context and try once more. */
+   ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+   if (ret) {
+      svga_context_flush(svga, NULL);
+      ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == PIPE_OK);
+   }
+   /* NOTE(review): the vertex and index buffer maps are not checked for NULL. */
+   /*
+    * Map vertex buffers
+    */
+   for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+      map = pipe_buffer_map(svga->pipe.screen,
+                            svga->curr.vb[i].buffer,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+
+      draw_set_mapped_vertex_buffer(draw, i, map);
+   }
+
+   /* Map index buffer, if present */
+   if (indexBuffer) {
+      map = pipe_buffer_map(svga->pipe.screen, indexBuffer,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+
+      draw_set_mapped_element_buffer_range(draw, 
+                                           indexSize, 
+                                           min_index,
+                                           max_index,
+                                           map);
+   }
+   /* Map the vertex shader constant buffer, if one is set. */
+   if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+      map = pipe_buffer_map(svga->pipe.screen,
+                            svga->curr.cb[PIPE_SHADER_VERTEX],
+                            PIPE_BUFFER_USAGE_CPU_READ);
+      assert(map);
+      draw_set_mapped_constant_buffer(
+         draw, 
+         map,
+         svga->curr.cb[PIPE_SHADER_VERTEX]->size);
+   }
+   /* Run the draw module on the mapped data, then flush the draw module. */
+   draw_arrays(svga->swtnl.draw, prim, start, count);
+
+   draw_flush(svga->swtnl.draw);
+
+   /* Ensure the draw module didn't touch this */
+   assert(i == svga->curr.num_vertex_buffers);
+   
+   /*
+    * unmap vertex/index buffers
+    */
+   for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+      pipe_buffer_unmap(svga->pipe.screen, svga->curr.vb[i].buffer);
+      draw_set_mapped_vertex_buffer(draw, i, NULL);
+   }
+
+   if (indexBuffer) {
+      pipe_buffer_unmap(svga->pipe.screen, indexBuffer);
+      draw_set_mapped_element_buffer(draw, 0, NULL);
+   }
+   /* Unmap the constant buffer mapped above. */
+   if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+      pipe_buffer_unmap(svga->pipe.screen,
+                        svga->curr.cb[PIPE_SHADER_VERTEX]);
+   }
+
+   return ret;
+}
+
+
+
+
+boolean svga_init_swtnl( struct svga_context *svga )
+{
+   struct draw_context *draw;
+   struct vbuf_render *backend;
+
+   /* The vbuf render backend comes first; it is handed to draw below. */
+   backend = svga->swtnl.backend = svga_vbuf_render_create(svga);
+   if (backend == NULL)
+      goto fail;
+
+   /* Create the drawing context and plug our rendering stage into it. */
+   draw = svga->swtnl.draw = draw_create();
+   if (!draw)
+      goto fail;
+
+   draw_set_rasterize_stage(draw, draw_vbuf_stage( draw, backend ));
+   draw_set_render(draw, backend);
+
+   draw_install_aaline_stage(draw, &svga->pipe);
+   draw_install_aapoint_stage(draw, &svga->pipe);
+   draw_install_pstipple_stage(draw, &svga->pipe);
+
+   draw_set_driver_clipping(draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE));
+
+   return TRUE;
+
+fail:
+   /* Tear down whichever of the two objects is currently set. */
+   if (svga->swtnl.backend)
+      svga->swtnl.backend->destroy( svga->swtnl.backend );
+
+   if (svga->swtnl.draw)
+      draw_destroy( svga->swtnl.draw );
+
+   return FALSE;
+}
+
+/* Destroy the draw context that svga_init_swtnl() stored in swtnl.draw. */
+void svga_destroy_swtnl( struct svga_context *svga )
+{
+   draw_destroy( svga->swtnl.draw );
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
new file mode 100644
index 0000000000..9bbb42910f
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -0,0 +1,93 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_PRIVATE_H
+#define SVGA_SWTNL_PRIVATE_H
+
+#include "svga_swtnl.h"
+#include "draw/draw_vertex.h"
+
+#include "svga_types.h"
+#include "svga3d_reg.h"
+
+/**
+ * Primitive renderer for svga.
+ */
+struct svga_vbuf_render {
+   struct vbuf_render base;         /* first member: svga_vbuf_render() casts to it */
+
+   struct svga_context *svga;       /* context given to svga_vbuf_render_create() */
+   struct vertex_info vertex_info;  /* returned by the get_vertex_info callback */
+
+   unsigned vertex_size;            /* bytes per vertex, set by allocate_vertices */
+
+   unsigned prim;                   /* primitive type, set by set_primitive */
+
+   struct pipe_buffer *vbuf;        /* vertex buffer, NULL until created */
+   struct pipe_buffer *ibuf;        /* index buffer, NULL until created */
+
+   /* current size of buffer */
+   size_t vbuf_size;
+   size_t ibuf_size;
+
+   /* minimum size used when a new buffer is created */
+   size_t vbuf_alloc_size;
+   size_t ibuf_alloc_size;
+
+   /* current write place (byte offset into the buffer) */
+   size_t vbuf_offset;
+   size_t ibuf_offset;
+
+   /* bytes used at vbuf_offset, updated by unmap_vertices */
+   size_t vbuf_used;
+
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];  /* copied by svga_vbuf_sumbit_state() */
+   unsigned vdecl_offset;           /* vbuf offset added to each declaration */
+   unsigned vdecl_count;            /* number of entries of vdecl[] submitted */
+
+   ushort min_index;                /* index range recorded by unmap_vertices */
+   ushort max_index;
+};
+
+/**
+ * Downcast to the svga_vbuf_render that embeds 'render' as its base. */
+static INLINE struct svga_vbuf_render *
+svga_vbuf_render( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *self = (struct svga_vbuf_render *)render;
+   assert(render);
+   return self;
+}
+
+/* Allocate the vbuf_render backend whose callbacks draw through 'svga'. */
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga );
+
+/* Called from the backend's get_vertex_info callback before it returns. */
+int
+svga_swtnl_update_vdecl( struct svga_context *svga );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
new file mode 100644
index 0000000000..1616312113
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -0,0 +1,242 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+
+#include "svga_swtnl_private.h"
+
+
+#define SVGA_POINT_ADJ_X -0.375
+#define SVGA_POINT_ADJ_Y -0.5
+
+#define SVGA_LINE_ADJ_X -0.5
+#define SVGA_LINE_ADJ_Y -0.5
+
+#define SVGA_TRIANGLE_ADJ_X -0.375
+#define SVGA_TRIANGLE_ADJ_Y -0.5
+
+
+static void set_draw_viewport( struct svga_context *svga )
+{
+   struct pipe_viewport_state vp = svga->curr.viewport;
+   float adjx = 0;
+   float adjy = 0;
+
+   switch (svga->curr.reduced_prim) {
+   case PIPE_PRIM_POINTS:
+      adjx = SVGA_POINT_ADJ_X;
+      adjy = SVGA_POINT_ADJ_Y;
+      break;
+   case PIPE_PRIM_LINES:
+      /* XXX: This is to compensate for the fact that wide lines are
+       * going to be drawn with triangles, but we're not catching all
+       * cases where that will happen.
+       */
+      if (svga->curr.rast->templ.line_width > 1.0) 
+      {
+         adjx = SVGA_LINE_ADJ_X + 0.175;
+         adjy = SVGA_LINE_ADJ_Y - 0.175;
+      }
+      else {
+         adjx = SVGA_LINE_ADJ_X;
+         adjy = SVGA_LINE_ADJ_Y;
+      }
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      adjx += SVGA_TRIANGLE_ADJ_X;
+      adjy += SVGA_TRIANGLE_ADJ_Y;
+      break;
+   }
+
+   vp.translate[0] += adjx;
+   vp.translate[1] += adjy;
+
+   draw_set_viewport_state(svga->swtnl.draw, &vp);
+}
+
+static int update_swtnl_draw( struct svga_context *svga,
+                              unsigned dirty )
+{
+   draw_flush( svga->swtnl.draw );
+
+   if (dirty & SVGA_NEW_VS) 
+      draw_bind_vertex_shader(svga->swtnl.draw,
+                              svga->curr.vs->draw_shader);
+
+   if (dirty & SVGA_NEW_VBUFFER)
+      draw_set_vertex_buffers(svga->swtnl.draw, 
+                              svga->curr.num_vertex_buffers, 
+                              svga->curr.vb);
+
+   if (dirty & SVGA_NEW_VELEMENT)
+      draw_set_vertex_elements(svga->swtnl.draw, 
+                               svga->curr.num_vertex_elements, 
+                               svga->curr.ve );
+
+   if (dirty & SVGA_NEW_CLIP)
+      draw_set_clip_state(svga->swtnl.draw, 
+                          &svga->curr.clip);
+
+   if (dirty & (SVGA_NEW_VIEWPORT |
+                SVGA_NEW_REDUCED_PRIMITIVE | 
+                SVGA_NEW_RAST))
+      set_draw_viewport( svga );
+
+   if (dirty & SVGA_NEW_RAST)
+      draw_set_rasterizer_state(svga->swtnl.draw,
+                                &svga->curr.rast->templ);
+
+   if (dirty & SVGA_NEW_FRAME_BUFFER)
+      draw_set_mrd(svga->swtnl.draw, 
+                   svga->curr.depthscale);
+
+   if (dirty & SVGA_NEW_EDGEFLAGS)
+      draw_set_edgeflags( svga->swtnl.draw, 
+                          svga->curr.edgeflags );
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_update_swtnl_draw =
+{
+   "update draw module state",
+   (SVGA_NEW_VS |
+    SVGA_NEW_VBUFFER |
+    SVGA_NEW_VELEMENT |
+    SVGA_NEW_CLIP |
+    SVGA_NEW_VIEWPORT |
+    SVGA_NEW_RAST |
+    SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_REDUCED_PRIMITIVE |
+    SVGA_NEW_EDGEFLAGS),
+   update_swtnl_draw
+};
+
+
+int svga_swtnl_update_vdecl( struct svga_context *svga )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(svga->swtnl.backend);
+   struct draw_context *draw = svga->swtnl.draw;
+   struct vertex_info *vinfo = &svga_render->vertex_info;
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+   const enum interp_mode colorInterp =
+      svga->curr.rast->templ.flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+   const struct svga_fragment_shader *fs = svga->curr.fs;
+   int offset = 0;
+   int nr_decls = 0;
+   int src, i;
+
+   memset(vinfo, 0, sizeof(*vinfo));
+   memset(vdecl, 0, sizeof(vdecl));
+
+   /* always add position */
+   src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0);
+   draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+   vinfo->attrib[0].emit = EMIT_4F;
+   vdecl[0].array.offset = offset;
+   vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+   vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
+   vdecl[0].identity.usageIndex = 0;
+   offset += 16;
+   nr_decls++;
+
+   for (i = 0; i < fs->base.info.num_inputs; i++) {
+      unsigned name = fs->base.info.input_semantic_name[i];
+      unsigned index = fs->base.info.input_semantic_index[i];
+      src = draw_find_vs_output(draw, name, index);
+      vdecl[nr_decls].array.offset = offset;
+      vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
+
+      switch (name) {
+      case TGSI_SEMANTIC_COLOR:
+         draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+         offset += 16;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_GENERIC:
+         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+         vdecl[nr_decls].identity.usageIndex += 1;
+         offset += 16;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_FOG:
+         draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1;
+         assert(vdecl[nr_decls].identity.usageIndex == 0);
+         offset += 4;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_POSITION:
+         /* generated internally, not a vertex shader output */
+         break;
+      default:
+         assert(0);
+      }
+   }
+
+   draw_compute_vertex_size(vinfo);
+
+   svga_render->vdecl_count = nr_decls;
+   for (i = 0; i < svga_render->vdecl_count; i++)
+      vdecl[i].array.stride = offset;
+
+   if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
+      return 0;
+
+   memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
+   svga->swtnl.new_vdecl = TRUE;
+
+   return 0;
+}
+
+
+static int update_swtnl_vdecl( struct svga_context *svga,
+                               unsigned dirty )
+{
+   return svga_swtnl_update_vdecl( svga );
+}
+
+
+struct svga_tracked_state svga_update_swtnl_vdecl =
+{
+   "update draw module vdecl",
+   (SVGA_NEW_VS |
+    SVGA_NEW_FS),
+   update_swtnl_vdecl
+};
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
new file mode 100644
index 0000000000..44d0930bc0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -0,0 +1,266 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_memory.h"
+
+#include "svgadump/st_shader_dump.h"
+
+#include "svga_context.h"
+#include "svga_tgsi.h"
+#include "svga_tgsi_emit.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+/* Sinkhole used only in error conditions.
+ */
+static char err_buf[128];
+
+#if 0
+static void svga_destroy_shader_emitter( struct svga_shader_emitter *emit )
+{
+   if (emit->buf != err_buf)
+      FREE(emit->buf);
+}
+#endif
+
+
+static boolean svga_shader_expand( struct svga_shader_emitter *emit )
+{
+   char *new_buf;
+   unsigned newsize = emit->size * 2;
+
+   if(emit->buf != err_buf)
+      new_buf = REALLOC(emit->buf, emit->size, newsize);
+   else
+      new_buf = NULL;
+
+   if (new_buf == NULL) {
+      emit->ptr = err_buf;
+      emit->buf = err_buf;
+      emit->size = sizeof(err_buf);
+      return FALSE;
+   }
+
+   emit->size = newsize;
+   emit->ptr = new_buf + (emit->ptr - emit->buf);
+   emit->buf = new_buf;
+   return TRUE;
+}   
+
+static INLINE boolean reserve(  struct svga_shader_emitter *emit,
+                                unsigned nr_dwords )
+{
+   /* Grow until nr_dwords fit: one doubling may not cover a large request. */
+   while (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size)
+      if (!svga_shader_expand( emit ))
+         return FALSE;
+
+   return TRUE;
+}
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+                                unsigned dword )
+{
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   *(unsigned *)emit->ptr = dword;
+   emit->ptr += sizeof dword;
+   return TRUE;
+}
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+                                 const unsigned *dwords,
+                                 unsigned nr )
+{
+   if (!reserve(emit, nr))
+      return FALSE;
+
+   memcpy( emit->ptr, dwords, nr * sizeof *dwords );
+   emit->ptr += nr * sizeof *dwords;
+   return TRUE;
+}
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+                                 unsigned opcode )
+{
+   SVGA3dShaderInstToken *here;
+
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   here = (SVGA3dShaderInstToken *)emit->ptr;
+   here->value = opcode;
+
+   if (emit->insn_offset) {
+      SVGA3dShaderInstToken *prev = (SVGA3dShaderInstToken *)(emit->buf + 
+                                                              emit->insn_offset);
+      prev->size = (here - prev) - 1;
+   }
+   
+   emit->insn_offset = emit->ptr - emit->buf;
+   emit->ptr += sizeof(unsigned);
+   return TRUE;
+}
+
+#define SVGA3D_PS_2X (SVGA3D_PS_20 | 1)
+#define SVGA3D_VS_2X (SVGA3D_VS_20 | 1)
+
+static boolean svga_shader_emit_header( struct svga_shader_emitter *emit )
+{
+   SVGA3dShaderVersion header;
+
+   memset( &header, 0, sizeof header );
+
+   switch (emit->unit) {
+   case PIPE_SHADER_FRAGMENT:
+      header.value = emit->use_sm30 ? SVGA3D_PS_30 : SVGA3D_PS_2X;
+      break;
+   case PIPE_SHADER_VERTEX:
+      header.value = emit->use_sm30 ? SVGA3D_VS_30 : SVGA3D_VS_2X;
+      break;
+   }
+ 
+   return svga_shader_emit_dword( emit, header.value );
+}
+
+
+
+
+
+/* Parse TGSI shader and translate to SVGA/DX9 serialized
+ * representation.
+ *
+ * In this function SVGA shader is emitted to an in-memory buffer that
+ * can be dynamically grown.  Once we've finished and know how large
+ * it is, it will be copied to a hardware buffer for upload.
+ * Returns NULL if allocation or translation fails.
+ */
+static struct svga_shader_result *
+svga_tgsi_translate( const struct svga_shader *shader,
+                     union svga_compile_key key,
+                     unsigned unit )
+{
+   struct svga_shader_result *result = NULL;
+   struct svga_shader_emitter emit;
+
+   memset(&emit, 0, sizeof(emit));
+
+   emit.use_sm30 = shader->use_sm30;
+   emit.size = 1024;
+   emit.buf = MALLOC(emit.size);
+   if (emit.buf == NULL)
+      goto fail;
+
+   emit.ptr = emit.buf;
+   emit.unit = unit;
+   emit.key = key;
+
+   tgsi_scan_shader( shader->tokens, &emit.info);
+
+   emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+   if (unit == PIPE_SHADER_FRAGMENT)
+      emit.imm_start += key.fkey.num_unnormalized_coords;
+
+   if (unit == PIPE_SHADER_VERTEX) {
+      emit.imm_start += key.vkey.need_prescale ? 2 : 0;
+      emit.imm_start += key.vkey.num_zero_stride_vertex_elements;
+   }
+
+   emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+
+   emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+   emit.in_main_func = TRUE;
+
+   if (!svga_shader_emit_header( &emit ))
+      goto fail;
+
+   if (!svga_shader_emit_instructions( &emit, shader->tokens ))
+      goto fail;
+
+   result = CALLOC_STRUCT(svga_shader_result);
+   if (result == NULL)
+      goto fail;
+
+   result->shader = shader;
+   result->tokens = (const unsigned *)emit.buf;
+   result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
+   memcpy(&result->key, &key, sizeof key);
+
+   return result;
+
+fail:
+   FREE(result);
+   /* svga_shader_expand() failure leaves emit.buf == err_buf (static). */
+   if (emit.buf != err_buf)
+      FREE(emit.buf);
+   return NULL;
+}
+
+
+
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+                                 const struct svga_fs_compile_key *fkey )
+{
+   union svga_compile_key key;
+   memcpy(&key.fkey, fkey, sizeof *fkey);
+
+   return svga_tgsi_translate( &fs->base, 
+                               key,
+                               PIPE_SHADER_FRAGMENT );
+}
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *vs,
+                               const struct svga_vs_compile_key *vkey )
+{
+   union svga_compile_key key;
+   memcpy(&key.vkey, vkey, sizeof *vkey);
+
+   return svga_tgsi_translate( &vs->base, 
+                               key,
+                               PIPE_SHADER_VERTEX );
+}
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result )
+{
+   FREE((unsigned *)result->tokens);
+   FREE(result);
+}
+
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
new file mode 100644
index 0000000000..896c90a89a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -0,0 +1,139 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_H
+#define SVGA_TGSI_H
+
+#include "pipe/p_state.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_fragment_shader;
+struct svga_vertex_shader;
+struct svga_shader;
+struct tgsi_shader_info;
+struct tgsi_token;
+
+
+struct svga_vs_compile_key
+{
+   ubyte need_prescale:1;
+   ubyte allow_psiz:1;
+   unsigned zero_stride_vertex_elements;
+   ubyte num_zero_stride_vertex_elements:6;
+};
+
+struct svga_fs_compile_key
+{
+   boolean light_twoside:1;
+   boolean front_cw:1;
+   ubyte num_textures;
+   ubyte num_unnormalized_coords;
+   struct {
+      ubyte compare_mode       : 1;
+      ubyte compare_func       : 3;
+      ubyte unnormalized       : 1;
+
+      ubyte width_height_idx   : 7;
+
+      ubyte texture_target;
+   } tex[PIPE_MAX_SAMPLERS];
+};
+
+union svga_compile_key {
+   struct svga_vs_compile_key vkey;
+   struct svga_fs_compile_key fkey;
+};
+
+struct svga_shader_result
+{
+   const struct svga_shader *shader;
+
+   /* Parameters used to generate this compilation result:
+    */
+   union svga_compile_key key;
+
+   /* Compiled shader tokens:
+    */
+   const unsigned *tokens;
+   unsigned nr_tokens;
+
+   /* SVGA Shader ID:
+    */
+   unsigned id;
+   
+   /* Next compilation result:
+    */
+   struct svga_shader_result *next;
+};
+
+
+/* TGSI doesn't provide us with VS input semantics (they're actually
+ * pretty meaningless), so we just generate some plausible ones here.
+ * This is called both from within the TGSI translator and when
+ * building vdecls to ensure they match up.
+ *
+ * The real use of this information is matching vertex elements to
+ * fragment shader inputs in the case where vertex shader is disabled.
+ */
+static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+                                                  unsigned *usage,
+                                                  unsigned *usage_index )
+{
+   if (idx == 0) {
+      *usage = SVGA3D_DECLUSAGE_POSITION;
+      *usage_index = 0;
+   }
+   else {
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      *usage_index = idx - 1;
+   }
+}
+
+
+
+static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+{
+   return sizeof *key;
+}
+
+static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+{
+   return (const char *)&key->tex[key->num_textures].texture_target -
+      (const char *)key;
+}
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+                                 const struct svga_fs_compile_key *fkey );
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *fs,
+                               const struct svga_vs_compile_key *vkey );
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
new file mode 100644
index 0000000000..54457082a0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
@@ -0,0 +1,280 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+
+
+static boolean ps20_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   struct src_register reg;
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:
+      /* Special case:
+       */
+      reg = src_register( SVGA3DREG_MISCTYPE, 
+                          SVGA3DMISCREG_POSITION );
+      break;
+   case TGSI_SEMANTIC_COLOR:
+      reg = src_register( SVGA3DREG_INPUT, 
+                          semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_FOG:
+      assert(semantic.SemanticIndex == 0);
+      reg = src_register( SVGA3DREG_TEXTURE, 0 );
+      break;
+   case TGSI_SEMANTIC_GENERIC:
+      reg = src_register( SVGA3DREG_TEXTURE,
+                          semantic.SemanticIndex + 1 );
+      break;
+   default:
+      assert(0);
+      return TRUE;
+   }
+
+   emit->input_map[idx] = reg;
+
+   dcl.dst = dst( reg );
+
+   dcl.usage = 0;
+   dcl.index = 0;
+
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean ps20_output( struct svga_shader_emitter *emit,
+                            struct tgsi_declaration_semantic semantic,
+                            unsigned idx )
+{
+   SVGA3dShaderDestToken reg;   /* NOTE(review): only written, never read in this function */
+
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_COLOR:
+      if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) {
+         unsigned cbuf = semantic.SemanticIndex;
+
+         emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                               emit->nr_hw_temp++ );
+         emit->temp_col[cbuf] = emit->output_map[idx];
+         emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, 
+                                              semantic.SemanticIndex );
+      }
+      else {
+         assert(0);
+         reg = dst_register( SVGA3DREG_COLOROUT, 0 );   /* output_map[idx] is left unset on this path */
+      }
+      break;
+   case TGSI_SEMANTIC_POSITION:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, 
+                                     semantic.SemanticIndex );
+      break;
+   default:
+      assert(0);
+      reg = dst_register( SVGA3DREG_COLOROUT, 0 );   /* output_map[idx] is left unset on this path */
+      break;
+   }
+
+   return TRUE;
+}
+
+
+static boolean vs20_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+   dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+   assert(dcl.dst.reserved0);
+
+   /* Mesa doesn't provide us with VS input semantics (they're
+    * actually pretty meaningless), so we just generate some plausible
+    * ones here.  This has to match what we declare in the vdecl code
+    * in svga_pipe_vertex.c.
+    */
+   if (idx == 0) {
+      dcl.usage = SVGA3D_DECLUSAGE_POSITION;
+      dcl.index = 0;
+   }
+   else {
+      dcl.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      dcl.index = idx - 1;
+   }
+
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean vs20_output( struct svga_shader_emitter *emit,
+                         struct tgsi_declaration_semantic semantic,
+                         unsigned idx )
+{
+   /* Don't emit dcl instruction for vs20 outputs
+    */
+
+   /* Just build the register map table:
+    */
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_RASTOUT, 
+                                     SVGA3DRASTOUT_POSITION);
+      break;
+   case TGSI_SEMANTIC_PSIZE:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_psiz = emit->output_map[idx];
+      emit->true_psiz = dst_register( SVGA3DREG_RASTOUT, 
+                                      SVGA3DRASTOUT_PSIZE );
+      break;
+   case TGSI_SEMANTIC_FOG:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 );
+      break;
+   case TGSI_SEMANTIC_COLOR:
+      /* oD0 */
+      emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT,
+                                            semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_GENERIC:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT,
+                                            semantic.SemanticIndex + 1 );
+      break;
+   default:
+      assert(0);
+      emit->output_map[idx] = dst_register(  SVGA3DREG_TEMP, 0 );
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean ps20_sampler( struct svga_shader_emitter *emit,
+                          struct tgsi_declaration_semantic semantic,
+                          unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+   dcl.type = svga_tgsi_sampler_type( emit, idx );
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+                             const struct tgsi_full_declaration *decl )
+{
+   unsigned first = decl->DeclarationRange.First;
+   unsigned last = decl->DeclarationRange.Last;
+   unsigned semantic = 0;
+   unsigned semantic_idx = 0;
+   unsigned idx;
+   
+   if (decl->Declaration.Semantic) {
+      semantic = decl->Semantic.SemanticName;
+      semantic_idx = decl->Semantic.SemanticIndex;
+   }
+
+   for( idx = first; idx <= last; idx++ ) {
+      boolean ok;
+
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_SAMPLER:
+         assert (emit->unit == PIPE_SHADER_FRAGMENT);
+         ok = ps20_sampler( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_INPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs20_input( emit, decl->Semantic, idx );
+         else
+            ok = ps20_input( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs20_output( emit, decl->Semantic, idx );
+         else
+            ok = ps20_output( emit, decl->Semantic, idx );
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         ok = TRUE;
+      }
+
+      if (!ok)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
new file mode 100644
index 0000000000..08e7dfb117
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -0,0 +1,385 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic,
+                                         unsigned *usage,
+                                         unsigned *idx )
+{
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:  
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_POSITION;
+      break;
+   case TGSI_SEMANTIC_COLOR:     
+
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_COLOR;
+      break;
+   case TGSI_SEMANTIC_BCOLOR:
+      *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */
+      *usage = SVGA3D_DECLUSAGE_COLOR;
+      break;
+   case TGSI_SEMANTIC_FOG:       
+      *idx = 0;
+      assert(semantic.SemanticIndex == 0);
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      break;
+   case TGSI_SEMANTIC_PSIZE:     
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_PSIZE;
+      break;
+   case TGSI_SEMANTIC_GENERIC:   
+      *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      break;
+   case TGSI_SEMANTIC_NORMAL:    
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_NORMAL;
+      break;
+   default:
+      assert(0);
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      *idx = 0;
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+/* Emit a DCL instruction for reg with the given usage and usage index. */
+static boolean emit_decl( struct svga_shader_emitter *emit,
+                          SVGA3dShaderDestToken reg,
+                          unsigned usage, 
+                          unsigned index )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   dcl.dst = reg;
+   dcl.usage = usage;
+   dcl.index = index;
+   dcl.values[0] |= 1u << 31;   /* unsigned: 1<<31 overflows a signed int */
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean emit_vface_decl( struct svga_shader_emitter *emit )
+{
+   if (!emit->emitted_vface) {
+      SVGA3dShaderDestToken reg =
+         dst_register( SVGA3DREG_MISCTYPE,
+                       SVGA3DMISCREG_FACE );
+
+      if (!emit_decl( emit, reg, 0, 0 ))
+         return FALSE;
+
+      emit->emitted_vface = TRUE;
+   }
+   return TRUE;
+}
+
+static boolean ps30_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   unsigned usage, index;
+   SVGA3dShaderDestToken reg;
+
+   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+      emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE,
+                                           SVGA3DMISCREG_POSITION );
+
+      emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X,
+                                                             TGSI_SWIZZLE_Y,
+                                                             TGSI_SWIZZLE_Y,
+                                                             TGSI_SWIZZLE_Y );
+
+      reg = writemask( dst(emit->input_map[idx]),
+                       TGSI_WRITEMASK_XY );
+
+      return emit_decl( emit, reg, 0, 0 );
+   }
+   else if (emit->key.fkey.light_twoside &&
+            (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) {
+
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      emit->internal_color_idx[emit->internal_color_count] = idx;
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count );
+      emit->ps30_input_count++;
+      emit->internal_color_count++;
+
+      reg = dst( emit->input_map[idx] );
+
+      if (!emit_decl( emit, reg, usage, index ))
+         return FALSE;
+
+      semantic.SemanticName = TGSI_SEMANTIC_BCOLOR;
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+
+      if (!emit_decl( emit, reg, usage, index ))
+         return FALSE;
+
+      if (!emit_vface_decl( emit ))
+         return FALSE;
+
+      return TRUE;
+   }
+   else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+      if (!emit_vface_decl( emit ))
+         return FALSE;
+      emit->emit_frontface = TRUE;
+      emit->internal_frontface_idx = idx;
+      return TRUE;
+   }
+   else {
+
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+      reg = dst( emit->input_map[idx] );
+
+      return emit_decl( emit, reg, usage, index );
+   }
+
+}
+
+
+/* PS output registers are the same as 2.0.  POSITION goes to a temp
+ * (temp_pos) with the DEPTHOUT register kept in true_pos.  Returns TRUE.
+ */
+static boolean ps30_output( struct svga_shader_emitter *emit,
+                            struct tgsi_declaration_semantic semantic,
+                            unsigned idx )
+{
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_COLOR:
+      emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT,
+                                            semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_POSITION:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT,
+                                     semantic.SemanticIndex );
+      break;
+   default:
+      assert(0);
+      /* unknown semantic: still give output_map[idx] a valid register */
+      emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, 0 );
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/* We still make up the input semantics the same as in 2.0.  Zero-stride
+ * attributes are instead read from constants placed after the TGSI ones. */
+static boolean vs30_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+   unsigned usage, index;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+   /* shifts below are unsigned: 1 << 31 would overflow a signed int */
+   if (emit->key.vkey.zero_stride_vertex_elements & (1u << idx)) {
+      unsigned i;
+      unsigned offset = 0;
+      unsigned start_idx = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      /* adjust for prescale constants */
+      start_idx += emit->key.vkey.need_prescale ? 2 : 0;
+      /* compute the offset from the start of zero stride constants */
+      for (i = 0; i < PIPE_MAX_ATTRIBS && i < idx; ++i) {
+         if (emit->key.vkey.zero_stride_vertex_elements & (1u << i))
+            ++offset;
+      }
+      emit->input_map[idx] = src_register( SVGA3DREG_CONST,
+                                           start_idx + offset );
+   } else {
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+      dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+      assert(dcl.dst.reserved0);
+
+      svga_generate_vdecl_semantics( idx, &usage, &index );
+
+      dcl.usage = usage;
+      dcl.index = index;
+      dcl.values[0] |= 1u << 31;
+
+      return  (emit_instruction(emit, opcode) &&
+               svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+   }
+   return TRUE;
+}
+
+/* VS3.0 outputs have proper declarations and semantic info for
+ * matching against PS inputs.  POSITION and PSIZE are written to temps
+ * first; the real registers are kept in true_pos / true_psiz. */
+static boolean vs30_output( struct svga_shader_emitter *emit,
+                         struct tgsi_declaration_semantic semantic,
+                         unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+   unsigned usage, index;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+      return FALSE;
+
+   dcl.dst = dst_register( SVGA3DREG_OUTPUT, idx );
+   dcl.usage = usage;
+   dcl.index = index;
+   dcl.values[0] |= 1u << 31;   /* unsigned: 1<<31 overflows a signed int */
+
+   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+      assert(idx == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dcl.dst;
+   }
+   else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) {
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_psiz = emit->output_map[idx];
+
+      /* This has the effect of not declaring psiz (below) and not 
+       * emitting the final MOV to true_psiz in the postamble.
+       */
+      if (!emit->key.vkey.allow_psiz)
+         return TRUE;
+
+      emit->true_psiz = dcl.dst;
+   }
+   else {
+      emit->output_map[idx] = dcl.dst;
+   }
+
+   /* declare the real output register held in dcl.dst */
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean ps30_sampler( struct svga_shader_emitter *emit,
+                          struct tgsi_declaration_semantic semantic,
+                          unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+   /* 'semantic' is unused: the DCL only needs the sampler index and type */
+   dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+   dcl.type = svga_tgsi_sampler_type( emit, idx );
+   dcl.values[0] |= 1u << 31;   /* unsigned: 1<<31 overflows a signed int */
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+/**
+ * Translate one TGSI declaration for SM3.0 shaders.  Each register index
+ * in the declared range is handed to the vs30_ / ps30_ helper matching the
+ * register file and shader unit; files other than SAMPLER, INPUT and OUTPUT
+ * need no declaration.
+ * Returns FALSE as soon as a helper fails, TRUE otherwise.
+ */
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+                             const struct tgsi_full_declaration *decl )
+{
+   unsigned first = decl->DeclarationRange.First;
+   unsigned last = decl->DeclarationRange.Last;
+   unsigned idx;
+
+   for( idx = first; idx <= last; idx++ ) {
+      boolean ok;
+
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_SAMPLER:
+         assert (emit->unit == PIPE_SHADER_FRAGMENT);
+         ok = ps30_sampler( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_INPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs30_input( emit, decl->Semantic, idx );
+         else
+            ok = ps30_input( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs30_output( emit, decl->Semantic, idx );
+         else
+            ok = ps30_output( emit, decl->Semantic, idx );
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         ok = TRUE;
+      }
+
+      if (!ok)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
new file mode 100644
index 0000000000..2557824293
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -0,0 +1,345 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_EMIT_H
+#define SVGA_TGSI_EMIT_H
+
+#include "tgsi/tgsi_scan.h"
+#include "svga_hw_reg.h"
+#include "svga_tgsi.h"
+#include "svga3d_shaderdefs.h"
+
+struct src_register
+{
+   SVGA3dShaderSrcToken base;
+   SVGA3dShaderSrcToken indirect;
+};
+
+
+struct svga_arl_consts {
+   int number;
+   int idx;
+   int swizzle;
+   int arl_num;
+};
+
+/* Internal functions:
+ */
+
+struct svga_shader_emitter
+{
+   boolean use_sm30;
+   
+   unsigned size;
+   char *buf;
+   char *ptr;
+
+   union svga_compile_key key;
+   struct tgsi_shader_info info;
+   int unit;
+
+   int imm_start;
+
+   int nr_hw_const;
+   int nr_hw_temp;
+   
+   int insn_offset;
+
+   int internal_temp_count;
+   int internal_imm_count;
+
+   int internal_color_idx[2]; /* diffuse, specular */
+   int internal_color_count;
+
+   boolean emitted_vface;
+   boolean emit_frontface;
+   int internal_frontface_idx;
+
+   int ps30_input_count;
+
+   boolean in_main_func;
+
+   boolean created_zero_immediate;
+   int zero_immediate_idx;
+
+   boolean created_loop_const;
+   int loop_const_idx;
+
+   boolean created_sincos_consts;
+   int sincos_consts_idx;
+
+   unsigned label[32];
+   unsigned nr_labels;
+
+   struct src_register input_map[PIPE_MAX_ATTRIBS];
+   SVGA3dShaderDestToken output_map[PIPE_MAX_ATTRIBS];
+
+   struct src_register imm_0055;
+   SVGA3dShaderDestToken temp_pos;
+   SVGA3dShaderDestToken true_pos;
+
+   SVGA3dShaderDestToken temp_col[PIPE_MAX_COLOR_BUFS];
+   SVGA3dShaderDestToken true_col[PIPE_MAX_COLOR_BUFS];
+
+   SVGA3dShaderDestToken temp_psiz;
+   SVGA3dShaderDestToken true_psiz;
+
+   struct svga_arl_consts arl_consts[12];
+   int num_arl_consts;
+   int current_arl;
+};
+
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+                                unsigned dword );
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+                                 const unsigned *dwords,
+                                 unsigned nr );
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+                                 unsigned opcode );
+
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+                                       const struct tgsi_token *tokens );
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+                               const struct tgsi_full_declaration *decl );
+
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+                               const struct tgsi_full_declaration *decl );
+
+
+static INLINE boolean emit_dst( struct svga_shader_emitter *emit,
+                         SVGA3dShaderDestToken dest )
+{
+   assert(dest.reserved0);
+   return svga_shader_emit_dword( emit, dest.value );
+}
+
+/* Emit a source token, plus its indirect token when relAddr is set. */
+static INLINE boolean emit_src( struct svga_shader_emitter *emit,
+                         const struct src_register src )
+{
+   assert(src.base.reserved0);
+
+   if (!src.base.relAddr)
+      return svga_shader_emit_dword( emit, src.base.value );
+
+   assert(src.indirect.reserved0);
+   if (!svga_shader_emit_dword( emit, src.base.value ))
+      return FALSE;
+   return svga_shader_emit_dword( emit, src.indirect.value );
+}
+
+
+static INLINE boolean emit_instruction( struct svga_shader_emitter *emit,
+                                 SVGA3dShaderInstToken opcode )
+{
+   return svga_shader_emit_opcode( emit, opcode.value );
+}
+
+
+static INLINE boolean emit_op1( struct svga_shader_emitter *emit,
+                         SVGA3dShaderInstToken inst,
+                         SVGA3dShaderDestToken dest,
+                         struct src_register src0 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ));
+}
+
+static INLINE boolean emit_op2( struct svga_shader_emitter *emit,
+                     SVGA3dShaderInstToken inst,
+                     SVGA3dShaderDestToken dest,
+                     struct src_register src0,
+                     struct src_register src1 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ) &&
+           emit_src( emit, src1 ));
+}
+
+static INLINE boolean emit_op3( struct svga_shader_emitter *emit,
+                         SVGA3dShaderInstToken inst,
+                         SVGA3dShaderDestToken dest,
+                         struct src_register src0,
+                         struct src_register src1,
+                         struct src_register src2 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ) &&
+           emit_src( emit, src1 ) &&
+           emit_src( emit, src2 ));
+}
+
+
+#define TRANSLATE_SWIZZLE(x,y,z,w)  ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6))
+#define SWIZZLE_XYZW  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W)
+#define SWIZZLE_XXXX  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X)
+#define SWIZZLE_YYYY  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y)
+#define SWIZZLE_ZZZZ  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z)
+#define SWIZZLE_WWWW  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W)
+
+
+
+static INLINE SVGA3dShaderInstToken
+inst_token( unsigned opcode )
+{
+   SVGA3dShaderInstToken inst;
+
+   inst.value = 0;
+   inst.op = opcode;
+
+   return inst;
+}
+
+static INLINE SVGA3dShaderDestToken 
+dst_register( unsigned file,
+              int number )
+{
+   SVGA3dShaderDestToken dest;
+
+   dest.value = 0;
+   dest.num = number;
+   dest.type_upper = file >> 3;
+   dest.relAddr = 0;
+   dest.reserved1 = 0;
+   dest.mask = 0xf;
+   dest.dstMod = 0;
+   dest.shfScale = 0;
+   dest.type_lower = file & 0x7;
+   dest.reserved0 = 1;          /* is_reg */
+   
+   return dest;
+}
+
+static INLINE SVGA3dShaderDestToken
+writemask( SVGA3dShaderDestToken dest,
+           unsigned mask )
+{
+   dest.mask &= mask;
+   return dest;
+}
+
+
+static INLINE SVGA3dShaderSrcToken 
+src_token( unsigned file, int number )
+{
+   SVGA3dShaderSrcToken src;
+
+   src.value = 0;
+   src.num = number;
+   src.type_upper = file >> 3;
+   src.relAddr = 0;
+   src.reserved1 = 0;
+   src.swizzle = SWIZZLE_XYZW;
+   src.srcMod = 0;
+   src.type_lower = file & 0x7;
+   src.reserved0 = 1;           /* is_reg */
+
+   return src;
+}
+
+
+static INLINE struct src_register 
+absolute( struct src_register src )
+{
+   src.base.srcMod = SVGA3DSRCMOD_ABS;
+
+   return src;
+}
+
+
+static INLINE struct src_register 
+negate( struct src_register src )
+{
+   switch (src.base.srcMod) {
+   case SVGA3DSRCMOD_ABS:
+      src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+      break;
+   case SVGA3DSRCMOD_ABSNEG:
+      src.base.srcMod = SVGA3DSRCMOD_ABS;
+      break;
+   case SVGA3DSRCMOD_NEG:
+      src.base.srcMod = SVGA3DSRCMOD_NONE;
+      break;
+   case SVGA3DSRCMOD_NONE:
+      src.base.srcMod = SVGA3DSRCMOD_NEG;
+      break;
+   }
+   return src;
+}
+
+
+static INLINE struct src_register 
+src_register( unsigned file, int number )
+{
+   struct src_register src;
+   
+   src.base = src_token( file, number );
+   src.indirect.value = 0;
+
+   return src;
+}
+
+static INLINE SVGA3dShaderDestToken dst( struct src_register src )
+{
+   return dst_register( SVGA3dShaderGetRegType( src.base.value ),
+                        src.base.num );
+}
+
+static INLINE struct src_register src( SVGA3dShaderDestToken dst )
+{
+   return src_register( SVGA3dShaderGetRegType( dst.value ),
+                        dst.num );
+}
+
+/* SVGA3D sampler type for the texture target keyed at tex[idx]; 1D uses 2D. */
+static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit,
+                                            int idx )
+{
+   switch (emit->key.fkey.tex[idx].texture_target) {
+   case PIPE_TEXTURE_1D:   /* fallthrough */
+   case PIPE_TEXTURE_2D:
+      return SVGA3DSAMP_2D;
+   case PIPE_TEXTURE_3D:
+      return SVGA3DSAMP_VOLUME;
+   case PIPE_TEXTURE_CUBE:
+      return SVGA3DSAMP_CUBE;
+   default:
+      return SVGA3DSAMP_UNKNOWN;
+   }
+}
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
new file mode 100644
index 0000000000..ea409b7e16
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -0,0 +1,2716 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
+
+
+
+ 
+static unsigned
+translate_opcode(
+   uint opcode )
+{
+   switch (opcode) {
+   case TGSI_OPCODE_ABS:        return SVGA3DOP_ABS;
+   case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
+   case TGSI_OPCODE_BREAKC:     return SVGA3DOP_BREAKC;
+   case TGSI_OPCODE_DDX:        return SVGA3DOP_DSX;
+   case TGSI_OPCODE_DDY:        return SVGA3DOP_DSY;
+   case TGSI_OPCODE_DP2A:       return SVGA3DOP_DP2ADD;
+   case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
+   case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
+   case TGSI_OPCODE_ENDFOR:     return SVGA3DOP_ENDLOOP;
+   case TGSI_OPCODE_FRC:        return SVGA3DOP_FRC;
+   case TGSI_OPCODE_BGNFOR:     return SVGA3DOP_LOOP;
+   case TGSI_OPCODE_MAD:        return SVGA3DOP_MAD;
+   case TGSI_OPCODE_MAX:        return SVGA3DOP_MAX;
+   case TGSI_OPCODE_MIN:        return SVGA3DOP_MIN;
+   case TGSI_OPCODE_MOV:        return SVGA3DOP_MOV;
+   case TGSI_OPCODE_MUL:        return SVGA3DOP_MUL;
+   case TGSI_OPCODE_NOP:        return SVGA3DOP_NOP;
+   case TGSI_OPCODE_NRM4:       return SVGA3DOP_NRM;
+   case TGSI_OPCODE_SSG:        return SVGA3DOP_SGN;
+   default:
+      debug_printf("Unkown opcode %u\n", opcode);
+      assert( 0 );
+      return SVGA3DOP_LAST_INST;
+   }
+}
+
+
+static unsigned translate_file( unsigned file )
+{
+   switch (file) {
+   case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
+   case TGSI_FILE_INPUT:     return SVGA3DREG_INPUT;
+   case TGSI_FILE_OUTPUT:    return SVGA3DREG_OUTPUT; /* VS3.0+ only */
+   case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
+   case TGSI_FILE_CONSTANT:  return SVGA3DREG_CONST;
+   case TGSI_FILE_SAMPLER:   return SVGA3DREG_SAMPLER;
+   case TGSI_FILE_ADDRESS:   return SVGA3DREG_ADDR;
+   default:
+      assert( 0 );
+      return SVGA3DREG_TEMP;
+   }
+}
+
+
+
+
+
+
+static SVGA3dShaderDestToken 
+translate_dst_register( struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn,
+                        unsigned idx )
+{
+   const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx];
+   SVGA3dShaderDestToken dest;
+
+   switch (reg->DstRegister.File) {
+   case TGSI_FILE_OUTPUT:
+      /* Output registers encode semantic information in their name.
+       * Need to lookup a table built at decl time:
+       */
+      dest = emit->output_map[reg->DstRegister.Index];
+      break;
+
+   default:
+      dest = dst_register( translate_file( reg->DstRegister.File ),
+                           reg->DstRegister.Index );
+      break;
+   }
+
+   dest.mask = reg->DstRegister.WriteMask;
+
+   if (insn->Instruction.Saturate) 
+      dest.dstMod = SVGA3DDSTMOD_SATURATE;
+
+   return dest;
+}
+
+
+static struct src_register 
+swizzle( struct src_register src,
+         int x,
+         int y,
+         int z,
+         int w )
+{
+   x = (src.base.swizzle >> (x * 2)) & 0x3;
+   y = (src.base.swizzle >> (y * 2)) & 0x3;
+   z = (src.base.swizzle >> (z * 2)) & 0x3;
+   w = (src.base.swizzle >> (w * 2)) & 0x3;
+
+   src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
+
+   return src;
+}
+
+static struct src_register
+scalar( struct src_register src,
+        int comp )
+{
+   return swizzle( src, comp, comp, comp, comp );
+}
+
+/* TRUE when some arl_consts[] entry has arl_num equal to current_arl. */
+static INLINE boolean
+svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
+{
+   int n = emit->num_arl_consts;
+
+   while (n-- > 0)
+      if (emit->arl_consts[n].arl_num == emit->current_arl)
+         return TRUE;
+   return FALSE;
+}
+
+/* 'number' of the first arl_consts[] entry matching current_arl, or 0. */
+static INLINE int
+svga_arl_adjustment( const struct svga_shader_emitter *emit )
+{
+   int k = 0;
+
+   while (k < emit->num_arl_consts &&
+          emit->arl_consts[k].arl_num != emit->current_arl)
+      k++;
+   return (k < emit->num_arl_consts) ? emit->arl_consts[k].number : 0;
+}
+
+static struct src_register 
+translate_src_register( const struct svga_shader_emitter *emit,
+                        const struct tgsi_full_src_register *reg )
+{
+   struct src_register src;
+
+   switch (reg->SrcRegister.File) {
+   case TGSI_FILE_INPUT:
+      /* Input registers are referred to by their semantic name rather
+       * than by index.  Use the mapping built up from the decls:
+       */
+      src = emit->input_map[reg->SrcRegister.Index];
+      break;
+       
+   case TGSI_FILE_IMMEDIATE:
+      /* Immediates are appended after TGSI constants in the D3D
+       * constant buffer.
+       */
+      src = src_register( translate_file( reg->SrcRegister.File ),
+                          reg->SrcRegister.Index + 
+                          emit->imm_start );
+      break;
+
+   default:
+      src = src_register( translate_file( reg->SrcRegister.File ),
+                          reg->SrcRegister.Index );
+
+      break;
+   }
+
+   /* Indirect addressing (for constant buffer lookups only)
+    */
+   if (reg->SrcRegister.Indirect)
+   {
+      /* we shift the offset towards the minimum */
+      if (svga_arl_needs_adjustment( emit )) {
+         src.base.num -= svga_arl_adjustment( emit );
+      }
+      src.base.relAddr = 1;
+
+      /* Not really sure what should go in the second token:
+       */
+      src.indirect = src_token( SVGA3DREG_ADDR,
+                                reg->SrcRegisterInd.Index );
+
+      src.indirect.swizzle = SWIZZLE_XXXX;
+   }
+
+   src = swizzle( src,
+                  reg->SrcRegister.SwizzleX,
+                  reg->SrcRegister.SwizzleY,
+                  reg->SrcRegister.SwizzleZ,
+                  reg->SrcRegister.SwizzleW );
+
+   /* src.mod isn't a bitfield, unfortunately:
+    * See tgsi_util_get_full_src_register_sign_mode for implementation details.
+    */
+   if (reg->SrcRegisterExtMod.Absolute) {
+      if (reg->SrcRegisterExtMod.Negate)
+         src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+      else
+         src.base.srcMod = SVGA3DSRCMOD_ABS;
+   }
+   else {
+      if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate)
+         src.base.srcMod = SVGA3DSRCMOD_NEG;
+      else
+         src.base.srcMod = SVGA3DSRCMOD_NONE;
+   }
+
+   return src;
+}
+
+
+/* Get the next internal temporary; numbering starts at nr_hw_temp.
+ * NOTE(review): no availability check is made, so nothing like -1 is
+ * ever returned to signal exhaustion. */
+static INLINE SVGA3dShaderDestToken 
+get_temp( struct svga_shader_emitter *emit )
+{
+   int i = emit->nr_hw_temp + emit->internal_temp_count++;
+
+   return dst_register( SVGA3DREG_TEMP, i );
+}
+
+/* Release a single temp.  Only effective for the most recently allocated
+ * temp (numbered from nr_hw_temp, see get_temp()); any other release is
+ * delayed until the next call to reset_temp_regs(). */
+static INLINE void
+release_temp( struct svga_shader_emitter *emit,
+              SVGA3dShaderDestToken temp )
+{
+   if (emit->internal_temp_count > 0 &&
+       temp.num == emit->nr_hw_temp + emit->internal_temp_count - 1)
+      emit->internal_temp_count--;
+}
+
+static void reset_temp_regs( struct svga_shader_emitter *emit )
+{
+   emit->internal_temp_count = 0;
+}
+   
+
+static boolean submit_op0( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest )
+{
+   return (emit_instruction( emit, inst ) && 
+           emit_dst( emit, dest ));
+}
+
+static boolean submit_op1( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0 )
+{
+   return emit_op1( emit, inst, dest, src0 );
+}
+
+
+/* Emit a two-source instruction.  SVGA shaders may not refer to >1
+ * constant register in a single instruction, and likewise at most one
+ * input register may be read by any instruction.  When src0 and src1
+ * are both constants (or both inputs) with different register numbers,
+ * src0 is first copied to a temporary with MOV and the temporary is
+ * used in its place.  Returns FALSE as soon as any emit fails.
+ */
+static boolean submit_op2( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0,
+                           struct src_register src1 )
+{
+   SVGA3dShaderDestToken temp;
+   SVGA3dShaderRegType type0, type1;
+   boolean need_temp = FALSE;
+
+   temp.value = 0;
+   type0 = SVGA3dShaderGetRegType( src0.base.value );
+   type1 = SVGA3dShaderGetRegType( src1.base.value );
+
+   if (type0 == SVGA3DREG_CONST &&
+       type1 == SVGA3DREG_CONST &&
+       src0.base.num != src1.base.num)
+      need_temp = TRUE;
+
+   if (type0 == SVGA3DREG_INPUT &&
+       type1 == SVGA3DREG_INPUT &&
+       src0.base.num != src1.base.num)
+      need_temp = TRUE;
+
+   if (need_temp)
+   {
+      temp = get_temp( emit );
+
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+         return FALSE;
+
+      src0 = src( temp );   /* the real instruction reads the copy */
+   }
+
+   if (!emit_op2( emit, inst, dest, src0, src1 ))
+      return FALSE;
+
+   if (need_temp)
+      release_temp( emit, temp );
+
+   return TRUE;
+}
+
+
+/* Three-source variant of submit_op2(): a source that would make the
+ * instruction read two different constant (or input) registers is first
+ * moved to a temporary.  SINCOS is exempt from the constant check.
+ */
+static boolean submit_op3( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0,
+                           struct src_register src1,
+                           struct src_register src2 )
+{
+   SVGA3dShaderDestToken temp0;
+   SVGA3dShaderDestToken temp1;
+   boolean need_temp0 = FALSE;
+   boolean need_temp1 = FALSE;
+   SVGA3dShaderRegType type0, type1, type2;
+
+   temp0.value = 0;
+   temp1.value = 0;
+   type0 = SVGA3dShaderGetRegType( src0.base.value );
+   type1 = SVGA3dShaderGetRegType( src1.base.value );
+   type2 = SVGA3dShaderGetRegType( src2.base.value );
+
+   if (inst.op != SVGA3DOP_SINCOS) {
+      if (type0 == SVGA3DREG_CONST &&
+          ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
+           (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
+         need_temp0 = TRUE;
+
+      if (type1 == SVGA3DREG_CONST &&
+          (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
+         need_temp1 = TRUE;
+   }
+
+   if (type0 == SVGA3DREG_INPUT &&
+       ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
+        (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
+      need_temp0 = TRUE;
+
+   if (type1 == SVGA3DREG_INPUT &&
+       (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
+      need_temp1 = TRUE;
+
+   if (need_temp0)
+   {
+      temp0 = get_temp( emit );
+ 
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+         return FALSE;
+         
+      src0 = src( temp0 );
+   }
+
+   if (need_temp1)
+   {
+      temp1 = get_temp( emit );
+
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
+         return FALSE;
+
+      src1 = src( temp1 );
+   }
+
+   if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
+      return FALSE;
+
+   if (need_temp1)
+      release_temp( emit, temp1 );   /* reverse order of allocation */
+   if (need_temp0)
+      release_temp( emit, temp0 );
+   return TRUE;
+}
+
+
+/* Emit a DEF (float) or DEFI (int) instruction defining constant register
+ * idx as {a, b, c, d}; the int form converts each value with an (int)
+ * cast.  Returns FALSE on emit failure or for an unknown type. */
+static boolean emit_def_const( struct svga_shader_emitter *emit,
+                               SVGA3dShaderConstType type,
+                               unsigned idx,
+                               float a, float b, float c, float d )
+{
+   SVGA3DOpDefArgs def;
+   SVGA3dShaderInstToken opcode;
+
+   switch (type) {
+   case SVGA3D_CONST_TYPE_FLOAT:
+      opcode = inst_token( SVGA3DOP_DEF );
+      def.dst = dst_register( SVGA3DREG_CONST, idx );
+      def.constValues[0] = a;
+      def.constValues[1] = b;
+      def.constValues[2] = c;
+      def.constValues[3] = d;
+      break;
+   case SVGA3D_CONST_TYPE_INT:
+      opcode = inst_token( SVGA3DOP_DEFI );
+      def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
+      def.constIValues[0] = (int)a;
+      def.constIValues[1] = (int)b;
+      def.constIValues[2] = (int)c;
+      def.constIValues[3] = (int)d;
+      break;
+   default:
+      assert(0);
+      return FALSE;   /* never emit an uninitialised opcode/def */
+   }
+
+   if (!emit_instruction(emit, opcode) ||
+       !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Define the shared {0, 0, 0, 1} float constant and record its slot. */
+static INLINE boolean
+create_zero_immediate( struct svga_shader_emitter *emit )
+{
+   /* The slot is consumed even when the DEF below fails. */
+   const unsigned slot = emit->nr_hw_const++;
+
+   if (emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, slot, 0, 0, 0, 1 )) {
+      emit->zero_immediate_idx = slot;
+      emit->created_zero_immediate = TRUE;
+      return TRUE;
+   }
+   return FALSE;
+}
+
+/* Define the integer constant read by get_loop_const() and record it. */
+static INLINE boolean
+create_loop_const( struct svga_shader_emitter *emit )
+{
+   const unsigned slot = emit->nr_hw_const++;
+   const float iterations = 255;   /* iteration count */
+   const float start = 0;          /* initial value */
+   const float step = 1;           /* step size */
+
+   if (emit_def_const( emit, SVGA3D_CONST_TYPE_INT, slot,
+                       iterations, start, step, 0 /* not used, must be 0 */)) {
+      emit->loop_const_idx = slot;
+      emit->created_loop_const = TRUE;
+      return TRUE;
+   }
+   return FALSE;
+}
+
+/* Define the two consecutive float constants that the three-operand
+ * SINCOS form takes as extra sources.  Only the first slot is stored;
+ * get_sincos_const() reaches the second as sincos_consts_idx + 1.
+ */
+static INLINE boolean
+create_sincos_consts( struct svga_shader_emitter *emit )
+{
+   static const float coeffs[2][4] = {
+      { -1.5500992e-006f, -2.1701389e-005f, 0.0026041667f, 0.00026041668f },
+      { -0.020833334f, -0.12500000f, 1.0f, 0.50000000f }
+   };
+   unsigned n;
+
+   for (n = 0; n < 2; n++) {
+      const unsigned slot = emit->nr_hw_const++;
+      if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, slot,
+                           coeffs[n][0], coeffs[n][1],
+                           coeffs[n][2], coeffs[n][3] ))
+         return FALSE;
+      if (n == 0)
+         emit->sincos_consts_idx = slot;
+   }
+
+   emit->created_sincos_consts = TRUE;
+   return TRUE;
+}
+
+/* Define the float constants that get_fake_arl_const() reads back.  The
+ * values in emit->arl_consts[] are packed four to a constant register;
+ * each entry records the register (idx) and the channel (swizzle) that
+ * holds its value.  Unused trailing channels are zeroed.  Returns FALSE
+ * if a DEF could not be emitted.
+ */
+static INLINE boolean
+create_arl_consts( struct svga_shader_emitter *emit )
+{
+   static const int channel_swizzle[4] = {
+      TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W
+   };
+   int i;
+
+   for (i = 0; i < emit->num_arl_consts; i += 4) {
+      int j;
+      unsigned idx = emit->nr_hw_const++;
+      float vals[4];
+
+      for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
+         vals[j] = emit->arl_consts[i + j].number;
+         emit->arl_consts[i + j].idx = idx;
+         /* Entry i + j lives in channel j, so it is that entry's own
+          * swizzle that must be set (not the one of entry i + 0).
+          */
+         emit->arl_consts[i + j].swizzle = channel_swizzle[j];
+      }
+      while (j < 4)
+         vals[j++] = 0;
+
+      if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+                           vals[0], vals[1],
+                           vals[2], vals[3]))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static INLINE struct src_register
+get_vface( struct svga_shader_emitter *emit )
+{
+   /* Callers may only ask for the face register once emitted_vface is set. */
+   assert(emit->emitted_vface);
+   return src_register( SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE );
+}
+
+/* Source register for the {0, 0, 0, 1} immediate; it must already have
+ * been defined by create_zero_immediate(). */
+static INLINE struct src_register
+get_zero_immediate( struct svga_shader_emitter *emit )
+{
+   assert(emit->created_zero_immediate);
+   assert(emit->zero_immediate_idx >= 0);
+   return src_register( SVGA3DREG_CONST, emit->zero_immediate_idx );
+}
+
+/* Source register for the integer loop constant; it must already have
+ * been defined by create_loop_const(). */
+static INLINE struct src_register
+get_loop_const( struct svga_shader_emitter *emit )
+{
+   assert(emit->created_loop_const);
+   assert(emit->loop_const_idx >= 0);
+   return src_register( SVGA3DREG_CONSTINT, emit->loop_const_idx );
+}
+
+/* Source register for SINCOS constant 0 or 1; both must already have
+ * been defined by create_sincos_consts(). */
+static INLINE struct src_register
+get_sincos_const( struct svga_shader_emitter *emit,
+                  unsigned index )
+{
+   assert(emit->created_sincos_consts);
+   assert(emit->sincos_consts_idx >= 0);
+   assert(index == 0 || index == 1);
+   return src_register( SVGA3DREG_CONST, emit->sincos_consts_idx + index );
+}
+
+/* Scalar source for the constant recorded for the current ARL.  The last
+ * matching arl_consts[] entry wins; const 0, swizzle 0 if none match. */
+static INLINE struct src_register
+get_fake_arl_const( struct svga_shader_emitter *emit )
+{
+   int n, reg_idx = 0, chan = 0;
+
+   for (n = 0; n < emit->num_arl_consts; n++) {
+      if (emit->arl_consts[n].arl_num != emit->current_arl)
+         continue;
+      reg_idx = emit->arl_consts[n].idx;
+      chan = emit->arl_consts[n].swizzle;
+   }
+
+   return scalar( src_register( SVGA3DREG_CONST, reg_idx ), chan );
+}
+
+/* Constant register holding the width/height entry that emit->key.fkey
+ * records for sampler_num. */
+static INLINE struct src_register
+get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
+{
+   /* the width/height indexes start right after constants */
+   const int first_slot = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+   const int offset = emit->key.fkey.tex[sampler_num].width_height_idx;
+   struct src_register reg;
+
+   reg = src_register( SVGA3DREG_CONST, offset + first_slot );
+   return reg;
+}
+
+/* Emit an ARL whose source is first offset by the constant recorded for
+ * the current ARL (see get_fake_arl_const()):
+ *    MOV  TMP, SRC
+ *    ADD  TMP, TMP, ARL_CONST
+ *    MOVA DST, TMP   (with SRC's swizzle)
+ */
+static boolean emit_fake_arl(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn)
+{
+   const struct src_register value = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register offset = get_fake_arl_const( emit );
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   SVGA3dShaderDestToken tmp = get_temp( emit );
+   struct src_register biased = src( tmp );
+
+   /* replicate the original swizzle */
+   biased.base.swizzle = value.base.swizzle;
+
+   return (submit_op1( emit, inst_token( SVGA3DOP_MOV ), tmp, value ) &&
+           submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
+                       offset ) &&
+           submit_op1( emit, inst_token( SVGA3DOP_MOVA ), dst, biased ));
+}
+
+/* Emit "IFC src, zero.x" with the NE comparison, i.e. "if (src != 0)". */
+static boolean emit_if(struct svga_shader_emitter *emit,
+                       const struct tgsi_full_instruction *insn)
+{
+   const struct src_register cond = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register zero_x =
+      scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_X );
+   SVGA3dShaderInstToken ifc = inst_token( SVGA3DOP_IFC );
+
+   ifc.control = SVGA3DOPCOMPC_NE;
+   if (!emit_instruction( emit, ifc ))
+      return FALSE;
+   return (emit_src( emit, cond ) && emit_src( emit, zero_x ));
+}
+
+static boolean emit_endif(struct svga_shader_emitter *emit,
+                          const struct tgsi_full_instruction *insn)
+{
+   /* ENDIF is emitted without operands; insn is not consulted. */
+   return emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) );
+}
+
+static boolean emit_else(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn)
+{
+   /* ELSE is emitted without operands; insn is not consulted. */
+   return emit_instruction( emit, inst_token( SVGA3DOP_ELSE ) );
+}
+
+/* Translate the following TGSI FLR instruction.
+ *    FLR  DST, SRC
+ * To the following SVGA3D instruction sequence.
+ *    FRC  TMP, SRC
+ *    ADD  DST, SRC, -TMP
+ *
+ * Returns FALSE if either instruction could not be emitted.
+ */
+static boolean emit_floor(struct svga_shader_emitter *emit,
+                          const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register value = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken frac = get_temp( emit );
+
+   /* FRC  TMP, SRC */
+   if (submit_op1( emit, inst_token( SVGA3DOP_FRC ), frac, value )) {
+      /* ADD  DST, SRC, -TMP  (the subtraction uses a negated source) */
+      return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, value,
+                         negate( src( frac ) ) );
+   }
+
+   return FALSE;
+}
+
+
+/* Translate the following TGSI CMP instruction.
+ *    CMP  DST, SRC0, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    CMP  DST, SRC0, SRC2, SRC1
+ * i.e. the last two operands trade places.
+ */
+static boolean emit_cmp(struct svga_shader_emitter *emit,
+                          const struct tgsi_full_instruction *insn )
+{
+   const struct tgsi_full_src_register *in = insn->FullSrcRegisters;
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register cond = translate_src_register( emit, &in[0] );
+   const struct src_register a = translate_src_register( emit, &in[1] );
+   const struct src_register b = translate_src_register( emit, &in[2] );
+
+   /* CMP  DST, SRC0, SRC2, SRC1 */
+   return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
+                      cond, b, a );
+}
+
+
+
+/* Translate the following TGSI DIV instruction.
+ *    DIV  DST.xy, SRC0, SRC1
+ * To the following SVGA3D instruction sequence.
+ *    RCP  TMP.x, SRC1.xxxx
+ *    RCP  TMP.y, SRC1.yyyy
+ *    MUL  DST.xy, SRC0, TMP
+ * (one RCP per channel enabled in DST's writemask)
+ */
+static boolean emit_div(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register num = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register den = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken recip = get_temp( emit );
+   unsigned chan;
+
+   /* RCP is scalar in SVGA3D, so issue one per channel that DST
+    * actually writes:
+    *
+    * RCP  TMP.?, SRC1.????
+    */
+   for (chan = 0; chan < 4; chan++) {
+      const unsigned bit = 1 << chan;
+
+      if (!(dst.mask & bit))
+         continue;
+      if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+                       writemask( recip, bit ),
+                       scalar( den, chan ) ))
+         return FALSE;
+   }
+
+   /* Then multiply them out with a single mul:
+    *
+    * MUL  DST, SRC0, TMP
+    */
+   return submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, num,
+                      src( recip ) );
+}
+
+/* Translate the following TGSI DP2 instruction.
+ *    DP2  DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    MUL  TMP, SRC1, SRC2
+ *    ADD  DST, TMP.xxxx, TMP.yyyy
+ *
+ * Only the x and y channels of the product feed the sum.  Returns FALSE
+ * if either instruction could not be emitted.
+ */
+static boolean emit_dp2(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register lhs = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register rhs = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken products = get_temp( emit );
+   struct src_register prod_x, prod_y;
+
+   /* MUL  TMP, SRC1, SRC2 */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), products, lhs, rhs ))
+      return FALSE;
+
+   prod_x = scalar(src( products ), TGSI_SWIZZLE_X);
+   prod_y = scalar(src( products ), TGSI_SWIZZLE_Y);
+
+   /* ADD  DST, TMP.xxxx, TMP.yyyy */
+   return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                      prod_x, prod_y );
+}
+
+
+/* Translate the following TGSI DPH instruction.
+ *    DPH  DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    DP3  TMP, SRC1, SRC2
+ *    ADD  DST, TMP, SRC2.wwww
+ *
+ * Returns FALSE if either instruction could not be emitted.
+ */
+static boolean emit_dph(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register lhs = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register rhs = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken dot3 = get_temp( emit );
+   struct src_register rhs_w;
+
+   /* DP3  TMP, SRC1, SRC2 */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), dot3, lhs, rhs ))
+      return FALSE;
+
+   rhs_w = scalar(rhs, TGSI_SWIZZLE_W);
+
+   /* ADD  DST, TMP, SRC2.wwww */
+   return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                      src( dot3 ), rhs_w );
+}
+
+/* Translate the following TGSI NRM instruction.
+ *    NRM  DST, SRC
+ * To the following SVGA3D instruction sequence.
+ *    DP3  TMP, SRC, SRC
+ *    RSQ  TMP, TMP
+ *    MUL  DST, SRC, TMP
+ *
+ * Returns FALSE if any of the three instructions could not be emitted.
+ */
+static boolean emit_nrm(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register vec = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken scale = get_temp( emit );
+   struct src_register scale_src;
+
+   /* DP3  TMP, SRC, SRC */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), scale, vec, vec ))
+      return FALSE;
+
+   /* RSQ  TMP, TMP */
+   scale_src = src( scale );
+   if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), scale, scale_src ))
+      return FALSE;
+
+   /* MUL  DST, SRC, TMP */
+   return submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
+                      vec, scale_src );
+}
+
+/* Emit SINCOS on the .x channel of src0.  With use_sm30 the opcode takes
+ * that single source; otherwise both sincos constants are passed too. */
+static boolean do_emit_sincos(struct svga_shader_emitter *emit,
+                              SVGA3dShaderDestToken dst,
+                              struct src_register src0)
+{
+   const SVGA3dShaderInstToken sincos = inst_token( SVGA3DOP_SINCOS );
+   const struct src_register arg = scalar(src0, TGSI_SWIZZLE_X);
+
+   if (!emit->use_sm30) {
+      return submit_op3( emit, sincos, dst, arg,
+                         get_sincos_const( emit, 0 ),
+                         get_sincos_const( emit, 1 ) );
+   }
+
+   return submit_op1( emit, sincos, dst, arg );
+}
+
+/* SCS TMP.xy SRC
+ * MOV DST TMP
+ */
+static boolean emit_sincos(struct svga_shader_emitter *emit,
+                           const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register arg = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken tmp = get_temp( emit );
+
+   /* SCS TMP SRC -- only x and y of TMP are written */
+   if (!do_emit_sincos(emit, writemask(tmp, TGSI_WRITEMASK_XY), arg ))
+      return FALSE;
+
+   /* MOV DST TMP */
+   return submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ) );
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.yyyy
+ *
+ * Returns FALSE if either instruction could not be emitted.
+ */
+static boolean emit_sin(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register arg = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken tmp = get_temp( emit );
+   struct src_register sine;
+
+   /* SCS TMP SRC -- only TMP.y is written */
+   if (!do_emit_sincos(emit, writemask(tmp, TGSI_WRITEMASK_Y), arg))
+      return FALSE;
+
+   sine = scalar(src( tmp ), TGSI_SWIZZLE_Y);
+
+   /* MOV DST TMP.yyyy */
+   return submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, sine );
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.xxxx
+ *
+ * Returns FALSE if either instruction could not be emitted.
+ */
+static boolean emit_cos(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register arg = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken tmp = get_temp( emit );
+   struct src_register cosine;
+
+   /* SCS TMP SRC -- only TMP.x is written */
+   if (!do_emit_sincos( emit, writemask(tmp, TGSI_WRITEMASK_X), arg ))
+      return FALSE;
+
+   cosine = scalar(src( tmp ), TGSI_SWIZZLE_X);
+
+   /* MOV DST TMP.xxxx */
+   return submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, cosine );
+}
+
+
+/*
+ * Subtraction: SRC0 - SRC1 is emitted as
+ *    ADD DST SRC0, negate(SRC1)
+ * Returns FALSE if the instruction could not be emitted.
+ */
+static boolean emit_sub(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register minuend = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register subtrahend = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   struct src_register negated;
+
+   negated = negate(subtrahend);
+
+   return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                      minuend, negated );
+}
+
+
+/* Emit TEXKILL on the first source.  The operand is used directly only
+ * when it is an unmodified, direct temporary with identity x/y/z swizzle;
+ * anything else is first copied to a temp with MOV. */
+static boolean emit_kil(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0];
+   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
+   struct src_register src0 = translate_src_register( emit, reg );
+
+   if (reg->SrcRegisterExtMod.Absolute ||
+       reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate ||
+       reg->SrcRegister.Indirect ||
+       reg->SrcRegister.SwizzleX != 0 || reg->SrcRegister.SwizzleY != 1 ||
+       reg->SrcRegister.SwizzleZ != 2 ||
+       reg->SrcRegister.File != TGSI_FILE_TEMPORARY)
+   {
+      SVGA3dShaderDestToken temp = get_temp( emit );
+      /* A failed MOV must abort; TEXKILL would read an unwritten temp. */
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+         return FALSE;
+      src0 = src( temp );
+   }
+
+   return submit_op0( emit, inst, dst(src0) );
+}
+
+
+/* mesa state tracker always emits kilp as an unconditional
+ * kil, so TEXKILL is simply fed a temp holding the negated .w (i.e. -1)
+ * of the {0, 0, 0, 1} immediate. */
+static boolean emit_kilp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   const SVGA3dShaderInstToken kill = inst_token( SVGA3DOP_TEXKILL );
+   const struct src_register one =
+      scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
+   SVGA3dShaderDestToken neg_one;
+
+   /* texkill doesn't allow negation on the operand so lets move
+    * negation of {1} to a temp register */
+   neg_one = get_temp( emit );
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), neg_one,
+                    negate( one ) ))
+      return FALSE;
+
+   /* TEXKILL TMP */
+   return submit_op0( emit, kill, neg_one );
+}
+
+/* Implement conditionals with the predicate register: SETP evaluates
+ * the comparison, dst is initialised with 'fail', then a predicated MOV
+ * writes 'pass'.  NEVER/ALWAYS reduce to one plain MOV of fail/pass.
+ *
+ * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
+ * MOV dst, fail
+ * MOV dst, pass, p0 
+ */
+static boolean
+emit_conditional(struct svga_shader_emitter *emit,
+                 unsigned compare_func,
+                 SVGA3dShaderDestToken dst,
+                 struct src_register src0,
+                 struct src_register src1,
+                 struct src_register pass,
+                 struct src_register fail)
+{
+   SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+   SVGA3dShaderInstToken setp_token, mov_token;
+   setp_token = inst_token( SVGA3DOP_SETP );
+
+   switch (compare_func) {
+   case PIPE_FUNC_NEVER:
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         dst, fail );
+      break;
+   case PIPE_FUNC_LESS:
+      setp_token.control = SVGA3DOPCOMP_LT;
+      break;
+   case PIPE_FUNC_EQUAL:
+      setp_token.control = SVGA3DOPCOMP_EQ;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      setp_token.control = SVGA3DOPCOMP_LE;
+      break;
+   case PIPE_FUNC_GREATER:
+      setp_token.control = SVGA3DOPCOMP_GT;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      setp_token.control = SVGA3DOPCOMPC_NE;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      setp_token.control = SVGA3DOPCOMP_GE;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         dst, pass );
+      break;
+   }
+
+   /* SETP src0, COMPOP, src1 */
+   if (!submit_op2( emit, setp_token, pred_reg,
+                    src0, src1 ))
+      return FALSE;
+
+   mov_token = inst_token( SVGA3DOP_MOV );
+
+   /* MOV dst, fail */
+   if (!submit_op1( emit, mov_token, dst,
+                    fail ))
+      return FALSE;
+
+   /* MOV dst, pass (predicated)
+    *
+    * Note that the predicate reg (and possible modifiers) is passed
+    * as the first source argument.
+    */
+   mov_token.predicated = 1;
+   if (!submit_op2( emit, mov_token, dst,
+                    src( pred_reg ), pass ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/* Select on "src0 <compare_func> src1".  Vertex shaders map
+ * GEQUAL/LEQUAL/GREATER/LESS straight onto SGE/SLT (swapping operands
+ * as needed); everything else goes through emit_conditional() with the
+ * 1 and 0 channels of the zero immediate as the pass/fail values.
+ */
+static boolean
+emit_select(struct svga_shader_emitter *emit,
+            unsigned compare_func,
+            SVGA3dShaderDestToken dst,
+            struct src_register src0,
+            struct src_register src1 )
+{
+   struct src_register imm, one, zero;
+
+   /* There are some SVGA instructions which implement some selects
+    * directly, but they are only available in the vertex shader.
+    */
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      switch (compare_func) {
+      case PIPE_FUNC_GEQUAL:
+         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
+      /* a <= b  is  b >= a */
+      case PIPE_FUNC_LEQUAL:
+         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
+      /* a > b  is  b < a */
+      case PIPE_FUNC_GREATER:
+         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
+      case PIPE_FUNC_LESS:
+         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
+      default:
+         break;
+      }
+   }
+
+   /* Otherwise, need to use the setp approach.  The zero immediate is
+    * 0,0,0,1: .w supplies the 'pass' value and .x the 'fail' value.
+    */
+   imm = get_zero_immediate( emit );
+   one = scalar( imm, TGSI_SWIZZLE_W );
+   zero = scalar( imm, TGSI_SWIZZLE_X );
+
+   return emit_conditional( emit, compare_func, dst,
+                            src0, src1, one, zero );
+}
+
+
+static boolean emit_select_op(struct svga_shader_emitter *emit,
+                              unsigned compare,
+                              const struct tgsi_full_instruction *insn)
+{
+   const struct tgsi_full_src_register *in = insn->FullSrcRegisters;
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register lhs = translate_src_register( emit, &in[0] );
+   struct src_register rhs = translate_src_register( emit, &in[1] );
+
+   /* Destination and both operands come from the TGSI instruction. */
+   return emit_select( emit, compare, dst, lhs, rhs );
+}
+
+
+/* Translate the two-source texture instructions (TEX, TXP, TXB) to the
+ * SVGA3D TEX opcode, scaling the first source for unnormalized samplers. */
+static boolean emit_tex2(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn,
+                         SVGA3dShaderDestToken dst )
+{
+   SVGA3dShaderInstToken inst;
+   struct src_register coord;
+   struct src_register sampler;
+
+   inst.value = 0;
+   inst.op = SVGA3DOP_TEX;
+
+   /* TEX sets no control bits; TXP and TXB each set one. */
+   switch (insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+      break;
+   case TGSI_OPCODE_TXP:
+      inst.control = SVGA3DOPCONT_PROJECT;
+      break;
+   case TGSI_OPCODE_TXB:
+      inst.control = SVGA3DOPCONT_BIAS;
+      break;
+   default:
+      assert(0);
+      return FALSE;
+   }
+
+   coord = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   sampler = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+
+   if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
+      struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
+      SVGA3dShaderDestToken scaled = get_temp( emit );
+
+      /* MUL  tmp, SRC0, WH */
+      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), scaled, coord, wh ))
+         return FALSE;
+      coord = src( scaled );
+   }
+
+   return submit_op2( emit, inst, dst, coord, sampler );
+}
+
+
+
+
+/* Translate the three-source texture instructions: TXD becomes TEXLDD and
+ * TXL becomes TEXLDL.  Any other opcode is rejected with FALSE. */
+static boolean emit_tex3(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn,
+                         SVGA3dShaderDestToken dst )
+{
+   SVGA3dShaderInstToken inst;
+   struct src_register src0, src1, src2;
+
+   inst.value = 0;
+   switch (insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TXD: 
+      inst.op = SVGA3DOP_TEXLDD;
+      break;
+   case TGSI_OPCODE_TXL:
+      inst.op = SVGA3DOP_TEXLDL;
+      break;
+   default:
+      assert(0);
+      return FALSE;   /* same policy as emit_tex2(): never emit op == 0 */
+   }
+
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+   src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] );
+
+   return submit_op3( emit, inst, dst, src0, src1, src2 );
+}
+
+
+/* Emit a texture fetch, with a depth-compare select for shadow samplers. */
+static boolean emit_tex(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = 
+      translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 =
+      translate_src_register( emit, &insn->FullSrcRegisters[1] );
+
+   SVGA3dShaderDestToken tex_result;
+
+   /* check for shadow samplers */
+   boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
+                      PIPE_TEX_COMPARE_R_TO_TEXTURE);
+
+   /* If doing compare processing, need to put this value into a
+    * temporary so it can be used as a source later on.
+    */
+   if (compare ||
+       (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
+      tex_result = get_temp( emit );
+   }
+   else {
+      tex_result = dst;
+   }
+
+   switch(insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXP:
+      if (!emit_tex2( emit, insn, tex_result ))
+         return FALSE;
+      break;
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXD:
+      if (!emit_tex3( emit, insn, tex_result ))
+         return FALSE;
+      break;
+   default:
+      assert(0);
+      return FALSE;   /* nothing was sampled into tex_result */
+   }
+
+   if (compare) {
+      SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
+      struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
+      struct src_register one =
+         scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
+
+      /* Divide texcoord R by Q */
+      if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+                       src0_zdivw,
+                       scalar(src0, TGSI_SWIZZLE_W) ))
+         return FALSE;
+
+      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                       src0_zdivw,
+                       scalar(src0, TGSI_SWIZZLE_Z),
+                       src(src0_zdivw) ))
+         return FALSE;
+
+      if (!emit_select(
+             emit,
+             emit->key.fkey.tex[src1.base.num].compare_func,
+             dst,
+             src(src0_zdivw),
+             tex_src_x))
+         return FALSE;
+
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         writemask( dst, TGSI_WRITEMASK_W),
+                         one );
+   }
+   else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) 
+   {
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
+                              const struct tgsi_full_instruction *insn )
+{
+   const struct src_register counter = src_register( SVGA3DREG_LOOP, 0 );
+   const struct src_register bounds = get_loop_const( emit );
+
+   /* LOOP token, then the loop register, then the loop constant. */
+   if (!emit_instruction( emit, inst_token( SVGA3DOP_LOOP ) ))
+      return FALSE;
+   return (emit_src( emit, counter ) && emit_src( emit, bounds ));
+}
+
+static boolean emit_endloop2( struct svga_shader_emitter *emit,
+                              const struct tgsi_full_instruction *insn )
+{
+   /* ENDLOOP is emitted without operands; insn is not consulted. */
+   return emit_instruction( emit, inst_token( SVGA3DOP_ENDLOOP ) );
+}
+
+static boolean emit_brk( struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn )
+{
+   /* BREAK is emitted without operands; insn is not consulted. */
+   return emit_instruction( emit, inst_token( SVGA3DOP_BREAK ) );
+}
+
+/* Emit a one-source instruction whose operand is the first TGSI source
+ * narrowed to its .x channel with scalar(). */
+static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
+                                unsigned opcode,
+                                const struct tgsi_full_instruction *insn )
+{
+   const SVGA3dShaderInstToken inst = inst_token( opcode );
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register arg = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+
+   arg = scalar( arg, TGSI_SWIZZLE_X );
+
+   return submit_op1( emit, inst, dst, arg );
+}
+
+
+/**
+ * Emit an instruction that needs no special handling: translate the
+ * destination and each source register and submit them with the given
+ * opcode.  Between zero and three source registers are supported; any
+ * other count asserts and returns FALSE.
+ */
+static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
+                                       unsigned opcode,
+                                       const struct tgsi_full_instruction *insn )
+{
+   const struct tgsi_full_src_register *operands = insn->FullSrcRegisters;
+   SVGA3dShaderInstToken token = inst_token( opcode );
+   SVGA3dShaderDestToken dest = translate_dst_register( emit, insn, 0 );
+
+   if (insn->Instruction.NumSrcRegs == 0)
+      return submit_op0( emit, token, dest );
+
+   if (insn->Instruction.NumSrcRegs == 1)
+      return submit_op1( emit, token, dest,
+                         translate_src_register( emit, &operands[0] ));
+
+   if (insn->Instruction.NumSrcRegs == 2)
+      return submit_op2( emit, token, dest,
+                         translate_src_register( emit, &operands[0] ),
+                         translate_src_register( emit, &operands[1] ) );
+
+   if (insn->Instruction.NumSrcRegs == 3)
+      return submit_op3( emit, token, dest,
+                         translate_src_register( emit, &operands[0] ),
+                         translate_src_register( emit, &operands[1] ),
+                         translate_src_register( emit, &operands[2] ) );
+
+   /* Unsupported operand count. */
+   assert(0);
+   return FALSE;
+}
+
+/**
+ * Translate ARL.  The running ARL counter is bumped first; then, if
+ * svga_arl_needs_adjustment() says so, the fake-ARL path is taken,
+ * otherwise a plain MOVA is emitted.
+ */
+static boolean emit_arl(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   emit->current_arl += 1;
+
+   if (!svga_arl_needs_adjustment( emit )) {
+      /* no adjustment required: a straight MOVA does the job */
+      return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
+   }
+
+   return emit_fake_arl( emit, insn );
+}
+
+/**
+ * Report whether a source and a destination refer to the same
+ * register, i.e. they have both the same register number and the same
+ * register type.  Swizzles and writemasks are not compared.
+ */
+static boolean alias_src_dst( struct src_register src,
+                              SVGA3dShaderDestToken dst )
+{
+   if (src.base.num == dst.num &&
+       SVGA3dShaderGetRegType(dst.value) ==
+       SVGA3dShaderGetRegType(src.base.value))
+      return TRUE;
+
+   return FALSE;
+}
+
+/**
+ * Translate POW.  Both sources are reduced to their X channel.  The
+ * result is computed into a scratch temporary and then copied to the
+ * real destination whenever the destination is not a TGSI temporary
+ * or src1 names the same register as the destination.
+ */
+static boolean emit_pow(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken scratch;
+   boolean via_scratch = FALSE;
+
+   /* POW can only output to a temporary, and its src1 must not be
+    * the same register as dst.
+    */
+   if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY ||
+       alias_src_dst( src1, dst ))
+      via_scratch = TRUE;
+
+   /* scalar operation: only the X channels take part */
+   src0 = scalar( src0, TGSI_SWIZZLE_X );
+   src1 = scalar( src1, TGSI_SWIZZLE_X );
+
+   if (!via_scratch)
+      return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
+
+   scratch = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
+
+   if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), scratch, src0, src1))
+      return FALSE;
+
+   return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst,
+                     scalar(src(scratch), 0) );
+}
+
+/**
+ * Translate XPD (cross product) to CRS.
+ *
+ * CRS is issued straight to the destination only when that
+ * destination is a temporary that aliases neither source; otherwise
+ * the product goes to a scratch temporary (masked to the XYZ part of
+ * the destination mask) and is MOVed across afterwards.  If W is
+ * written it is filled from the zero immediate.
+ */
+static boolean emit_xpd(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken crs_dst = dst;
+   boolean via_scratch = FALSE;
+
+   /* XPD can only output to a temporary, and that temporary must be
+    * distinct from both sources.
+    */
+   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+       alias_src_dst(src0, dst) ||
+       alias_src_dst(src1, dst))
+      via_scratch = TRUE;
+
+   if (via_scratch) {
+      crs_dst = get_temp( emit );
+
+      /* Obey DX9 restrictions on mask: */
+      crs_dst.mask = dst.mask & TGSI_WRITEMASK_XYZ;
+   }
+
+   if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), crs_dst, src0, src1))
+      return FALSE;
+
+   if (via_scratch) {
+      if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( crs_dst )))
+         return FALSE;
+   }
+
+   /* dst.w, when requested, is taken from the W channel of the zero
+    * immediate (the original comment describes this as emitting 1.0).
+    */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      struct src_register zero = get_zero_immediate( emit );
+
+      if (!submit_op1(emit,
+                      inst_token( SVGA3DOP_MOV ),
+                      writemask(dst, TGSI_WRITEMASK_W),
+                      zero))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate LRP.  When the destination register is also src0 or src2
+ * the interpolation is done into a scratch temporary carrying the
+ * destination's writemask, followed by a MOV to the real destination.
+ */
+static boolean emit_lrp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   const struct src_register src2 = translate_src_register(
+      emit, &insn->FullSrcRegisters[2] );
+   SVGA3dShaderDestToken lrp_dst = dst;
+   boolean via_scratch = FALSE;
+
+   /* The dst reg must not be the same as src0 or src2 */
+   if (alias_src_dst(src0, dst) || alias_src_dst(src2, dst)) {
+      via_scratch = TRUE;
+      lrp_dst = get_temp( emit );
+      lrp_dst.mask = dst.mask;
+   }
+
+   if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), lrp_dst,
+                   src0, src1, src2))
+      return FALSE;
+
+   if (!via_scratch)
+      return TRUE;
+
+   if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( lrp_dst )))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/**
+ * Translate the TGSI DST (distance vector) instruction.
+ *
+ * Vertex shaders use the native SVGA/DX9 DST opcode.  For other
+ * shader units the result is assembled by hand:
+ *
+ *    result[0] = 1    * 1;
+ *    result[1] = a[1] * b[1];
+ *    result[2] = a[2] * 1;
+ *    result[3] = 1    * b[3];
+ *
+ * A scratch temporary is used when the destination is not a temporary
+ * or aliases either source.
+ */
+static boolean emit_dst_insn(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst;
+   SVGA3dShaderDestToken work;
+   struct src_register src0;
+   struct src_register src1;
+   struct src_register zero;
+   boolean via_scratch;
+
+   /* SVGA/DX9 has a DST instruction, but only for vertex shaders: */
+   if (emit->unit == PIPE_SHADER_VERTEX)
+      return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
+
+   dst = translate_dst_register( emit, insn, 0 );
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+   zero = get_zero_immediate( emit );
+
+   via_scratch = FALSE;
+   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+       alias_src_dst(src0, dst) ||
+       alias_src_dst(src1, dst))
+      via_scratch = TRUE;
+
+   if (via_scratch)
+      work = get_temp( emit );
+   else
+      work = dst;
+
+   /* work.xw = 1.0 (channel 3 of the zero immediate) */
+   if (work.mask & TGSI_WRITEMASK_XW) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(work, TGSI_WRITEMASK_XW ),
+                       scalar( zero, 3 )))
+         return FALSE;
+   }
+
+   /* work.yz = src0 */
+   if (work.mask & TGSI_WRITEMASK_YZ) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(work, TGSI_WRITEMASK_YZ ),
+                       src0))
+         return FALSE;
+   }
+
+   /* work.yw = work * src1 */
+   if (work.mask & TGSI_WRITEMASK_YW) {
+      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                       writemask(work, TGSI_WRITEMASK_YW ),
+                       src(work),
+                       src1))
+         return FALSE;
+   }
+
+   /* dst = work, only needed when a scratch register stood in */
+   if (via_scratch) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       dst,
+                       src(work)))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Translate the TGSI EXP instruction channel by channel:
+ *
+ *   y: src0 - floor(src0), via FRC
+ *   x: 2 ^ floor(src0), via ADD (src0 - fraction) then EXP
+ *   z: 2 ^ src0, partial precision, via EXPP
+ *   w: filled from the W channel of the zero immediate
+ *
+ * The fraction is needed for X even when Y is not written; in that
+ * case it lives in a scratch temporary that is released afterwards.
+ */
+static boolean emit_exp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register zero = get_zero_immediate( emit );
+   const boolean write_x = (dst.mask & TGSI_WRITEMASK_X) ? TRUE : FALSE;
+   const boolean write_y = (dst.mask & TGSI_WRITEMASK_Y) ? TRUE : FALSE;
+   SVGA3dShaderDestToken fraction;
+
+   /* Decide where the fractional part lives.  It is left unset when
+    * neither X nor Y is written, and is not used in that case.
+    */
+   if (write_y)
+      fraction = dst;
+   else if (write_x)
+      fraction = get_temp( emit );
+
+   /* fraction.y = src0 - floor(src0) */
+   if (dst.mask & TGSI_WRITEMASK_XY) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+                       writemask( fraction, TGSI_WRITEMASK_Y ),
+                       src0 ))
+         return FALSE;
+   }
+
+   /* dst.x = 2 ^ floor(src0) */
+   if (write_x) {
+      SVGA3dShaderDestToken dst_x =
+         writemask( dst, dst.mask & TGSI_WRITEMASK_X );
+
+      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+                       dst_x,
+                       src0,
+                       scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
+         return FALSE;
+
+      if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+                       dst_x,
+                       scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
+         return FALSE;
+
+      /* the fraction was only a scratch value in this case */
+      if (!write_y)
+         release_temp( emit, fraction );
+   }
+
+   /* dst.z = 2 ^ src0 (partial precision) */
+   if (dst.mask & TGSI_WRITEMASK_Z) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
+                       writemask( dst, dst.mask & TGSI_WRITEMASK_Z ),
+                       src0 ) )
+         return FALSE;
+   }
+
+   /* dst.w = one */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(dst, TGSI_WRITEMASK_W),
+                       scalar( zero, TGSI_SWIZZLE_W ) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Translate the TGSI LIT instruction.
+ *
+ * Vertex shaders use the native SVGA/DX9 LIT opcode.  Elsewhere the
+ * result is built with a predicated move.  D3D vs. GL semantics can
+ * be fairly easily accommodated by variations on this sequence:
+ *
+ * GL:
+ *   tmp.y = src.x
+ *   tmp.z = pow(src.y,src.w)
+ *   p0 = src0.xxxx > 0
+ *   result = zero.wxxw
+ *   (p0) result.yz = tmp
+ *
+ * D3D:
+ *   identical, except p0 = src0.xxyy > 0
+ *
+ * Only the GL version is implemented.
+ */
+static boolean emit_lit(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst;
+   SVGA3dShaderDestToken tmp;
+   SVGA3dShaderDestToken pred_reg;
+   SVGA3dShaderInstToken setp_token;
+   SVGA3dShaderInstToken mov_token;
+   struct src_register src0;
+   struct src_register zero;
+   struct src_register predsrc;
+
+   /* SVGA/DX9 has a LIT instruction, but only for vertex shaders: */
+   if (emit->unit == PIPE_SHADER_VERTEX)
+      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
+
+   dst = translate_dst_register( emit, insn, 0 );
+   tmp = get_temp( emit );
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   zero = get_zero_immediate( emit );
+
+   /* tmp = pow(src.y, src.w) */
+   if (dst.mask & TGSI_WRITEMASK_Z) {
+      if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
+                      tmp,
+                      scalar(src0, 1),
+                      scalar(src0, 3)))
+         return FALSE;
+   }
+
+   /* tmp.y = src.x */
+   if (dst.mask & TGSI_WRITEMASK_Y) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(tmp, TGSI_WRITEMASK_Y ),
+                       scalar(src0, 0)))
+         return FALSE;
+   }
+
+   /* Can't quite do the rest with emit_conditional() due to the extra
+    * writemask on the predicated mov, so spell it out.
+    */
+   pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+   setp_token = inst_token( SVGA3DOP_SETP );
+   mov_token = inst_token( SVGA3DOP_MOV );
+   setp_token.control = SVGA3DOPCOMP_GT;
+
+   /* GL semantics test src0.xxxx; the D3D flavour would use
+    * swizzle(src0, 0, 0, 1, 1) instead.
+    */
+   predsrc = swizzle(src0, 0, 0, 0, 0);
+
+   /* SETP predsrc, GT, {0}.x */
+   if (!submit_op2( emit, setp_token, pred_reg,
+                    predsrc,
+                    swizzle(zero, 0, 0, 0, 0) ))
+      return FALSE;
+
+   /* MOV dst, zero.wxxw  -- the value used when the test fails */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
+                    swizzle(zero, 3, 0, 0, 3 )))
+      return FALSE;
+
+   /* MOV dst.yz, tmp (predicated)
+    *
+    * Note that the predicate reg (and possible modifiers) is passed
+    * as the first source argument.
+    */
+   if (dst.mask & TGSI_WRITEMASK_YZ) {
+      mov_token.predicated = 1;
+      if (!submit_op2( emit, mov_token,
+                       writemask(dst, TGSI_WRITEMASK_YZ),
+                       src( pred_reg ), src( tmp ) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+
+
+/**
+ * Translate EX2 to the SVGA3D EXP opcode, operating on the X channel
+ * of the source.  With a full XYZW writemask EXP writes the
+ * destination directly; with any partial writemask the result is
+ * computed into a scratch temporary and its X channel is MOVed to the
+ * destination.
+ */
+static boolean emit_ex2( struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken exp_token = inst_token( SVGA3DOP_EXP );
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register operand =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken scratch;
+
+   operand = scalar( operand, TGSI_SWIZZLE_X );
+
+   if (dst.mask == TGSI_WRITEMASK_XYZW)
+      return submit_op1( emit, exp_token, dst, operand );
+
+   scratch = get_temp( emit );
+
+   if (!submit_op1( emit, exp_token, scratch, operand ))
+      return FALSE;
+
+   return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                      dst,
+                      scalar( src( scratch ), TGSI_SWIZZLE_X ) );
+}
+
+
+/**
+ * Translate the TGSI LOG instruction channel by channel:
+ *
+ *   z: log2( abs( src0 ) )
+ *   x: floor( log2( abs( src0 ) ) ), computed as z - frc(z)
+ *   y: abs( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) )
+ *   w: filled from the W channel of the zero immediate
+ *
+ * Intermediate values that the writemask does not ask for are kept in
+ * scratch temporaries, which are released before returning on the
+ * success path.
+ */
+static boolean emit_log(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register zero = get_zero_immediate( emit );
+   const boolean write_x = (dst.mask & TGSI_WRITEMASK_X) ? TRUE : FALSE;
+   const boolean write_y = (dst.mask & TGSI_WRITEMASK_Y) ? TRUE : FALSE;
+   const boolean write_z = (dst.mask & TGSI_WRITEMASK_Z) ? TRUE : FALSE;
+   const boolean write_xy = (dst.mask & TGSI_WRITEMASK_XY) ? TRUE : FALSE;
+   const boolean write_xyz = (dst.mask & TGSI_WRITEMASK_XYZ) ? TRUE : FALSE;
+   /* A source modifier other than ABS has to be resolved by a MOV
+    * into a temporary before the absolute value can be applied.
+    */
+   const boolean copy_src = (write_xyz &&
+                             src0.base.srcMod &&
+                             src0.base.srcMod != SVGA3DSRCMOD_ABS)
+                            ? TRUE : FALSE;
+   SVGA3dShaderDestToken abs_tmp;
+   struct src_register abs_src0;
+   SVGA3dShaderDestToken log2_abs;
+
+   /* log2(abs(src0)) lands in dst.z if Z is written, otherwise in a
+    * scratch register when X or Y still depend on it.
+    */
+   if (write_z)
+      log2_abs = dst;
+   else if (write_xy)
+      log2_abs = get_temp( emit );
+
+   /* log2_abs.z = log2( abs( src0 ) ) */
+   if (write_xyz) {
+      if (copy_src) {
+         abs_tmp = get_temp( emit );
+
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                          abs_tmp,
+                          src0 ) )
+            return FALSE;
+
+         abs_src0 = src( abs_tmp );
+      }
+      else {
+         abs_src0 = src0;
+      }
+
+      abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
+
+      if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
+                       writemask( log2_abs, TGSI_WRITEMASK_Z ),
+                       abs_src0 ) )
+         return FALSE;
+   }
+
+   if (write_xy) {
+      SVGA3dShaderDestToken floor_log2;
+
+      if (write_x)
+         floor_log2 = dst;
+      else
+         floor_log2 = get_temp( emit );
+
+      /* floor_log2.x = log2_abs.z - frc( log2_abs.z ) */
+      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+                       writemask( floor_log2, TGSI_WRITEMASK_X ),
+                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
+         return FALSE;
+
+      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+                       writemask( floor_log2, TGSI_WRITEMASK_X ),
+                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
+                       negate( src( floor_log2 ) ) ) )
+         return FALSE;
+
+      /* dst.y = abs( src0 ) * 2 ^ -floor( log2( abs( src0 ) ) ) */
+      if (write_y) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+                          writemask( dst, TGSI_WRITEMASK_Y ),
+                          negate( scalar( src( floor_log2 ),
+                                          TGSI_SWIZZLE_X ) ) ) )
+            return FALSE;
+
+         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                          writemask( dst, TGSI_WRITEMASK_Y ),
+                          src( dst ),
+                          abs_src0 ) )
+            return FALSE;
+      }
+
+      /* give back whichever intermediates were only scratch values */
+      if (!write_x)
+         release_temp( emit, floor_log2 );
+
+      if (!write_z)
+         release_temp( emit, log2_abs );
+   }
+
+   if (copy_src)
+      release_temp( emit, abs_tmp );
+
+   /* dst.w = one */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(dst, TGSI_WRITEMASK_W),
+                       scalar( zero, TGSI_SWIZZLE_W ) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Begin a subroutine.  Looks up the label slot recorded for
+ * \p position and emits RET, LABEL and the label register for it.
+ *
+ * Also clears emit->in_main_func: from here on we are emitting
+ * subroutines rather than the main function, which affects how the
+ * generated shader is terminated.
+ *
+ * If no label was recorded for this position the function asserts
+ * and, in builds without assertions, returns TRUE without emitting
+ * anything.
+ */
+static boolean emit_bgnsub( struct svga_shader_emitter *emit,
+                           unsigned position,
+                           const struct tgsi_full_instruction *insn )
+{
+   unsigned slot;
+
+   emit->in_main_func = FALSE;
+
+   for (slot = 0; slot < emit->nr_labels; slot++) {
+      if (emit->label[slot] != position)
+         continue;
+
+      if (!emit_instruction( emit, inst_token( SVGA3DOP_RET ) ))
+         return FALSE;
+
+      if (!emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ))
+         return FALSE;
+
+      if (!emit_src( emit, src_register( SVGA3DREG_LABEL, slot )))
+         return FALSE;
+
+      return TRUE;
+   }
+
+   assert(0);
+   return TRUE;
+}
+
+/**
+ * Translate CAL.  The call target (a TGSI instruction position) is
+ * mapped to a label slot: an existing slot is reused when the target
+ * has been seen before, otherwise a new one is appended.  CALL and
+ * the label register are then emitted.
+ *
+ * Returns FALSE if a new slot is required but the label table is
+ * already full, or if emission fails.  A call to an already-known
+ * target succeeds even when the table is full; the capacity check is
+ * only relevant when a slot has to be added.
+ */
+static boolean emit_call( struct svga_shader_emitter *emit,
+                           const struct tgsi_full_instruction *insn )
+{
+   unsigned position = insn->InstructionExtLabel.Label;
+   unsigned i;
+
+   /* Reuse the slot of a previously seen call target, if any. */
+   for (i = 0; i < emit->nr_labels; i++) {
+      if (emit->label[i] == position)
+         break;
+   }
+
+   if (i == emit->nr_labels) {
+      /* New target: it needs a free slot in the table. */
+      if (emit->nr_labels == Elements(emit->label))
+         return FALSE;
+
+      emit->label[i] = position;
+      emit->nr_labels++;
+   }
+
+   return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
+           emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
+}
+
+
+/**
+ * Finish the main function by emitting the postamble that matches
+ * the shader unit: the vertex postamble for vertex shaders, the
+ * pixel-shader postamble for everything else.
+ */
+static boolean emit_end( struct svga_shader_emitter *emit )
+{
+   if (emit->unit != PIPE_SHADER_VERTEX)
+      return emit_ps_postamble( emit );
+
+   return emit_vs_postamble( emit );
+}
+
+
+
+/**
+ * Translate one TGSI instruction.
+ *
+ * Opcodes that need special treatment are dispatched to their own
+ * emitters; a handful are no-ops or explicitly unsupported; anything
+ * else goes through translate_opcode() and is emitted as a simple
+ * instruction.
+ *
+ * \param position  index of the instruction, used to match BGNSUB
+ *                  against recorded call labels
+ * \return TRUE on success, FALSE if the opcode is unsupported or
+ *         emission failed
+ */
+static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
+                                      unsigned position,
+                                      const struct tgsi_full_instruction *insn )
+{
+   switch (insn->Instruction.Opcode) {
+
+   /* Addressing and texturing */
+   case TGSI_OPCODE_ARL:
+      return emit_arl( emit, insn );
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXD:
+      return emit_tex( emit, insn );
+
+   /* Subroutines */
+   case TGSI_OPCODE_BGNSUB:
+      return emit_bgnsub( emit, position, insn );
+   case TGSI_OPCODE_ENDSUB:
+      return TRUE;
+   case TGSI_OPCODE_CAL:
+      return emit_call( emit, insn );
+
+   /* Arithmetic needing hand-written sequences */
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_TRUNC:
+      /* TRUNC is routed through the floor emitter; the original
+       * author notes this should really be a true TRUNC, not FLR.
+       */
+      return emit_floor( emit, insn );
+   case TGSI_OPCODE_CMP:
+      return emit_cmp( emit, insn );
+   case TGSI_OPCODE_DIV:
+      return emit_div( emit, insn );
+   case TGSI_OPCODE_DP2:
+      return emit_dp2( emit, insn );
+   case TGSI_OPCODE_DPH:
+      return emit_dph( emit, insn );
+   case TGSI_OPCODE_NRM:
+      return emit_nrm( emit, insn );
+   case TGSI_OPCODE_COS:
+      return emit_cos( emit, insn );
+   case TGSI_OPCODE_SIN:
+      return emit_sin( emit, insn );
+   case TGSI_OPCODE_SCS:
+      return emit_sincos( emit, insn );
+
+   case TGSI_OPCODE_END:
+      /* TGSI always finishes the main func with an END */
+      return emit_end( emit );
+
+   case TGSI_OPCODE_KIL:
+      return emit_kil( emit, insn );
+
+   /* Selection opcodes.  The underlying language is fairly
+    * non-orthogonal about these.
+    */
+   case TGSI_OPCODE_SEQ:
+      return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
+   case TGSI_OPCODE_SNE:
+      return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
+   case TGSI_OPCODE_SGT:
+      return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
+   case TGSI_OPCODE_SGE:
+      return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
+   case TGSI_OPCODE_SLT:
+      return emit_select_op( emit, PIPE_FUNC_LESS, insn );
+   case TGSI_OPCODE_SLE:
+      return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
+
+   case TGSI_OPCODE_SUB:
+      return emit_sub( emit, insn );
+   case TGSI_OPCODE_POW:
+      return emit_pow( emit, insn );
+   case TGSI_OPCODE_EX2:
+      return emit_ex2( emit, insn );
+   case TGSI_OPCODE_EXP:
+      return emit_exp( emit, insn );
+   case TGSI_OPCODE_LOG:
+      return emit_log( emit, insn );
+
+   /* Scalar ops that map directly onto a hardware opcode */
+   case TGSI_OPCODE_LG2:
+      return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
+   case TGSI_OPCODE_RSQ:
+      return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
+   case TGSI_OPCODE_RCP:
+      return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
+
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_RET:
+      /* This is a noop -- we tell mesa that we can't support RET
+       * within a function (early return), so this will always be
+       * followed by an ENDSUB.
+       */
+      return TRUE;
+
+   /* These aren't actually used by any of the frontends we care
+    * about, so they are rejected outright:
+    */
+   case TGSI_OPCODE_CLAMP:
+   case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_XOR:
+      return FALSE;
+
+   /* Flow control */
+   case TGSI_OPCODE_IF:
+      return emit_if( emit, insn );
+   case TGSI_OPCODE_ELSE:
+      return emit_else( emit, insn );
+   case TGSI_OPCODE_ENDIF:
+      return emit_endif( emit, insn );
+   case TGSI_OPCODE_BGNLOOP:
+      return emit_bgnloop2( emit, insn );
+   case TGSI_OPCODE_ENDLOOP:
+      return emit_endloop2( emit, insn );
+   case TGSI_OPCODE_BRK:
+      return emit_brk( emit, insn );
+
+   case TGSI_OPCODE_XPD:
+      return emit_xpd( emit, insn );
+   case TGSI_OPCODE_KILP:
+      return emit_kilp( emit, insn );
+   case TGSI_OPCODE_DST:
+      return emit_dst_insn( emit, insn );
+   case TGSI_OPCODE_LIT:
+      return emit_lit( emit, insn );
+   case TGSI_OPCODE_LRP:
+      return emit_lrp( emit, insn );
+
+   default:
+      break;
+   }
+
+   /* Everything without a dedicated emitter: look up the hardware
+    * opcode and emit it as a simple instruction.
+    */
+   {
+      unsigned hw_opcode = translate_opcode(insn->Instruction.Opcode);
+
+      if (hw_opcode == SVGA3DOP_LAST_INST)
+         return FALSE;
+
+      if (!emit_simple_instruction( emit, hw_opcode, insn ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+/**
+ * Define a float constant for a TGSI immediate.
+ *
+ * The immediate supplies NrTokens - 1 components; any of the four
+ * channels it does not supply are padded from {0, 0, 0, 1}.  The
+ * constant is placed at imm_start + internal_imm_count, and the
+ * counter is advanced.
+ *
+ * The copy is bounded to four components independently of the
+ * assertion, so a malformed token count cannot write past value[]
+ * in builds where assertions are compiled out.
+ */
+static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
+                                    struct tgsi_full_immediate *imm)
+{
+   static const float id[4] = {0,0,0,1};
+   float value[4];
+   unsigned nr_values = 0;
+   unsigned i;
+
+   assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
+
+   /* NrTokens counts the header token as well as the data tokens. */
+   if (imm->Immediate.NrTokens >= 1)
+      nr_values = imm->Immediate.NrTokens - 1;
+   if (nr_values > 4)
+      nr_values = 4;
+
+   for (i = 0; i < nr_values; i++)
+      value[i] = imm->u[i].Float;
+
+   for ( ; i < 4; i++ )
+      value[i] = id[i];
+
+   return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                          emit->imm_start + emit->internal_imm_count++,
+                          value[0], value[1], value[2], value[3]);
+}
+
+/**
+ * Allocate the next hardware constant slot, define it as the float
+ * vector (a, b, c, d), and hand back a source register naming it.
+ *
+ * The slot counter is advanced even if the definition fails; \p out
+ * is only written on success.
+ */
+static boolean make_immediate( struct svga_shader_emitter *emit,
+                               float a,
+                               float b,
+                               float c,
+                               float d,
+                               struct src_register *out )
+{
+   const unsigned slot = emit->nr_hw_const;
+
+   emit->nr_hw_const = slot + 1;
+
+   if (emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                       slot, a, b, c, d )) {
+      *out = src_register( SVGA3DREG_CONST, slot );
+      return TRUE;
+   }
+
+   return FALSE;
+}
+
+/**
+ * Vertex shader preamble.  When prescale is not in use, define the
+ * {0, 0, .5, .5} immediate and remember it in emit->imm_0055; with
+ * prescale there is nothing to do.
+ */
+static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
+{
+   if (emit->key.vkey.need_prescale)
+      return TRUE;
+
+   if (!make_immediate( emit, 0.0f, 0.0f, 0.5f, 0.5f,
+                        &emit->imm_0055))
+      return FALSE;
+
+   return TRUE;
+}
+
+/**
+ * Pixel shader preamble.
+ *
+ * For SM20, the temporaries standing in for color outputs need to be
+ * initialized to some value: shaders which don't set all of them are
+ * likely to be rejected by the DX9 runtime.  Each color output with a
+ * non-zero register type in true_col[] gets its temp_col[] register
+ * loaded from the zero immediate.  Nothing is emitted for SM30.
+ */
+static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
+{
+   struct src_register zero;
+   unsigned buf;
+
+   if (emit->use_sm30)
+      return TRUE;
+
+   zero = get_zero_immediate( emit );
+
+   for (buf = 0; buf < PIPE_MAX_COLOR_BUFS; buf++) {
+      if (SVGA3dShaderGetRegType(emit->true_col[buf].value) == 0)
+         continue;
+
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->temp_col[buf],
+                       zero ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Pixel shader postamble: copy redirected outputs from their
+ * temporaries to the real output registers.
+ *
+ * PS oDepth is incredibly fragile and it's very hard to catch the
+ * types of usage that break it during shader emit, so the main
+ * program writes a temporary and oDepth is only touched by the MOV
+ * emitted here (from the Z channel of temp_pos).  Color outputs are
+ * handled the same way for the benefit of SM20.
+ */
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
+{
+   unsigned buf;
+
+   /* Depth: true_pos <- temp_pos.z, if the output is in use. */
+   if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->true_pos,
+                       scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
+         return FALSE;
+   }
+
+   /* Colors: true_col[n] <- temp_col[n] for every output in use. */
+   for (buf = 0; buf < PIPE_MAX_COLOR_BUFS; buf++) {
+      if (SVGA3dShaderGetRegType(emit->true_col[buf].value) == 0)
+         continue;
+
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->true_col[buf],
+                       src(emit->temp_col[buf]) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Vertex shader postamble: write the real PSIZ and position outputs
+ * from the temporaries the main program was redirected to.
+ *
+ * PSIZ output is incredibly fragile and it's very hard to catch the
+ * types of usage that break it during shader emit, so it is only
+ * touched by the hand-crafted MOV below.
+ *
+ * The position is adjusted to cope with the different GL and D3D
+ * clip spaces, either via the prescale constants or via the
+ * {0,0,.5,.5} immediate.
+ */
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
+{
+   SVGA3dShaderDestToken temp_pos;
+   SVGA3dShaderDestToken pos;
+
+   /* PSIZ: true_psiz <- temp_psiz.x, if the output is in use. */
+   if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->true_psiz,
+                       scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
+         return FALSE;
+   }
+
+   temp_pos = emit->temp_pos;
+   pos = emit->true_pos;
+
+   if (!emit->key.vkey.need_prescale) {
+      struct src_register imm_0055 = emit->imm_0055;
+
+      /* Adjust GL clipping coordinate space to hardware (D3D-style):
+       *
+       * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
+       * MOV result.position, temp_pos
+       */
+      if (!submit_op2( emit,
+                       inst_token(SVGA3DOP_DP4),
+                       writemask(temp_pos, TGSI_WRITEMASK_Z),
+                       imm_0055,
+                       src(temp_pos) ))
+         return FALSE;
+
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       pos,
+                       src(temp_pos) ))
+         return FALSE;
+   }
+   else {
+      /* The two prescale constants sit directly after the highest
+       * constant index recorded in emit->info.
+       */
+      unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
+                                                         offset + 0 );
+      struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
+                                                         offset + 1 );
+
+      /* MUL temp_pos.xyz,    temp_pos,      prescale.scale
+       * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
+       *   --> Note that prescale.trans.w == 0
+       */
+      if (!submit_op2( emit,
+                       inst_token(SVGA3DOP_MUL),
+                       writemask(temp_pos, TGSI_WRITEMASK_XYZ),
+                       src(temp_pos),
+                       prescale_scale ))
+         return FALSE;
+
+      if (!submit_op3( emit,
+                       inst_token(SVGA3DOP_MAD),
+                       pos,
+                       swizzle(src(temp_pos), 3, 3, 3, 3),
+                       prescale_trans,
+                       src(temp_pos)))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Emit the two-sided lighting color selection:
+ *
+ *   0: IF VFACE :4
+ *   1:   COLOR = FrontColor;
+ *   2: ELSE
+ *   3:   COLOR = BackColor;
+ *   4: ENDIF
+ *
+ * For each internal color input a dedicated hardware temporary is
+ * allocated and the input map is redirected to it; the temporary is
+ * then loaded from the front or the back color depending on how
+ * VFACE compares with zero.  Nothing is emitted when there are no
+ * internal colors.
+ *
+ * NOTE(review): the local arrays hold two entries; this presumably
+ * relies on internal_color_count never exceeding 2 -- confirm
+ * against the code that sets it.
+ */
+static boolean emit_light_twoside( struct svga_shader_emitter *emit )
+{
+   const int nr_colors = emit->internal_color_count;
+   struct src_register vface, zero;
+   struct src_register front[2];
+   struct src_register back[2];
+   SVGA3dShaderDestToken color[2];
+   SVGA3dShaderInstToken if_token;
+   int n;
+
+   if (nr_colors == 0)
+      return TRUE;
+
+   vface = get_vface( emit );
+   zero = get_zero_immediate( emit );
+
+   /* Can't use get_temp() to allocate the color reg as such
+    * temporaries will be reclaimed after each instruction by the call
+    * to reset_temp_regs().
+    */
+   for (n = 0; n < nr_colors; n++) {
+      color[n] = dst_register( SVGA3DREG_TEMP,
+                               emit->nr_hw_temp++ );
+
+      front[n] = emit->input_map[emit->internal_color_idx[n]];
+
+      /* Back is always the input right after front: */
+      back[n] = front[n];
+      back[n].base.num = front[n].base.num + 1;
+
+      /* From now on reads of this color input see the selected value: */
+      emit->input_map[emit->internal_color_idx[n]] = src(color[n]);
+   }
+
+   /* The comparison direction depends on the front-face winding. */
+   if_token = inst_token( SVGA3DOP_IFC );
+   if_token.control = emit->key.fkey.front_cw ? SVGA3DOPCOMP_GT
+                                              : SVGA3DOPCOMP_LT;
+
+   zero = scalar(zero, TGSI_SWIZZLE_X);
+
+   /* IFC vface, zero.x */
+   if (!emit_instruction( emit, if_token ))
+      return FALSE;
+   if (!emit_src( emit, vface ))
+      return FALSE;
+   if (!emit_src( emit, zero ))
+      return FALSE;
+
+   for (n = 0; n < nr_colors; n++) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[n], front[n] ))
+         return FALSE;
+   }
+
+   if (!emit_instruction( emit, inst_token( SVGA3DOP_ELSE) ))
+      return FALSE;
+
+   for (n = 0; n < nr_colors; n++) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[n], back[n] ))
+         return FALSE;
+   }
+
+   if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Materialize the front-face flag in a dedicated temp register:
+  0: SETP_GT TEMP, VFACE, 0
+  where TEMP is a fake frontface register
+ * Returns FALSE if emit_conditional() fails, TRUE otherwise. */
+static boolean emit_frontface( struct svga_shader_emitter *emit )
+{
+   struct src_register vface, zero;
+   SVGA3dShaderDestToken temp;
+   struct src_register pass, fail;
+
+   vface = get_vface( emit );
+   zero = get_zero_immediate( emit );
+
+   /* Can't use get_temp() to allocate the fake frontface reg as such
+    * temporaries will be reclaimed after each instruction by the call
+    * to reset_temp_regs().
+    */
+   temp = dst_register( SVGA3DREG_TEMP,
+                        emit->nr_hw_temp++ );
+
+   if (emit->key.fkey.front_cw) { /* pass/fail are the .w and .x components of the zero immediate, swapped by front_cw */
+      pass = scalar( zero, TGSI_SWIZZLE_W ); /* NOTE(review): presumably .w is non-zero — confirm in create_zero_immediate() */
+      fail = scalar( zero, TGSI_SWIZZLE_X );
+   } else {
+      pass = scalar( zero, TGSI_SWIZZLE_X );
+      fail = scalar( zero, TGSI_SWIZZLE_W );
+   }
+
+   if (!emit_conditional(emit, PIPE_FUNC_GREATER,
+                         temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
+                         pass, fail))
+      return FALSE;
+
+   /* Reassign the input_map to the fake front-face register:
+    */
+   emit->input_map[emit->internal_frontface_idx] = src(temp);
+
+   return TRUE;
+}
+
+/* Report whether the zero immediate has to be created for this shader. */
+static INLINE boolean
+needs_to_create_zero( struct svga_shader_emitter *emit )
+{
+   /* Opcodes that call for the zero immediate in either shader unit. */
+   static const unsigned zero_opcodes[] = {
+      TGSI_OPCODE_IF,  TGSI_OPCODE_SGE, TGSI_OPCODE_SGT,
+      TGSI_OPCODE_SLE, TGSI_OPCODE_SLT, TGSI_OPCODE_SNE,
+      TGSI_OPCODE_SEQ, TGSI_OPCODE_EXP, TGSI_OPCODE_LOG,
+      TGSI_OPCODE_XPD, TGSI_OPCODE_KILP
+   };
+   const boolean is_fragment = (emit->unit == PIPE_SHADER_FRAGMENT);
+   unsigned op;
+   int tex;
+
+   /* Fragment-only triggers: the non-SM3.0 path, two-sided lighting, the
+    * fake front-face register, and the DST / LIT opcodes.
+    */
+   if (is_fragment &&
+       (!emit->use_sm30 ||
+        emit->key.fkey.light_twoside ||
+        emit->emit_frontface ||
+        emit->info.opcode_count[TGSI_OPCODE_DST] > 0 ||
+        emit->info.opcode_count[TGSI_OPCODE_LIT] > 0))
+      return TRUE;
+
+   for (op = 0; op < sizeof(zero_opcodes) / sizeof(zero_opcodes[0]); op++) {
+      if (emit->info.opcode_count[zero_opcodes[op]] > 0)
+         return TRUE;
+   }
+
+   /* A texture using R_TO_TEXTURE comparison also calls for it.
+    * NOTE(review): key.fkey is read here for every shader unit — confirm. */
+   for (tex = 0; tex < emit->key.fkey.num_textures; tex++) {
+      if (emit->key.fkey.tex[tex].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+/* TRUE when the shader contains at least one BGNLOOP instruction. */
+static INLINE boolean needs_to_create_loop_const( struct svga_shader_emitter *emit )
+{
+   return emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] > 0;
+}
+
+/* The sincos constants are only needed without SM3.0, for SIN / COS / SCS. */
+static INLINE boolean needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
+{
+   return !(emit->use_sm30 ||
+            (emit->info.opcode_count[TGSI_OPCODE_SIN] < 1 && emit->info.opcode_count[TGSI_OPCODE_COS] < 1 &&
+             emit->info.opcode_count[TGSI_OPCODE_SCS] < 1));
+}
+
+/* TRUE once at least one ARL constant entry has been recorded in 'emit'. */
+static INLINE boolean needs_to_create_arl_consts( struct svga_shader_emitter *emit )
+{
+   return emit->num_arl_consts > 0;
+}
+
+/* Track the most negative offset used with ARL number 'current_arl'.
+ * NOTE(review): arl_consts[] is indexed without a bound check, and a new
+ * slot's 'number' is read before it is written — confirm initialization. */
+static INLINE boolean pre_parse_add_indirect( struct svga_shader_emitter *emit,
+                                              int num, int current_arl)
+{
+   int slot = 0;
+   assert(num < 0);
+
+   /* Look for an entry already tied to this ARL instruction. */
+   while (slot < emit->num_arl_consts &&
+          emit->arl_consts[slot].arl_num != current_arl)
+      ++slot;
+
+   if (slot == emit->num_arl_consts)
+      ++emit->num_arl_consts;   /* not found: claim the next slot */
+   if (emit->arl_consts[slot].number > num)
+      emit->arl_consts[slot].number = num;   /* keep the minimum */
+   emit->arl_consts[slot].arl_num = current_arl;
+   return TRUE;
+}
+
+/* Scan the first three source operands of 'insn' for ADDRESS-relative
+ * registers with a negative index and record each one for 'current_arl'
+ * via pre_parse_add_indirect().
+ * Always returns TRUE.
+ */
+static boolean
+pre_parse_instruction( struct svga_shader_emitter *emit,
+                       const struct tgsi_full_instruction *insn,
+                       int current_arl)
+{
+   unsigned i;
+
+   /* All three operand slots are examined; the instruction's actual
+    * operand count is not consulted.
+    */
+   for (i = 0; i < 3; i++) {
+      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[i];
+
+      /* Skip operands that are not indexed through an ADDRESS register. */
+      if (!reg->SrcRegister.Indirect)
+         continue;
+      if (reg->SrcRegisterInd.File != TGSI_FILE_ADDRESS)
+         continue;
+
+      if (reg->SrcRegister.Index < 0) {
+         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      }
+   }
+
+   return TRUE;
+}
+
+/* Pre-pass over the token stream: for every instruction, record negative
+ * ADDRESS-relative source indices (see pre_parse_instruction).  Returns
+ * FALSE if an instruction cannot be processed, TRUE otherwise.
+ */
+static boolean
+pre_parse_tokens( struct svga_shader_emitter *emit,
+                  const struct tgsi_token *tokens )
+{
+   struct tgsi_parse_context parse;
+   boolean ret = TRUE;
+   int current_arl = 0;
+
+   tgsi_parse_init( &parse, tokens );
+
+   while (ret && !tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+      /* Immediates, declarations and other tokens carry nothing to record. */
+      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+         continue;
+
+      /* current_arl is the number of ARL instructions seen so far. */
+      if (parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_ARL)
+         ++current_arl;
+
+      ret = pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
+                                   current_arl );
+   }
+   /* Pair tgsi_parse_init() with tgsi_parse_free() on every exit path. */
+   tgsi_parse_free( &parse );
+   return ret;
+}
+
+/* Emit everything that must precede the first translated instruction:
+ * the shared immediates/constants and, for fragment shaders, the preamble
+ * plus the optional two-sided-lighting and front-face code.
+ * Returns FALSE if one of the fragment-shader emit steps fails.
+ */
+static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
+{
+   /* NOTE(review): the create_*() results, if any, are not checked here. */
+   if (needs_to_create_zero( emit ))
+      create_zero_immediate( emit );
+   if (needs_to_create_loop_const( emit ))
+      create_loop_const( emit );
+   if (needs_to_create_sincos_consts( emit ))
+      create_sincos_consts( emit );
+   if (needs_to_create_arl_consts( emit ))
+      create_arl_consts( emit );
+
+   /* Nothing else to do for non-fragment shaders. */
+   if (emit->unit != PIPE_SHADER_FRAGMENT)
+      return TRUE;
+
+   if (!emit_ps_preamble( emit ))
+      return FALSE;
+
+   if (emit->key.fkey.light_twoside && !emit_light_twoside( emit ))
+      return FALSE;
+
+   if (emit->emit_frontface && !emit_frontface( emit ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Translate a TGSI token stream into SVGA3D shader instructions; the helper
+ * constants and prologue are emitted once, right before the first instruction.
+ * Returns FALSE as soon as any step (helper emission included) fails.
+ */
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+                                       const struct tgsi_token *tokens )
+{
+   struct tgsi_parse_context parse;
+   boolean ret = TRUE;
+   boolean helpers_emitted = FALSE;
+   unsigned line_nr = 0;
+
+   tgsi_parse_init( &parse, tokens );
+   emit->internal_imm_count = 0;
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      ret = emit_vs_preamble( emit );
+      if (!ret)
+         goto done;
+   }
+
+   ret = pre_parse_tokens( emit, tokens );
+   if (!ret)
+      goto done;
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
+         break;
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (emit->use_sm30)
+            ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
+         else
+            ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
+         break;
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (!helpers_emitted) {
+            /* Route the status through ret so a helper failure is returned. */
+            ret = svga_shader_emit_helpers( emit );
+            if (!ret)
+               goto done;
+            helpers_emitted = TRUE;
+         }
+         ret = svga_emit_instruction( emit, line_nr++,
+                                      &parse.FullToken.FullInstruction );
+         break;
+      default:
+         break;
+      }
+
+      /* Every case leaves its status in ret; stop at the first failure. */
+      if (!ret)
+         goto done;
+
+      reset_temp_regs( emit );
+   }
+
+   /* Need to terminate the current subroutine.  Note that the
+    * hardware doesn't tolerate shaders without sub-routines
+    * terminating with RET+END.
+    */
+   if (!emit->in_main_func) {
+      ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
+      if (!ret)
+         goto done;
+   }
+
+   /* Need to terminate the whole shader: */
+   ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
+
+done:
+   assert(ret);
+   tgsi_parse_free( &parse );
+   return ret;
+}
+
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
new file mode 100644
index 0000000000..59f299c185
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -0,0 +1,299 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * VMware SVGA specific winsys interface.
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * 
+ * Documentation taken from the VMware SVGA DDK.
+ */
+
+#ifndef SVGA_WINSYS_H_
+#define SVGA_WINSYS_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_buffer;
+struct pipe_screen;
+struct pipe_context;
+struct pipe_fence_handle;
+struct pipe_texture;
+struct svga_region;
+
+
+#define SVGA_BUFFER_USAGE_PINNED  (PIPE_BUFFER_USAGE_CUSTOM << 0)
+#define SVGA_BUFFER_USAGE_WRAPPED (PIPE_BUFFER_USAGE_CUSTOM << 1)
+
+
+/** Opaque surface handle */
+struct svga_winsys_surface;
+
+/** Opaque buffer handle */
+struct svga_winsys_handle;
+
+
+/**
+ * SVGA per-context winsys interface.
+ */
+struct svga_winsys_context
+{
+   void
+   (*destroy)(struct svga_winsys_context *swc);
+   /* NOTE(review): presumably reserves nr_bytes of command space plus nr_relocs relocation slots and returns where to write — confirm with the winsys implementation. */
+   void *       
+   (*reserve)(struct svga_winsys_context *swc, 
+	      uint32_t nr_bytes, uint32_t nr_relocs );
+   
+   /**
+    * Emit a relocation for a host surface.
+    * 
+    * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+    * 
+    * NOTE: Order of this call does matter. It should be the same order
+    * as relocations appear in the command buffer.
+    */
+   void
+   (*surface_relocation)(struct svga_winsys_context *swc, 
+	                 uint32 *sid, 
+	                 struct svga_winsys_surface *surface,
+	                 unsigned flags);
+   
+   /**
+    * Emit a relocation for a guest memory region.
+    * 
+    * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+    * 
+    * NOTE: Order of this call does matter. It should be the same order
+    * as relocations appear in the command buffer.
+    */
+   void
+   (*region_relocation)(struct svga_winsys_context *swc, 
+	                struct SVGAGuestPtr *ptr, 
+	                struct svga_winsys_buffer *buffer,
+	                uint32 offset,
+                        unsigned flags);
+   /* NOTE(review): commit presumably finalizes the space obtained from reserve() — confirm. */
+   void
+   (*commit)(struct svga_winsys_context *swc);
+   
+   enum pipe_error
+   (*flush)(struct svga_winsys_context *swc, 
+	    struct pipe_fence_handle **pfence); /* NOTE(review): pfence presumably receives a fence for the flushed commands — confirm */
+
+   /** 
+    * Context ID used to fill in the commands
+    * 
+    * Context IDs are arbitrary small non-negative integers,
+    * global to the entire SVGA device.
+    */
+   uint32 cid;
+};
+
+
+/**
+ * SVGA per-screen winsys interface.
+ */
+struct svga_winsys_screen
+{
+   void
+   (*destroy)(struct svga_winsys_screen *sws);
+   
+   boolean
+   (*get_cap)(struct svga_winsys_screen *sws,
+              SVGA3dDevCapIndex index,
+              SVGA3dDevCapResult *result);
+   
+   /**
+    * Create a new context.
+    *
+    * Context objects encapsulate all render state, and shader
+    * objects are per-context.
+    *
+    * Surfaces are not per-context. The same surface can be shared
+    * between multiple contexts, and surface operations can occur
+    * without a context.
+    */
+   struct svga_winsys_context *
+   (*context_create)(struct svga_winsys_screen *sws);
+   
+   
+   /**
+    * This creates a "surface" object in the SVGA3D device,
+    * and returns the surface ID (sid). Surfaces are generic
+    * containers for host VRAM objects like textures, vertex
+    * buffers, and depth/stencil buffers.
+    *
+    * Surfaces are hierarchical:
+    *
+    * - Surface may have multiple faces (for cube maps)
+    *
+    * - Each face has a list of mipmap levels
+    *
+    * - Each mipmap image may have multiple volume
+    *   slices, if the image is three dimensional.
+    *
+    * - Each slice is a 2D array of 'blocks'
+    *
+    * - Each block may be one or more pixels.
+    *   (Usually 1, more for DXT or YUV formats.)
+    *
+    * Surfaces are generic host VRAM objects. The SVGA3D device
+    * may optimize surfaces according to the format they were
+    * created with, but this format does not limit the ways in
+    * which the surface may be used. For example, a depth surface
+    * can be used as a texture, or a floating point image may
+    * be used as a vertex buffer. Some surface usages may be
+    * lower performance, due to software emulation, but any
+    * usage should work with any surface.
+    */
+   struct svga_winsys_surface *
+   (*surface_create)(struct svga_winsys_screen *sws,
+                     SVGA3dSurfaceFlags flags,
+                     SVGA3dSurfaceFormat format,
+                     SVGA3dSize size,
+                     uint32 numFaces,
+                     uint32 numMipLevels);
+
+   /**
+    * Whether this surface is sitting in a validate list
+    * (NOTE(review): the name reads as the opposite sense — confirm polarity). */
+   boolean
+   (*surface_is_flushed)(struct svga_winsys_screen *sws,
+                         struct svga_winsys_surface *surface);
+
+   /**
+    * Reference a SVGA3D surface object. This allows sharing of a
+    * surface between different objects.
+    */
+   void 
+   (*surface_reference)(struct svga_winsys_screen *sws,
+			struct svga_winsys_surface **pdst,
+			struct svga_winsys_surface *src);
+
+   /**
+    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+    *
+    * Remember that gallium gets to choose the interface it needs, and the
+    * window systems must then implement that interface (rather than the
+    * other way around...).
+    *
+    * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
+    * usage argument is only an optimization hint, not a guarantee, therefore 
+    * proper behavior must be observed in all circumstances.
+    *
+    * alignment indicates the client's alignment requirements, eg for
+    * SSE instructions.
+    */
+   struct svga_winsys_buffer *
+   (*buffer_create)( struct svga_winsys_screen *sws, 
+	             unsigned alignment, 
+	             unsigned usage,
+	             unsigned size );
+
+   /** 
+    * Map the entire data store of a buffer object into the client's address.
+    * usage is a bitmask of:
+    * - PIPE_BUFFER_USAGE_CPU_READ/WRITE
+    * - PIPE_BUFFER_USAGE_DONTBLOCK
+    * - PIPE_BUFFER_USAGE_UNSYNCHRONIZED
+    */
+   void *
+   (*buffer_map)( struct svga_winsys_screen *sws, 
+	          struct svga_winsys_buffer *buf,
+		  unsigned usage );
+   
+   void 
+   (*buffer_unmap)( struct svga_winsys_screen *sws, 
+                    struct svga_winsys_buffer *buf );
+
+   void 
+   (*buffer_destroy)( struct svga_winsys_screen *sws,
+	              struct svga_winsys_buffer *buf );
+
+
+   /**
+    * Reference a fence object.
+    */
+   void
+   (*fence_reference)( struct svga_winsys_screen *sws,
+                       struct pipe_fence_handle **pdst,
+                       struct pipe_fence_handle *src );
+
+   /**
+    * Checks whether the fence has been signalled.
+    * \param flag  driver-specific meaning
+    * \return zero on success.
+    */
+   int (*fence_signalled)( struct svga_winsys_screen *sws,
+                           struct pipe_fence_handle *fence,
+                           unsigned flag );
+
+   /**
+    * Wait for the fence to finish.
+    * \param flag  driver-specific meaning
+    * \return zero on success.
+    */
+   int (*fence_finish)( struct svga_winsys_screen *sws,
+                        struct pipe_fence_handle *fence,
+                        unsigned flag );
+
+};
+
+
+struct pipe_context *
+svga_context_create(struct pipe_screen *screen);
+
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws);
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen);
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+				enum SVGA3dSurfaceFormat format,
+				struct svga_winsys_surface *srf);
+
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture);
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer);
+
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+				struct pipe_buffer **buffer,
+				unsigned *stride);
+
+#endif /* SVGA_WINSYS_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h
new file mode 100644
index 0000000000..2fc1796a90
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/st_shader.h
@@ -0,0 +1,214 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Definitions
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_H
+#define ST_SHADER_SVGA_H
+
+#include "pipe/p_compiler.h"
+
+struct sh_op /* opcode token; the bit-fields total 32 bits (16+8+4+1+1+1+1) */
+{
+   unsigned opcode:16;
+   unsigned control:8; /* read by the dump code as a PROJECT/BIAS or comparison selector */
+   unsigned length:4;
+   unsigned predicated:1;
+   unsigned unused:1;
+   unsigned coissue:1; /* dumped as a leading "+" */
+   unsigned is_reg:1; /* asserted to be 0 by the opcode dump helpers */
+};
+
+struct sh_reg /* common register-token view; the bit-fields total 32 bits */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper bits of the register type, see sh_reg_type() */
+   unsigned relative:1;
+   unsigned unused:14;
+   unsigned type_lo:3; /* lower bits of the register type */
+   unsigned is_reg:1;
+};
+
+/* Reassemble the register type: type_hi supplies bits 3..4, type_lo bits 0..2. */
+static INLINE unsigned sh_reg_type( struct sh_reg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_cdata /* four float components, stored in x, y, z, w order */
+{
+   float xyzw[4];
+};
+
+struct sh_def /* opcode token + register token + four-float payload */
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_cdata cdata;
+};
+
+struct sh_defb /* opcode token + register token + a single uint payload */
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   uint data;
+};
+
+struct sh_idata /* four int components, stored in x, y, z, w order */
+{
+   int xyzw[4];
+};
+
+struct sh_defi /* opcode token + register token + four-int payload */
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_idata idata;
+};
+
+#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
+#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
+#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
+#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
+
+struct ps_sampleinfo /* the bit-fields total 32 bits (27+4+1) */
+{
+   unsigned unused:27;
+   unsigned texture_type:4; /* one of the PS_TEXTURETYPE_* / SVGA3DSAMP_* values */
+   unsigned is_reg:1;
+};
+
+struct vs_semantic /* the bit-fields total 32 bits (5+11+4+12) */
+{
+   unsigned usage:5; /* compared against SVGA3D_DECLUSAGE_* by the dump code */
+   unsigned unused1:11;
+   unsigned usage_index:4;
+   unsigned unused2:12;
+};
+
+struct sh_dstreg /* destination register token; the bit-fields total 32 bits */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper bits of the register type, see sh_dstreg_type() */
+   unsigned relative:1;
+   unsigned unused:2;
+   unsigned write_mask:4; /* tested against SVGA3DWRITEMASK_* by the dump code */
+   unsigned modifier:4; /* tested against SVGA3DDSTMOD_* by the dump code */
+   unsigned shift_scale:4;
+   unsigned type_lo:3; /* lower bits of the register type */
+   unsigned is_reg:1;
+};
+
+/* Reassemble the register type: type_hi supplies bits 3..4, type_lo bits 0..2. */
+static INLINE unsigned sh_dstreg_type( struct sh_dstreg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_dcl /* opcode token, a unit-specific info word, then the destination register */
+{
+   struct sh_op op;
+   union { /* both members are single 32-bit bit-field words */
+      struct {
+         struct ps_sampleinfo sampleinfo;
+      } ps;
+      struct {
+         struct vs_semantic semantic;
+      } vs;
+   } u;
+   struct sh_dstreg reg;
+};
+
+
+struct sh_srcreg /* source register token; the bit-fields total 32 bits */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper bits of the register type, see sh_srcreg_type() */
+   unsigned relative:1;
+   unsigned unused:2;
+   unsigned swizzle_x:2; /* one 2-bit selector per destination component */
+   unsigned swizzle_y:2;
+   unsigned swizzle_z:2;
+   unsigned swizzle_w:2;
+   unsigned modifier:4; /* compared against SVGA3DSRCMOD_* by the dump code */
+   unsigned type_lo:3; /* lower bits of the register type */
+   unsigned is_reg:1;
+};
+
+/* Reassemble the register type: type_hi supplies bits 3..4, type_lo bits 0..2. */
+static INLINE unsigned sh_srcreg_type( struct sh_srcreg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_dstop /* layout: opcode token followed by one destination register */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+};
+
+struct sh_srcop /* layout: opcode token followed by one source register */
+{
+   struct sh_op op;
+   struct sh_srcreg src;
+};
+
+struct sh_src2op /* layout: opcode token followed by two source registers */
+{
+   struct sh_op op;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_unaryop /* layout: opcode token, destination, one source */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src;
+};
+
+struct sh_binaryop /* layout: opcode token, destination, two sources */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_trinaryop /* layout: opcode token, destination, three sources */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+   struct sh_srcreg src2;
+};
+
+#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c
new file mode 100644
index 0000000000..d65cc93bfd
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/st_shader_dump.c
@@ -0,0 +1,649 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "st_shader.h"
+#include "st_shader_dump.h"
+#include "st_shader_op.h"
+#include "util/u_debug.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct dump_info /* context handed to the register dump helpers */
+{
+   SVGA3dShaderVersion version;
+   boolean is_ps; /* dump_reg() prints "t%u" instead of "a%u" when this is set */
+};
+
+/* Print an instruction mnemonic: a "+" prefix when the co-issue bit is set,
+ * the name itself, then "p" or "b" for the PROJECT / BIAS control values.
+ * Any other non-zero control value trips an assertion.
+ */
+static void dump_op( struct sh_op op, const char *mnemonic )
+{
+   const unsigned control = op.control;
+
+   assert( op.predicated == 0 );
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+   if (control == SVGA3DOPCONT_PROJECT)
+      debug_printf( "p" );
+   else if (control == SVGA3DOPCONT_BIAS)
+      debug_printf( "b" );
+   else if (control != 0)
+      assert( 0 );
+}
+
+
+/* Print a comparison instruction mnemonic: an optional "+" co-issue
+ * prefix, the name, then the "_gt" / "_eq" / ... suffix selected by
+ * op.control.  RESERVED0 prints no suffix; RESERVED1 and unknown values
+ * trip an assertion.
+ */
+static void dump_comp_op( struct sh_op op, const char *mnemonic )
+{
+   const char *suffix = NULL;
+
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+
+   switch (op.control) {
+   case SVGA3DOPCOMP_RESERVED0:                    break;
+   case SVGA3DOPCOMP_GT:        suffix = "_gt";    break;
+   case SVGA3DOPCOMP_EQ:        suffix = "_eq";    break;
+   case SVGA3DOPCOMP_GE:        suffix = "_ge";    break;
+   case SVGA3DOPCOMP_LT:        suffix = "_lt";    break;
+   case SVGA3DOPCOMPC_NE:       suffix = "_ne";    break;
+   case SVGA3DOPCOMP_LE:        suffix = "_le";    break;
+   case SVGA3DOPCOMP_RESERVED1:
+   default:
+      assert( 0 );
+   }
+
+   /* Emit the suffix, if one was selected, exactly once. */
+   if (suffix)
+      debug_printf( "%s", suffix );
+}
+
+
+
+static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); /* relative addressing is only accepted for CONST */
+   assert( reg.is_reg == 1 );
+   /* Print the textual name of 'reg' according to its type and number. */
+   switch (sh_reg_type( reg )) {
+   case SVGA3DREG_TEMP:
+      debug_printf( "r%u", reg.number );
+      break;
+
+   case SVGA3DREG_INPUT:
+      debug_printf( "v%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONST:
+      if (reg.relative) { /* NOTE(review): indreg is dereferenced here without a NULL check, and dump_dstreg() passes NULL */
+         if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
+            debug_printf( "c[aL+%u]", reg.number );
+         else
+            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+      }
+      else
+         debug_printf( "c%u", reg.number );
+      break;
+
+   case SVGA3DREG_ADDR:    /* VS */
+   /* SVGA3DREG_TEXTURE */ /* PS */
+      if (di->is_ps)
+         debug_printf( "t%u", reg.number );
+      else
+         debug_printf( "a%u", reg.number );
+      break;
+
+   case SVGA3DREG_RASTOUT:
+      switch (reg.number) {
+      case 0 /*POSITION*/:
+         debug_printf( "oPos" );
+         break;
+      case 1 /*FOG*/:
+         debug_printf( "oFog" );
+         break;
+      case 2 /*POINT_SIZE*/:
+         debug_printf( "oPts" );
+         break;
+      default:
+         assert( 0 );
+         debug_printf( "???" );
+      }
+      break;
+
+   case SVGA3DREG_ATTROUT:
+      assert( reg.number < 2 );
+      debug_printf( "oD%u", reg.number );
+      break;
+
+   case SVGA3DREG_TEXCRDOUT:
+   /* SVGA3DREG_OUTPUT */
+      debug_printf( "oT%u", reg.number );
+      break;
+
+   case SVGA3DREG_COLOROUT:
+      debug_printf( "oC%u", reg.number );
+      break;
+
+   case SVGA3DREG_DEPTHOUT:
+      debug_printf( "oD%u", reg.number ); /* NOTE(review): same "oD" prefix as ATTROUT above — confirm this is intended */
+      break;
+
+   case SVGA3DREG_SAMPLER:
+      debug_printf( "s%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTBOOL:
+      assert( !reg.relative );
+      debug_printf( "b%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTINT:
+      assert( !reg.relative );
+      debug_printf( "i%u", reg.number );
+      break;
+
+   case SVGA3DREG_LOOP:
+      assert( reg.number == 0 );
+      debug_printf( "aL" );
+      break;
+
+   case SVGA3DREG_MISCTYPE:
+      switch (reg.number) {
+      case SVGA3DMISCREG_POSITION:
+         debug_printf( "vPos" );
+         break;
+      case SVGA3DMISCREG_FACE:
+         debug_printf( "vFace" );
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+
+   case SVGA3DREG_LABEL:
+      debug_printf( "l%u", reg.number );
+      break;
+
+   case SVGA3DREG_PREDICATE:
+      debug_printf( "p%u", reg.number );
+      break;
+
+   /* Unknown register types assert and print a "???" placeholder. */
+   default:
+      assert( 0 );
+      debug_printf( "???" );
+   }
+}
+
+static void dump_cdata( struct sh_cdata cdata ) /* prints the four floats, comma separated */
+{
+   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+}
+
+static void dump_idata( struct sh_idata idata ) /* prints the four ints, comma separated */
+{
+   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+}
+
+static void dump_bdata( boolean bdata ) /* prints "TRUE" or "FALSE" */
+{
+   debug_printf( "%s", bdata ? "TRUE" : "FALSE" ); /* keep the format string a literal */
+}
+
+/* Print the "_2d" / "_cube" / "_volume" suffix for a ps_sampleinfo word;
+ * any other texture type trips an assertion.
+ */
+static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
+{
+   const unsigned type = sampleinfo.texture_type;
+
+   if (type == SVGA3DSAMP_2D)
+      debug_printf( "_2d" );
+   else if (type == SVGA3DSAMP_CUBE)
+      debug_printf( "_cube" );
+   else if (type == SVGA3DSAMP_VOLUME)
+      debug_printf( "_volume" );
+   else
+      assert( 0 );
+}
+
+
+static void dump_usageinfo( struct vs_semantic semantic ) /* prints "_<usage>" plus the usage index when non-zero */
+{
+   switch (semantic.usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      debug_printf("_position" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      debug_printf("_blendweight" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      debug_printf("_blendindices" );
+      break;
+   case SVGA3D_DECLUSAGE_NORMAL:
+      debug_printf("_normal" );
+      break;
+   case SVGA3D_DECLUSAGE_PSIZE:
+      debug_printf("_psize" );
+      break;
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      debug_printf("_texcoord");
+      break;
+   case SVGA3D_DECLUSAGE_TANGENT:
+      debug_printf("_tangent" );
+      break;
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      debug_printf("_binormal" );
+      break;
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      debug_printf("_tessfactor" );
+      break;
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      debug_printf("_positiont" );
+      break;
+   case SVGA3D_DECLUSAGE_COLOR:
+      debug_printf("_color" );
+      break;
+   case SVGA3D_DECLUSAGE_FOG:
+      debug_printf("_fog" );
+      break;
+   case SVGA3D_DECLUSAGE_DEPTH:
+      debug_printf("_depth" );
+      break;
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      debug_printf("_sample");
+      break;
+   default: /* unknown usage: assert and print nothing, not even the index */
+      assert( 0 );
+      return;
+   }
+   /* The index is appended directly after the usage name, without a separator. */
+   if (semantic.usage_index != 0) {
+      debug_printf("%d", semantic.usage_index );
+   }
+}
+
+static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
+{
+   union { /* lets the destination token be handed to dump_reg() as a struct sh_reg */
+      struct sh_reg reg;
+      struct sh_dstreg dstreg;
+   } u;
+   /* Prints the modifier and shift suffixes, a space, the register, then the write mask. */
+   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
+
+   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
+      debug_printf( "_sat" );
+   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
+      debug_printf( "_pp" );
+   switch (dstreg.shift_scale) { /* 1..3 print _x2/_x4/_x8, 13..15 print _d8/_d4/_d2 */
+   case 0:
+      break;
+   case 1:
+      debug_printf( "_x2" );
+      break;
+   case 2:
+      debug_printf( "_x4" );
+      break;
+   case 3:
+      debug_printf( "_x8" );
+      break;
+   case 13:
+      debug_printf( "_d8" );
+      break;
+   case 14:
+      debug_printf( "_d4" );
+      break;
+   case 15:
+      debug_printf( "_d2" );
+      break;
+   default:
+      assert( 0 );
+   }
+   debug_printf( " " );
+   /* NOTE(review): dump_reg() dereferences its indreg argument for a relative CONST register; NULL is passed below. */
+   u.dstreg = dstreg;
+   dump_reg( u.reg, NULL, di );
+   if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { /* a full mask is left implicit */
+      debug_printf( "." );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_0)
+         debug_printf( "x" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_1)
+         debug_printf( "y" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_2)
+         debug_printf( "z" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_3)
+         debug_printf( "w" );
+   }
+}
+
+/* Print a source operand: modifier prefix, register, modifier suffix, then
+ * the swizzle unless it is the identity.  indreg is passed on to dump_reg().
+ */
+static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   union {
+      struct sh_srcreg srcreg;
+      struct sh_reg reg;
+   } alias;
+   const unsigned mod = srcreg.modifier;
+
+   if (mod == SVGA3DSRCMOD_NEG || mod == SVGA3DSRCMOD_BIASNEG ||
+       mod == SVGA3DSRCMOD_SIGNNEG || mod == SVGA3DSRCMOD_X2NEG)
+      debug_printf( "-" );
+   else if (mod == SVGA3DSRCMOD_ABS)
+      debug_printf( "|" );
+   else if (mod == SVGA3DSRCMOD_ABSNEG)
+      debug_printf( "-|" );
+   else if (mod == SVGA3DSRCMOD_COMP)
+      debug_printf( "1-" );
+   else if (mod == SVGA3DSRCMOD_NOT)
+      debug_printf( "!" );
+
+   alias.srcreg = srcreg;
+   dump_reg( alias.reg, indreg, di );
+
+   switch (mod) {
+   case SVGA3DSRCMOD_NONE:
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_COMP:
+   case SVGA3DSRCMOD_NOT:
+      break;
+   case SVGA3DSRCMOD_ABS:
+   case SVGA3DSRCMOD_ABSNEG:
+      debug_printf( "|" );
+      break;
+   case SVGA3DSRCMOD_BIAS:
+   case SVGA3DSRCMOD_BIASNEG:
+      debug_printf( "_bias" );
+      break;
+   case SVGA3DSRCMOD_SIGN:
+   case SVGA3DSRCMOD_SIGNNEG:
+      debug_printf( "_bx2" );
+      break;
+   case SVGA3DSRCMOD_X2:
+   case SVGA3DSRCMOD_X2NEG:
+      debug_printf( "_x2" );
+      break;
+   case SVGA3DSRCMOD_DZ:
+      debug_printf( "_dz" );
+      break;
+   case SVGA3DSRCMOD_DW:
+      debug_printf( "_dw" );
+      break;
+   default:
+      assert( 0 );
+   }
+
+   /* The identity swizzle is implied. */
+   if (srcreg.swizzle_x == 0 && srcreg.swizzle_y == 1 &&
+       srcreg.swizzle_z == 2 && srcreg.swizzle_w == 3)
+      return;
+
+   debug_printf( "." );
+   debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+   /* A replicated swizzle such as .xxxx is shortened to one letter. */
+   if (srcreg.swizzle_x != srcreg.swizzle_y || srcreg.swizzle_y != srcreg.swizzle_z ||
+       srcreg.swizzle_z != srcreg.swizzle_w) {
+      debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
+      debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
+      debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
+   }
+}
+
+/**
+ * Print a textual disassembly of an SVGA3D shader to the debug output.
+ * Decoding ends at SVGA3DOP_END, or early when the next opcode would lie
+ * past the buffer or cannot be decoded.
+ *
+ * @param assem      token stream; its first dword is the version token
+ * @param dwords     number of dwords readable at assem
+ * @param do_binary  if non-zero, every dword is first printed in hex
+ */
+void
+sh_svga_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary )
+{
+   const unsigned *start = assem;
+   boolean finished = FALSE;
+   struct dump_info di;
+   unsigned i;
+
+   if (do_binary) {
+      for (i = 0; i < dwords; i++)
+         debug_printf("  0x%08x,\n", assem[i]);
+
+      debug_printf("\n\n");
+   }
+
+   /* Without at least one dword there is no version token to read. */
+   if (dwords == 0)
+      return;
+
+   di.version.value = *assem++;
+   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
+
+   debug_printf(
+      "%s_%u_%u\n",
+      di.is_ps ? "ps" : "vs",
+      di.version.major,
+      di.version.minor );
+
+   while (!finished) {
+      struct sh_op op;
+
+      /* Validate the position before reading the opcode token. */
+      if ((unsigned) (assem - start) >= dwords) {
+         debug_printf("... ran off end of buffer\n");
+         assert(0);
+         return;
+      }
+      op = *(struct sh_op *) assem;
+
+      switch (op.opcode) {
+      case SVGA3DOP_DCL:
+         {
+            struct sh_dcl dcl = *(struct sh_dcl *) assem;
+
+            debug_printf( "dcl" );
+            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
+               dump_sampleinfo( dcl.u.ps.sampleinfo );
+            else if (di.is_ps) {
+               if (di.version.major == 3 &&
+                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
+                  dump_usageinfo( dcl.u.vs.semantic );
+            }
+            else
+               dump_usageinfo( dcl.u.vs.semantic );
+            dump_dstreg( dcl.reg, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFB:
+         {
+            struct sh_defb defb = *(struct sh_defb *) assem;
+
+            debug_printf( "defb " );
+            dump_reg( defb.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_bdata( defb.data );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFI:
+         {
+            struct sh_defi defi = *(struct sh_defi *) assem;
+
+            debug_printf( "defi " );
+            dump_reg( defi.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_idata( defi.idata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_TEXCOORD:
+         assert( di.is_ps );
+         dump_op( op, "texcoord" );
+         {  /* always decoded in the dst, src form */
+            struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+            dump_dstreg( unaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( unaryop.src, NULL, &di );
+            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_TEX:
+         assert( di.is_ps );
+         {  /* always decoded as the three-operand texld form */
+            struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
+
+            dump_op( op, "texld" );
+            dump_dstreg( binaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src0, NULL, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src1, NULL, &di );
+            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_DEF:
+         {
+            struct sh_def def = *(struct sh_def *) assem;
+
+            debug_printf( "def " );
+            dump_reg( def.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_cdata( def.cdata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_def ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_PHASE:
+         debug_printf( "phase\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_COMMENT:
+         /* Not decoded, and assem is not advanced past it: stop rather than
+          * loop forever once asserts are compiled out. */
+         assert( 0 );
+         return;
+
+      case SVGA3DOP_RET:
+         debug_printf( "ret\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_END:
+         debug_printf( "end\n" );
+         finished = TRUE;
+         break;
+
+      default:
+         {
+            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
+            uint num_src;
+            boolean not_first_arg = FALSE;
+
+            /* The lookup yields NULL when it has no operand counts. */
+            if (info == NULL) {
+               debug_printf("... unknown opcode %u\n", (unsigned) op.opcode);
+               return;
+            }
+
+            assert( info->num_dst <= 1 );
+            num_src = info->num_src + op.predicated;
+            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
+               num_src += 2;
+
+            dump_comp_op( op, info->mnemonic );
+            assem += sizeof( struct sh_op ) / sizeof( unsigned );
+
+            if (info->num_dst > 0) {
+               struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
+
+               dump_dstreg( dstreg, &di );
+               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
+               not_first_arg = TRUE;
+            }
+
+            for (i = 0; i < num_src; i++) {
+               struct sh_srcreg srcreg;
+               struct sh_srcreg indreg;
+
+               srcreg = *(struct sh_srcreg *) assem;
+               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
+                  indreg = *(struct sh_srcreg *) assem;
+                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               }
+
+               if (not_first_arg)
+                  debug_printf( ", " );
+               else
+                  debug_printf( " " );
+               dump_srcreg( srcreg, &indreg, &di );
+               not_first_arg = TRUE;
+            }
+
+            debug_printf( "\n" );
+         }
+      }
+   }
+}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h
new file mode 100644
index 0000000000..af5549cdba
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/st_shader_dump.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_DUMP_H
+#define ST_SHADER_SVGA_DUMP_H
+
+void                       /* Print a disassembly of an SVGA3D shader via debug_printf. */
+sh_svga_dump(
+   const unsigned *assem,  /* shader tokens; the first dword is the version token */
+   unsigned dwords,        /* number of dwords at assem */
+   unsigned do_binary );   /* non-zero: also print each dword in hex first */
+
+#endif /* ST_SHADER_SVGA_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c
new file mode 100644
index 0000000000..2c05382ab9
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/st_shader_op.c
@@ -0,0 +1,168 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "util/u_debug.h"
+#include "st_shader_op.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+#define SVGA3DOP_INVALID SVGA3DOP_END
+#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
+
+static struct sh_opcode_info opcode_info[] =
+{  /* Indexed by opcode value; columns: mnemonic, num_dst, num_src, svga_opcode. */
+   { "nop",          0, 0, SVGA3DOP_NOP          },
+   { "mov",          1, 1, SVGA3DOP_MOV,         },
+   { "add",          1, 2, SVGA3DOP_ADD,         },
+   { "sub",          1, 2, SVGA3DOP_SUB,         },
+   { "mad",          1, 3, SVGA3DOP_MAD,         },
+   { "mul",          1, 2, SVGA3DOP_MUL,         },
+   { "rcp",          1, 1, SVGA3DOP_RCP,         },
+   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
+   { "dp3",          1, 2, SVGA3DOP_DP3,         },
+   { "dp4",          1, 2, SVGA3DOP_DP4,         },
+   { "min",          1, 2, SVGA3DOP_MIN,         },
+   { "max",          1, 2, SVGA3DOP_MAX,         },
+   { "slt",          1, 2, SVGA3DOP_SLT,         },
+   { "sge",          1, 2, SVGA3DOP_SGE,         },
+   { "exp",          1, 1, SVGA3DOP_EXP,         },
+   { "log",          1, 1, SVGA3DOP_LOG,         },
+   { "lit",          1, 1, SVGA3DOP_LIT,         },
+   { "dst",          1, 2, SVGA3DOP_DST,         },
+   { "lrp",          1, 3, SVGA3DOP_LRP,         },
+   { "frc",          1, 1, SVGA3DOP_FRC,         },
+   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
+   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
+   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
+   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
+   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
+   { "call",         0, 1, SVGA3DOP_CALL,        },
+   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
+   { "loop",         0, 2, SVGA3DOP_LOOP,        },
+   { "ret",          0, 0, SVGA3DOP_RET,         },
+   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
+   { "label",        0, 1, SVGA3DOP_LABEL,       },
+   { "dcl",          0, 0, SVGA3DOP_DCL,         },
+   { "pow",          1, 2, SVGA3DOP_POW,         },
+   { "crs",          1, 2, SVGA3DOP_CRS,         },
+   { "sgn",          1, 3, SVGA3DOP_SGN,         },
+   { "abs",          1, 1, SVGA3DOP_ABS,         },
+   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-component normalization */
+   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
+   { "rep",          0, 1, SVGA3DOP_REP,         },
+   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
+   { "if",           0, 1, SVGA3DOP_IF,          },
+   { "ifc",          0, 2, SVGA3DOP_IFC,         },
+   { "else",         0, 0, SVGA3DOP_ELSE,        },
+   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
+   { "break",        0, 0, SVGA3DOP_BREAK,       },
+   { "breakc",       0, 2, SVGA3DOP_BREAKC,      }, /* break_comp src0, src1 */
+   { "mova",         1, 1, SVGA3DOP_MOVA,        },
+   { "defb",         0, 0, SVGA3DOP_DEFB,        },
+   { "defi",         0, 0, SVGA3DOP_DEFI,        },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
+   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
+   { "tex",          0, 0, SVGA3DOP_TEX,         },
+   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
+   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
+   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
+   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
+   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
+   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
+   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
+   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
+   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
+   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
+   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
+   { "expp",         1, 1, SVGA3DOP_EXPP,        },
+   { "logp",         1, 1, SVGA3DOP_LOGP,        },
+   { "cnd",          1, 3, SVGA3DOP_CND,         },
+   { "def",          0, 0, SVGA3DOP_DEF,         },
+   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
+   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
+   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
+   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
+   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
+   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
+   { "cmp",          1, 3, SVGA3DOP_CMP,         },
+   { "bem",          1, 2, SVGA3DOP_BEM,         },
+   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
+   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
+   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
+   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
+   { "setp",         1, 2, SVGA3DOP_SETP,        },
+   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
+   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
+};
+
+/**
+ * Look up the decoding information for an SVGA3D opcode: its mnemonic and
+ * its destination/source register counts.
+ *
+ * @param op  opcode value, used directly as the table index
+ *
+ * @return    the matching table row, or NULL (after an assert) when op is
+ *            outside the table or names a row without valid information
+ */
+const struct sh_opcode_info *sh_svga_opcode_info( uint op )
+{
+   const uint count = sizeof( opcode_info ) / sizeof( opcode_info[0] );
+
+   /* Opcodes past the table are PHASE, COMMENT, END or plain out of range;
+    * SVGA3DOP_INVALID rows still lack their dst/src register counts.
+    */
+   if (op >= count || opcode_info[op].svga_opcode == SVGA3DOP_INVALID) {
+      assert( 0 );
+      return NULL;
+   }
+
+   /* Sanity check: a row must sit at the index equal to its opcode. */
+   assert( op == opcode_info[op].svga_opcode );
+
+   return &opcode_info[op];
+}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h
new file mode 100644
index 0000000000..01d39dca84
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/st_shader_op.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_OP_H
+#define ST_SHADER_SVGA_OP_H
+
+struct sh_opcode_info   /* one row of the table behind sh_svga_opcode_info() */
+{
+   const char *mnemonic;      /* textual opcode name */
+   unsigned num_dst:8;        /* number of destination register tokens */
+   unsigned num_src:8;        /* number of source register tokens */
+   unsigned svga_opcode:16;   /* SVGA3DOP_* value this row describes */
+};
+
+const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
+
+#endif /* ST_SHADER_SVGA_OP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
new file mode 100644
index 0000000000..180dde8dc1
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -0,0 +1,1736 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Dump SVGA commands.
+ *
+ * Generated automatically from svga3d_reg.h by svga_dump.py.
+ */
+
+#include "svga_types.h"
+#include "st_shader_dump.h"
+#include "svga3d_reg.h"
+
+#include "util/u_debug.h"
+#include "svga_dump.h"
+/* Print every field of an SVGA3dVertexDecl, one per line; enum fields are printed by name. */
+static void
+dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd)
+{
+   switch((*cmd).identity.type) {
+   case SVGA3D_DECLTYPE_FLOAT1:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT3:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n");
+      break;
+   case SVGA3D_DECLTYPE_D3DCOLOR:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n");
+      break;
+   case SVGA3D_DECLTYPE_UBYTE4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n");
+      break;
+   case SVGA3D_DECLTYPE_UBYTE4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT2N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n");
+      break;
+   case SVGA3D_DECLTYPE_USHORT2N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n");
+      break;
+   case SVGA3D_DECLTYPE_USHORT4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n");
+      break;
+   case SVGA3D_DECLTYPE_UDEC3:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n");
+      break;
+   case SVGA3D_DECLTYPE_DEC3N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT16_2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT16_4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n");
+      break;
+   case SVGA3D_DECLTYPE_MAX:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n");
+      break;
+   default:   /* value not named above: print it as a number */
+      debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type);
+      break;
+   }
+   switch((*cmd).identity.method) {
+   case SVGA3D_DECLMETHOD_DEFAULT:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n");
+      break;
+   case SVGA3D_DECLMETHOD_PARTIALU:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n");
+      break;
+   case SVGA3D_DECLMETHOD_PARTIALV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n");
+      break;
+   case SVGA3D_DECLMETHOD_CROSSUV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n");
+      break;
+   case SVGA3D_DECLMETHOD_UV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n");
+      break;
+   case SVGA3D_DECLMETHOD_LOOKUP:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n");
+      break;
+   case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n");
+      break;
+   default:   /* value not named above: print it as a number */
+      debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method);
+      break;
+   }
+   switch((*cmd).identity.usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n");
+      break;
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n");
+      break;
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n");
+      break;
+   case SVGA3D_DECLUSAGE_NORMAL:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n");
+      break;
+   case SVGA3D_DECLUSAGE_PSIZE:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n");
+      break;
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n");
+      break;
+   case SVGA3D_DECLUSAGE_TANGENT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n");
+      break;
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n");
+      break;
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n");
+      break;
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n");
+      break;
+   case SVGA3D_DECLUSAGE_COLOR:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n");
+      break;
+   case SVGA3D_DECLUSAGE_FOG:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n");
+      break;
+   case SVGA3D_DECLUSAGE_DEPTH:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n");
+      break;
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n");
+      break;
+   case SVGA3D_DECLUSAGE_MAX:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n");
+      break;
+   default:   /* value not named above: print it as a number */
+      debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage);
+      break;
+   }
+   debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex);
+   debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId);
+   debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset);
+   debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride);
+   debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first);
+   debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last);
+}
+/* Print every field of an SVGA3dTextureState, one per line; the state name is printed symbolically. */
+static void
+dump_SVGA3dTextureState(const SVGA3dTextureState *cmd)
+{
+   debug_printf("\t\t.stage = %u\n", (*cmd).stage);
+   switch((*cmd).name) {
+   case SVGA3D_TS_INVALID:
+      debug_printf("\t\t.name = SVGA3D_TS_INVALID\n");
+      break;
+   case SVGA3D_TS_BIND_TEXTURE:
+      debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n");
+      break;
+   case SVGA3D_TS_COLOROP:
+      debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n");
+      break;
+   case SVGA3D_TS_COLORARG1:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n");
+      break;
+   case SVGA3D_TS_COLORARG2:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n");
+      break;
+   case SVGA3D_TS_ALPHAOP:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n");
+      break;
+   case SVGA3D_TS_ALPHAARG1:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n");
+      break;
+   case SVGA3D_TS_ALPHAARG2:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n");
+      break;
+   case SVGA3D_TS_ADDRESSU:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n");
+      break;
+   case SVGA3D_TS_ADDRESSV:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n");
+      break;
+   case SVGA3D_TS_MIPFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n");
+      break;
+   case SVGA3D_TS_MAGFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n");
+      break;
+   case SVGA3D_TS_MINFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n");
+      break;
+   case SVGA3D_TS_BORDERCOLOR:
+      debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n");
+      break;
+   case SVGA3D_TS_TEXCOORDINDEX:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n");
+      break;
+   case SVGA3D_TS_TEXTURETRANSFORMFLAGS:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n");
+      break;
+   case SVGA3D_TS_TEXCOORDGEN:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT00:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT01:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT10:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT11:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n");
+      break;
+   case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n");
+      break;
+   case SVGA3D_TS_TEXTURE_LOD_BIAS:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n");
+      break;
+   case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n");
+      break;
+   case SVGA3D_TS_ADDRESSW:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n");
+      break;
+   case SVGA3D_TS_GAMMA:
+      debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n");
+      break;
+   case SVGA3D_TS_BUMPENVLSCALE:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n");
+      break;
+   case SVGA3D_TS_BUMPENVLOFFSET:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n");
+      break;
+   case SVGA3D_TS_COLORARG0:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n");
+      break;
+   case SVGA3D_TS_ALPHAARG0:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n");
+      break;
+   case SVGA3D_TS_MAX:
+      debug_printf("\t\t.name = SVGA3D_TS_MAX\n");
+      break;
+   default:   /* value not named above: print it as a number */
+      debug_printf("\t\t.name = %i\n", (*cmd).name);
+      break;
+   }
+   debug_printf("\t\t.value = %u\n", (*cmd).value);
+   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+/**
+ * Print every member of an SVGA3dCopyBox through debug_printf(), one
+ * member per line, each formatted as an unsigned integer.
+ *
+ * @param cmd  box to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd)
+{
+   debug_printf("\t\t.x = %u\n", cmd->x);
+   debug_printf("\t\t.y = %u\n", cmd->y);
+   debug_printf("\t\t.z = %u\n", cmd->z);
+   debug_printf("\t\t.w = %u\n", cmd->w);
+   debug_printf("\t\t.h = %u\n", cmd->h);
+   debug_printf("\t\t.d = %u\n", cmd->d);
+   debug_printf("\t\t.srcx = %u\n", cmd->srcx);
+   debug_printf("\t\t.srcy = %u\n", cmd->srcy);
+   debug_printf("\t\t.srcz = %u\n", cmd->srcz);
+}
+
+/**
+ * Print an SVGA3dCmdSetClipPlane: the context id, the plane index and the
+ * four plane[] entries (as floating point), one per line.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.index = %u\n", cmd->index);
+   debug_printf("\t\t.plane[0] = %f\n", cmd->plane[0]);
+   debug_printf("\t\t.plane[1] = %f\n", cmd->plane[1]);
+   debug_printf("\t\t.plane[2] = %f\n", cmd->plane[2]);
+   debug_printf("\t\t.plane[3] = %f\n", cmd->plane[3]);
+}
+
+/**
+ * Print an SVGA3dCmdWaitForQuery: context id, query type (by enumerator
+ * name when recognised, otherwise as a signed number) and the gmrId/offset
+ * pair of guestResult.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+
+   switch (cmd->type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+
+   debug_printf("\t\t.guestResult.gmrId = %u\n", cmd->guestResult.gmrId);
+   debug_printf("\t\t.guestResult.offset = %u\n", cmd->guestResult.offset);
+}
+
+/**
+ * Print an SVGA3dCmdSetRenderTarget: context id, render-target type and
+ * the sid/face/mipmap of the target image.
+ *
+ * Any type other than DEPTH or STENCIL is printed as "SVGA3D_RT_COLOR<n>",
+ * where n is the distance of the value from SVGA3D_RT_COLOR0.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+
+   switch (cmd->type) {
+   case SVGA3D_RT_DEPTH:
+      debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n");
+      break;
+   case SVGA3D_RT_STENCIL:
+      debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n");
+      break;
+   default:
+      debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", cmd->type - SVGA3D_RT_COLOR0);
+      break;
+   }
+
+   debug_printf("\t\t.target.sid = %u\n", cmd->target.sid);
+   debug_printf("\t\t.target.face = %u\n", cmd->target.face);
+   debug_printf("\t\t.target.mipmap = %u\n", cmd->target.mipmap);
+}
+
+/**
+ * Print the context id of an SVGA3dCmdSetTextureState; no other member
+ * is read here.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+}
+
+/**
+ * Print the sid/face/mipmap triple of both the src and dest members of an
+ * SVGA3dCmdSurfaceCopy, one value per line.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd)
+{
+   debug_printf("\t\t.src.sid = %u\n", cmd->src.sid);
+   debug_printf("\t\t.src.face = %u\n", cmd->src.face);
+   debug_printf("\t\t.src.mipmap = %u\n", cmd->src.mipmap);
+   debug_printf("\t\t.dest.sid = %u\n", cmd->dest.sid);
+   debug_printf("\t\t.dest.face = %u\n", cmd->dest.face);
+   debug_printf("\t\t.dest.mipmap = %u\n", cmd->dest.mipmap);
+}
+
+static void
+dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).face) {
+   case SVGA3D_FACE_INVALID:
+      debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n");
+      break;
+   case SVGA3D_FACE_NONE:
+      debug_printf("\t\t.face = SVGA3D_FACE_NONE\n");
+      break;
+   case SVGA3D_FACE_FRONT:
+      debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n");
+      break;
+   case SVGA3D_FACE_BACK:
+      debug_printf("\t\t.face = SVGA3D_FACE_BACK\n");
+      break;
+   case SVGA3D_FACE_FRONT_BACK:
+      debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n");
+      break;
+   case SVGA3D_FACE_MAX:
+      debug_printf("\t\t.face = SVGA3D_FACE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.face = %i\n", (*cmd).face);
+      break;
+   }
+   debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]);
+   debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]);
+   debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]);
+   debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]);
+   debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]);
+   debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]);
+   debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]);
+   debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]);
+   debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]);
+   debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]);
+   debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]);
+   debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]);
+   debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]);
+   debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]);
+   debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]);
+   debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]);
+   debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess);
+}
+
+static void
+dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.index = %u\n", (*cmd).index);
+   switch((*cmd).data.type) {
+   case SVGA3D_LIGHTTYPE_INVALID:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n");
+      break;
+   case SVGA3D_LIGHTTYPE_POINT:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n");
+      break;
+   case SVGA3D_LIGHTTYPE_SPOT1:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n");
+      break;
+   case SVGA3D_LIGHTTYPE_SPOT2:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n");
+      break;
+   case SVGA3D_LIGHTTYPE_DIRECTIONAL:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n");
+      break;
+   case SVGA3D_LIGHTTYPE_MAX:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.data.type = %i\n", (*cmd).data.type);
+      break;
+   }
+   debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace);
+   debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]);
+   debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]);
+   debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]);
+   debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]);
+   debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]);
+   debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]);
+   debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]);
+   debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]);
+   debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]);
+   debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]);
+   debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]);
+   debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]);
+   debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]);
+   debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]);
+   debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]);
+   debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]);
+   debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]);
+   debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]);
+   debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]);
+   debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]);
+   debug_printf("\t\t.data.range = %f\n", (*cmd).data.range);
+   debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff);
+   debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0);
+   debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1);
+   debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2);
+   debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta);
+   debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi);
+}
+
+/**
+ * Print an SVGA3dCmdSetViewport: the context id followed by the x, y, w
+ * and h members of .rect, all as unsigned integers.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.rect.x = %u\n", cmd->rect.x);
+   debug_printf("\t\t.rect.y = %u\n", cmd->rect.y);
+   debug_printf("\t\t.rect.w = %u\n", cmd->rect.w);
+   debug_printf("\t\t.rect.h = %u\n", cmd->rect.h);
+}
+
+/**
+ * Print an SVGA3dCmdSetScissorRect: the context id followed by the x, y,
+ * w and h members of .rect, all as unsigned integers.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.rect.x = %u\n", cmd->rect.x);
+   debug_printf("\t\t.rect.y = %u\n", cmd->rect.y);
+   debug_printf("\t\t.rect.w = %u\n", cmd->rect.w);
+   debug_printf("\t\t.rect.h = %u\n", cmd->rect.h);
+}
+
+/**
+ * Print the x, y, w, h, srcx and srcy members of an SVGA3dCopyRect, one
+ * per line, all as unsigned integers.
+ *
+ * @param cmd  rectangle to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd)
+{
+   debug_printf("\t\t.x = %u\n", cmd->x);
+   debug_printf("\t\t.y = %u\n", cmd->y);
+   debug_printf("\t\t.w = %u\n", cmd->w);
+   debug_printf("\t\t.h = %u\n", cmd->h);
+   debug_printf("\t\t.srcx = %u\n", cmd->srcx);
+   debug_printf("\t\t.srcy = %u\n", cmd->srcy);
+}
+
+/**
+ * Print an SVGA3dCmdSetShader: context id, shader type (by enumerator
+ * name when recognised, otherwise as a signed number) and shader id.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+
+   switch (cmd->type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+
+   debug_printf("\t\t.shid = %u\n", cmd->shid);
+}
+
+/**
+ * Print an SVGA3dCmdEndQuery: context id, query type (by enumerator name
+ * when recognised, otherwise as a signed number) and the gmrId/offset pair
+ * of guestResult.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+
+   switch (cmd->type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+
+   debug_printf("\t\t.guestResult.gmrId = %u\n", cmd->guestResult.gmrId);
+   debug_printf("\t\t.guestResult.offset = %u\n", cmd->guestResult.offset);
+}
+
+/**
+ * Print the width, height and depth members of an SVGA3dSize, one per
+ * line, as unsigned integers.
+ *
+ * @param cmd  size to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dSize(const SVGA3dSize *cmd)
+{
+   debug_printf("\t\t.width = %u\n", cmd->width);
+   debug_printf("\t\t.height = %u\n", cmd->height);
+   debug_printf("\t\t.depth = %u\n", cmd->depth);
+}
+
+/**
+ * Print the surface id carried by an SVGA3dCmdDestroySurface.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", cmd->sid);
+}
+
+/**
+ * Print the context id carried by an SVGA3dCmdDefineContext.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+}
+
+/**
+ * Print the x, y, w and h members of an SVGA3dRect, one per line, as
+ * unsigned integers.
+ *
+ * @param cmd  rectangle to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dRect(const SVGA3dRect *cmd)
+{
+   debug_printf("\t\t.x = %u\n", cmd->x);
+   debug_printf("\t\t.y = %u\n", cmd->y);
+   debug_printf("\t\t.w = %u\n", cmd->w);
+   debug_printf("\t\t.h = %u\n", cmd->h);
+}
+
+/**
+ * Print an SVGA3dCmdBeginQuery: context id and query type (by enumerator
+ * name when recognised, otherwise as a signed number).
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+
+   switch (cmd->type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dRenderState(const SVGA3dRenderState *cmd)
+{
+   switch((*cmd).state) {
+   case SVGA3D_RS_INVALID:
+      debug_printf("\t\t.state = SVGA3D_RS_INVALID\n");
+      break;
+   case SVGA3D_RS_ZENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n");
+      break;
+   case SVGA3D_RS_ZWRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n");
+      break;
+   case SVGA3D_RS_ALPHATESTENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n");
+      break;
+   case SVGA3D_RS_DITHERENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n");
+      break;
+   case SVGA3D_RS_BLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n");
+      break;
+   case SVGA3D_RS_FOGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n");
+      break;
+   case SVGA3D_RS_SPECULARENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n");
+      break;
+   case SVGA3D_RS_STENCILENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n");
+      break;
+   case SVGA3D_RS_LIGHTINGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n");
+      break;
+   case SVGA3D_RS_NORMALIZENORMALS:
+      debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n");
+      break;
+   case SVGA3D_RS_POINTSPRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n");
+      break;
+   case SVGA3D_RS_POINTSCALEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n");
+      break;
+   case SVGA3D_RS_STENCILREF:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n");
+      break;
+   case SVGA3D_RS_STENCILMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n");
+      break;
+   case SVGA3D_RS_STENCILWRITEMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n");
+      break;
+   case SVGA3D_RS_FOGSTART:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n");
+      break;
+   case SVGA3D_RS_FOGEND:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n");
+      break;
+   case SVGA3D_RS_FOGDENSITY:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n");
+      break;
+   case SVGA3D_RS_POINTSIZE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n");
+      break;
+   case SVGA3D_RS_POINTSIZEMIN:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n");
+      break;
+   case SVGA3D_RS_POINTSIZEMAX:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_A:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_B:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_C:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n");
+      break;
+   case SVGA3D_RS_FOGCOLOR:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n");
+      break;
+   case SVGA3D_RS_AMBIENT:
+      debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n");
+      break;
+   case SVGA3D_RS_CLIPPLANEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n");
+      break;
+   case SVGA3D_RS_FOGMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n");
+      break;
+   case SVGA3D_RS_FILLMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n");
+      break;
+   case SVGA3D_RS_SHADEMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n");
+      break;
+   case SVGA3D_RS_LINEPATTERN:
+      debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n");
+      break;
+   case SVGA3D_RS_SRCBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n");
+      break;
+   case SVGA3D_RS_DSTBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n");
+      break;
+   case SVGA3D_RS_BLENDEQUATION:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n");
+      break;
+   case SVGA3D_RS_CULLMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n");
+      break;
+   case SVGA3D_RS_ZFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n");
+      break;
+   case SVGA3D_RS_ALPHAFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n");
+      break;
+   case SVGA3D_RS_STENCILFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n");
+      break;
+   case SVGA3D_RS_STENCILFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n");
+      break;
+   case SVGA3D_RS_STENCILZFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n");
+      break;
+   case SVGA3D_RS_STENCILPASS:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n");
+      break;
+   case SVGA3D_RS_ALPHAREF:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n");
+      break;
+   case SVGA3D_RS_FRONTWINDING:
+      debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n");
+      break;
+   case SVGA3D_RS_COORDINATETYPE:
+      debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n");
+      break;
+   case SVGA3D_RS_ZBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n");
+      break;
+   case SVGA3D_RS_RANGEFOGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n");
+      break;
+   case SVGA3D_RS_VERTEXMATERIALENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n");
+      break;
+   case SVGA3D_RS_DIFFUSEMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_SPECULARMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_AMBIENTMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_EMISSIVEMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_TEXTUREFACTOR:
+      debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n");
+      break;
+   case SVGA3D_RS_LOCALVIEWER:
+      debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n");
+      break;
+   case SVGA3D_RS_SCISSORTESTENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n");
+      break;
+   case SVGA3D_RS_BLENDCOLOR:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n");
+      break;
+   case SVGA3D_RS_STENCILENABLE2SIDED:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILZFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILPASS:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n");
+      break;
+   case SVGA3D_RS_VERTEXBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n");
+      break;
+   case SVGA3D_RS_SLOPESCALEDEPTHBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n");
+      break;
+   case SVGA3D_RS_DEPTHBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n");
+      break;
+   case SVGA3D_RS_OUTPUTGAMMA:
+      debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n");
+      break;
+   case SVGA3D_RS_ZVISIBLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n");
+      break;
+   case SVGA3D_RS_LASTPIXEL:
+      debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n");
+      break;
+   case SVGA3D_RS_CLIPPING:
+      debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n");
+      break;
+   case SVGA3D_RS_WRAP0:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n");
+      break;
+   case SVGA3D_RS_WRAP1:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n");
+      break;
+   case SVGA3D_RS_WRAP2:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n");
+      break;
+   case SVGA3D_RS_WRAP3:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n");
+      break;
+   case SVGA3D_RS_WRAP4:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n");
+      break;
+   case SVGA3D_RS_WRAP5:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n");
+      break;
+   case SVGA3D_RS_WRAP6:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n");
+      break;
+   case SVGA3D_RS_WRAP7:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n");
+      break;
+   case SVGA3D_RS_WRAP8:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n");
+      break;
+   case SVGA3D_RS_WRAP9:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n");
+      break;
+   case SVGA3D_RS_WRAP10:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n");
+      break;
+   case SVGA3D_RS_WRAP11:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n");
+      break;
+   case SVGA3D_RS_WRAP12:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n");
+      break;
+   case SVGA3D_RS_WRAP13:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n");
+      break;
+   case SVGA3D_RS_WRAP14:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n");
+      break;
+   case SVGA3D_RS_WRAP15:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n");
+      break;
+   case SVGA3D_RS_MULTISAMPLEANTIALIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n");
+      break;
+   case SVGA3D_RS_MULTISAMPLEMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n");
+      break;
+   case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n");
+      break;
+   case SVGA3D_RS_TWEENFACTOR:
+      debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n");
+      break;
+   case SVGA3D_RS_ANTIALIASEDLINEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE1:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE2:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE3:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n");
+      break;
+   case SVGA3D_RS_SEPARATEALPHABLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n");
+      break;
+   case SVGA3D_RS_SRCBLENDALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n");
+      break;
+   case SVGA3D_RS_DSTBLENDALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n");
+      break;
+   case SVGA3D_RS_BLENDEQUATIONALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n");
+      break;
+   case SVGA3D_RS_MAX:
+      debug_printf("\t\t.state = SVGA3D_RS_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.state = %i\n", (*cmd).state);
+      break;
+   }
+   debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue);
+   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+/**
+ * Print the value, count, indexedData and instanceData members of an
+ * SVGA3dVertexDivisor, one per line, as unsigned integers.
+ *
+ * @param cmd  divisor to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd)
+{
+   debug_printf("\t\t.value = %u\n", cmd->value);
+   debug_printf("\t\t.count = %u\n", cmd->count);
+   debug_printf("\t\t.indexedData = %u\n", cmd->indexedData);
+   debug_printf("\t\t.instanceData = %u\n", cmd->instanceData);
+}
+
+/**
+ * Print an SVGA3dCmdDefineShader: context id, shader id and shader type
+ * (by enumerator name when recognised, otherwise as a signed number).
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.shid = %u\n", cmd->shid);
+
+   switch (cmd->type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+}
+
+/**
+ * Reinterpret the first sizeof(float) bytes at @p bits as a float.
+ *
+ * The bytes are copied through unsigned char, which may alias any object;
+ * dereferencing a float pointer aimed at integer storage (as a cast would)
+ * is undefined behaviour under the C effective-type rules.
+ *
+ * @param bits  address of at least sizeof(float) readable bytes.
+ * @return the float whose object representation equals those bytes.
+ */
+static float
+dump_shader_const_as_float(const void *bits)
+{
+   const unsigned char *src = bits;
+   float result;
+   unsigned char *dst = (unsigned char *)&result;
+   unsigned i;
+
+   for (i = 0; i < sizeof result; ++i)
+      dst[i] = src[i];
+
+   return result;
+}
+
+/**
+ * Print an SVGA3dCmdSetShaderConst: context id, register number, shader
+ * type and constant type (each by enumerator name when recognised,
+ * otherwise as a signed number), then the four values[] entries.
+ *
+ * For SVGA3D_CONST_TYPE_FLOAT the raw bits of each entry are shown as a
+ * float; for every other constant type the entries are shown as unsigned
+ * integers.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.reg = %u\n", cmd->reg);
+
+   switch (cmd->type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+
+   switch (cmd->ctype) {
+   case SVGA3D_CONST_TYPE_FLOAT:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n");
+      /* values[] is integer storage; go through the byte-copy helper
+       * instead of casting its address to (const float *). */
+      debug_printf("\t\t.values[0] = %f\n", dump_shader_const_as_float(&cmd->values[0]));
+      debug_printf("\t\t.values[1] = %f\n", dump_shader_const_as_float(&cmd->values[1]));
+      debug_printf("\t\t.values[2] = %f\n", dump_shader_const_as_float(&cmd->values[2]));
+      debug_printf("\t\t.values[3] = %f\n", dump_shader_const_as_float(&cmd->values[3]));
+      break;
+   case SVGA3D_CONST_TYPE_INT:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n");
+      debug_printf("\t\t.values[0] = %u\n", cmd->values[0]);
+      debug_printf("\t\t.values[1] = %u\n", cmd->values[1]);
+      debug_printf("\t\t.values[2] = %u\n", cmd->values[2]);
+      debug_printf("\t\t.values[3] = %u\n", cmd->values[3]);
+      break;
+   case SVGA3D_CONST_TYPE_BOOL:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n");
+      debug_printf("\t\t.values[0] = %u\n", cmd->values[0]);
+      debug_printf("\t\t.values[1] = %u\n", cmd->values[1]);
+      debug_printf("\t\t.values[2] = %u\n", cmd->values[2]);
+      debug_printf("\t\t.values[3] = %u\n", cmd->values[3]);
+      break;
+   default:
+      debug_printf("\t\t.ctype = %i\n", cmd->ctype);
+      debug_printf("\t\t.values[0] = %u\n", cmd->values[0]);
+      debug_printf("\t\t.values[1] = %u\n", cmd->values[1]);
+      debug_printf("\t\t.values[2] = %u\n", cmd->values[2]);
+      debug_printf("\t\t.values[3] = %u\n", cmd->values[3]);
+      break;
+   }
+}
+
+/**
+ * Print an SVGA3dCmdSetZRange: the context id followed by the min and max
+ * members of .zRange as floats.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.zRange.min = %f\n", cmd->zRange.min);
+   debug_printf("\t\t.zRange.max = %f\n", cmd->zRange.max);
+}
+
+/**
+ * Print the fixed-size members of an SVGA3dCmdDrawPrimitives: context id,
+ * numVertexDecls and numRanges.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.numVertexDecls = %u\n", cmd->numVertexDecls);
+   debug_printf("\t\t.numRanges = %u\n", cmd->numRanges);
+}
+
+/**
+ * Print an SVGA3dCmdSetLightEnabled: context id, light index and the
+ * enabled member, all as unsigned integers.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.index = %u\n", cmd->index);
+   debug_printf("\t\t.enabled = %u\n", cmd->enabled);
+}
+
+static void
+dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd)
+{
+   switch((*cmd).primType) {
+   case SVGA3D_PRIMITIVE_INVALID:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLELIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_POINTLIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_LINELIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_LINESTRIP:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n");
+      break;
+   case SVGA3D_PRIMITIVE_MAX:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.primType = %i\n", (*cmd).primType);
+      break;
+   }
+   debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount);
+   debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId);
+   debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset);
+   debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride);
+   debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth);
+   debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias);
+}
+
+/**
+ * Print the surface id of an SVGA3dCmdPresent; no other member is read
+ * here.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", cmd->sid);
+}
+
+/**
+ * Print the context id of an SVGA3dCmdSetRenderState; no other member is
+ * read here.
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+}
+
+/**
+ * Print an SVGA3dCmdSurfaceStretchBlt: sid/face/mipmap of src and dest,
+ * the x/y/z/w/h/d members of boxSrc and boxDest, and the stretch mode (by
+ * enumerator name when recognised, otherwise as a signed number).
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd)
+{
+   debug_printf("\t\t.src.sid = %u\n", cmd->src.sid);
+   debug_printf("\t\t.src.face = %u\n", cmd->src.face);
+   debug_printf("\t\t.src.mipmap = %u\n", cmd->src.mipmap);
+   debug_printf("\t\t.dest.sid = %u\n", cmd->dest.sid);
+   debug_printf("\t\t.dest.face = %u\n", cmd->dest.face);
+   debug_printf("\t\t.dest.mipmap = %u\n", cmd->dest.mipmap);
+
+   debug_printf("\t\t.boxSrc.x = %u\n", cmd->boxSrc.x);
+   debug_printf("\t\t.boxSrc.y = %u\n", cmd->boxSrc.y);
+   debug_printf("\t\t.boxSrc.z = %u\n", cmd->boxSrc.z);
+   debug_printf("\t\t.boxSrc.w = %u\n", cmd->boxSrc.w);
+   debug_printf("\t\t.boxSrc.h = %u\n", cmd->boxSrc.h);
+   debug_printf("\t\t.boxSrc.d = %u\n", cmd->boxSrc.d);
+
+   debug_printf("\t\t.boxDest.x = %u\n", cmd->boxDest.x);
+   debug_printf("\t\t.boxDest.y = %u\n", cmd->boxDest.y);
+   debug_printf("\t\t.boxDest.z = %u\n", cmd->boxDest.z);
+   debug_printf("\t\t.boxDest.w = %u\n", cmd->boxDest.w);
+   debug_printf("\t\t.boxDest.h = %u\n", cmd->boxDest.h);
+   debug_printf("\t\t.boxDest.d = %u\n", cmd->boxDest.d);
+
+   switch (cmd->mode) {
+   case SVGA3D_STRETCH_BLT_POINT:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n");
+      break;
+   case SVGA3D_STRETCH_BLT_LINEAR:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n");
+      break;
+   case SVGA3D_STRETCH_BLT_MAX:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.mode = %i\n", cmd->mode);
+      break;
+   }
+}
+
+/**
+ * Print an SVGA3dCmdSurfaceDMA: the gmrId/offset of guest.ptr, the guest
+ * pitch, the sid/face/mipmap of host, and the transfer direction (by
+ * enumerator name when recognised, otherwise as a signed number).
+ *
+ * @param cmd  command body to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd)
+{
+   debug_printf("\t\t.guest.ptr.gmrId = %u\n", cmd->guest.ptr.gmrId);
+   debug_printf("\t\t.guest.ptr.offset = %u\n", cmd->guest.ptr.offset);
+   debug_printf("\t\t.guest.pitch = %u\n", cmd->guest.pitch);
+
+   debug_printf("\t\t.host.sid = %u\n", cmd->host.sid);
+   debug_printf("\t\t.host.face = %u\n", cmd->host.face);
+   debug_printf("\t\t.host.mipmap = %u\n", cmd->host.mipmap);
+
+   switch (cmd->transfer) {
+   case SVGA3D_WRITE_HOST_VRAM:
+      debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n");
+      break;
+   case SVGA3D_READ_HOST_VRAM:
+      debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n");
+      break;
+   default:
+      /* Unknown enumerator: fall back to the raw value. */
+      debug_printf("\t\t.transfer = %i\n", cmd->transfer);
+      break;
+   }
+}
+
+/**
+ * Print an SVGA3dCmdSurfaceDMASuffix: suffixSize, maximumOffset and the
+ * discard / unsynchronized members of .flags, all as unsigned integers.
+ *
+ * @param cmd  suffix to dump; dereferenced unconditionally.
+ */
+static void
+dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd)
+{
+   debug_printf("\t\t.suffixSize = %u\n", cmd->suffixSize);
+   debug_printf("\t\t.maximumOffset = %u\n", cmd->maximumOffset);
+   debug_printf("\t\t.flags.discard = %u\n", cmd->flags.discard);
+   debug_printf("\t\t.flags.unsynchronized = %u\n", cmd->flags.unsynchronized);
+}
+
+/* Print the context id, the transform type (by name when known) and all 16 matrix entries. */
+static void dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   switch (cmd->type) {
+   case SVGA3D_TRANSFORM_INVALID:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n");
+      break;
+   case SVGA3D_TRANSFORM_VIEW:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n");
+      break;
+   case SVGA3D_TRANSFORM_PROJECTION:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE0:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE1:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE2:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE3:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE4:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE5:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE6:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE7:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD1:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD2:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD3:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n");
+      break;
+   case SVGA3D_TRANSFORM_MAX:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+   debug_printf("\t\t.matrix[0] = %f\n", cmd->matrix[0]);
+   debug_printf("\t\t.matrix[1] = %f\n", cmd->matrix[1]);
+   debug_printf("\t\t.matrix[2] = %f\n", cmd->matrix[2]);
+   debug_printf("\t\t.matrix[3] = %f\n", cmd->matrix[3]);
+   debug_printf("\t\t.matrix[4] = %f\n", cmd->matrix[4]);
+   debug_printf("\t\t.matrix[5] = %f\n", cmd->matrix[5]);
+   debug_printf("\t\t.matrix[6] = %f\n", cmd->matrix[6]);
+   debug_printf("\t\t.matrix[7] = %f\n", cmd->matrix[7]);
+   debug_printf("\t\t.matrix[8] = %f\n", cmd->matrix[8]);
+   debug_printf("\t\t.matrix[9] = %f\n", cmd->matrix[9]);
+   debug_printf("\t\t.matrix[10] = %f\n", cmd->matrix[10]);
+   debug_printf("\t\t.matrix[11] = %f\n", cmd->matrix[11]);
+   debug_printf("\t\t.matrix[12] = %f\n", cmd->matrix[12]);
+   debug_printf("\t\t.matrix[13] = %f\n", cmd->matrix[13]);
+   debug_printf("\t\t.matrix[14] = %f\n", cmd->matrix[14]);
+   debug_printf("\t\t.matrix[15] = %f\n", cmd->matrix[15]);
+}
+
+/* Print the context id, shader id and shader type (by name when known) of a destroy-shader command. */
+static void dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   debug_printf("\t\t.shid = %u\n", cmd->shid);
+   switch (cmd->type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", cmd->type);
+      break;
+   }
+}
+
+/* Print the only field of a destroy-context command: the context id. */
+static void dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+}
+
+/* Print a clear command; clearFlag is named only when it equals one listed value, else shown as an integer. */
+static void dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", cmd->cid);
+   switch (cmd->clearFlag) {
+   case SVGA3D_CLEAR_COLOR:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n");
+      break;
+   case SVGA3D_CLEAR_DEPTH:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n");
+      break;
+   case SVGA3D_CLEAR_STENCIL:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n");
+      break;
+   default:
+      debug_printf("\t\t.clearFlag = %i\n", cmd->clearFlag);
+      break;
+   }
+   debug_printf("\t\t.color = %u\n", cmd->color);
+   debug_printf("\t\t.depth = %f\n", cmd->depth);
+   debug_printf("\t\t.stencil = %u\n", cmd->stencil);
+}
+
+/* Print a define-surface header: sid, surfaceFlags and format (named when known), and six per-face mip counts. */
+static void dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", cmd->sid);
+   switch (cmd->surfaceFlags) {
+   case SVGA3D_SURFACE_CUBEMAP:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n");
+      break;
+   case SVGA3D_SURFACE_HINT_STATIC:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n");
+      break;
+   case SVGA3D_SURFACE_HINT_DYNAMIC:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n");
+      break;
+   case SVGA3D_SURFACE_HINT_INDEXBUFFER:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n");
+      break;
+   case SVGA3D_SURFACE_HINT_VERTEXBUFFER:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n");
+      break;
+   default:
+      debug_printf("\t\t.surfaceFlags = %i\n", cmd->surfaceFlags);
+      break;
+   }
+   switch (cmd->format) {
+   case SVGA3D_FORMAT_INVALID:
+      debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n");
+      break;
+   case SVGA3D_X8R8G8B8:
+      debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n");
+      break;
+   case SVGA3D_A8R8G8B8:
+      debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n");
+      break;
+   case SVGA3D_R5G6B5:
+      debug_printf("\t\t.format = SVGA3D_R5G6B5\n");
+      break;
+   case SVGA3D_X1R5G5B5:
+      debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n");
+      break;
+   case SVGA3D_A1R5G5B5:
+      debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n");
+      break;
+   case SVGA3D_A4R4G4B4:
+      debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n");
+      break;
+   case SVGA3D_Z_D32:
+      debug_printf("\t\t.format = SVGA3D_Z_D32\n");
+      break;
+   case SVGA3D_Z_D16:
+      debug_printf("\t\t.format = SVGA3D_Z_D16\n");
+      break;
+   case SVGA3D_Z_D24S8:
+      debug_printf("\t\t.format = SVGA3D_Z_D24S8\n");
+      break;
+   case SVGA3D_Z_D15S1:
+      debug_printf("\t\t.format = SVGA3D_Z_D15S1\n");
+      break;
+   case SVGA3D_LUMINANCE8:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n");
+      break;
+   case SVGA3D_LUMINANCE4_ALPHA4:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n");
+      break;
+   case SVGA3D_LUMINANCE16:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n");
+      break;
+   case SVGA3D_LUMINANCE8_ALPHA8:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n");
+      break;
+   case SVGA3D_DXT1:
+      debug_printf("\t\t.format = SVGA3D_DXT1\n");
+      break;
+   case SVGA3D_DXT2:
+      debug_printf("\t\t.format = SVGA3D_DXT2\n");
+      break;
+   case SVGA3D_DXT3:
+      debug_printf("\t\t.format = SVGA3D_DXT3\n");
+      break;
+   case SVGA3D_DXT4:
+      debug_printf("\t\t.format = SVGA3D_DXT4\n");
+      break;
+   case SVGA3D_DXT5:
+      debug_printf("\t\t.format = SVGA3D_DXT5\n");
+      break;
+   case SVGA3D_BUMPU8V8:
+      debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n");
+      break;
+   case SVGA3D_BUMPL6V5U5:
+      debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
+      break;
+   case SVGA3D_BUMPX8L8V8U8:
+      debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
+      break;
+   case SVGA3D_BUMPL8V8U8:
+      debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
+      break;
+   case SVGA3D_ARGB_S10E5:
+      debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
+      break;
+   case SVGA3D_ARGB_S23E8:
+      debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n");
+      break;
+   case SVGA3D_A2R10G10B10:
+      debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n");
+      break;
+   case SVGA3D_V8U8:
+      debug_printf("\t\t.format = SVGA3D_V8U8\n");
+      break;
+   case SVGA3D_Q8W8V8U8:
+      debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n");
+      break;
+   case SVGA3D_CxV8U8:
+      debug_printf("\t\t.format = SVGA3D_CxV8U8\n");
+      break;
+   case SVGA3D_X8L8V8U8:
+      debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n");
+      break;
+   case SVGA3D_A2W10V10U10:
+      debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n");
+      break;
+   case SVGA3D_ALPHA8:
+      debug_printf("\t\t.format = SVGA3D_ALPHA8\n");
+      break;
+   case SVGA3D_R_S10E5:
+      debug_printf("\t\t.format = SVGA3D_R_S10E5\n");
+      break;
+   case SVGA3D_R_S23E8:
+      debug_printf("\t\t.format = SVGA3D_R_S23E8\n");
+      break;
+   case SVGA3D_RG_S10E5:
+      debug_printf("\t\t.format = SVGA3D_RG_S10E5\n");
+      break;
+   case SVGA3D_RG_S23E8:
+      debug_printf("\t\t.format = SVGA3D_RG_S23E8\n");
+      break;
+   case SVGA3D_BUFFER:
+      debug_printf("\t\t.format = SVGA3D_BUFFER\n");
+      break;
+   case SVGA3D_Z_D24X8:
+      debug_printf("\t\t.format = SVGA3D_Z_D24X8\n");
+      break;
+   case SVGA3D_FORMAT_MAX:
+      debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.format = %i\n", cmd->format);
+      break;
+   }
+   debug_printf("\t\t.face[0].numMipLevels = %u\n", cmd->face[0].numMipLevels);
+   debug_printf("\t\t.face[1].numMipLevels = %u\n", cmd->face[1].numMipLevels);
+   debug_printf("\t\t.face[2].numMipLevels = %u\n", cmd->face[2].numMipLevels);
+   debug_printf("\t\t.face[3].numMipLevels = %u\n", cmd->face[3].numMipLevels);
+   debug_printf("\t\t.face[4].numMipLevels = %u\n", cmd->face[4].numMipLevels);
+   debug_printf("\t\t.face[5].numMipLevels = %u\n", cmd->face[5].numMipLevels);
+}
+
+
+/* Decode the `size`-byte SVGA command buffer at `commands` and print each command through debug_printf. */
+void svga_dump_commands(const void *commands, uint32_t size)
+{
+   const uint8_t *next = commands;
+   const uint8_t *last = next + size;
+   /* The stream is read in 32-bit units, hence the requirement on size. */
+   assert(size % sizeof(uint32_t) == 0);
+   /* Each pass decodes one command and leaves next at the start of the following one. */
+   while(next < last) {
+      const uint32_t cmd_id = *(const uint32_t *)next;
+      /* 3D commands start with a full SVGA3dCmdHeader; a truncated one is printed raw by the last branch. */
+      if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX && next + sizeof(SVGA3dCmdHeader) <= last) {
+         const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+         const uint8_t *body = (const uint8_t *)&header[1];
+         /* header->size is the body length in bytes; stop if it claims more than the buffer holds. */
+         next = (const uint8_t *)body + header->size;
+         if(next > last)
+            break;
+         /* Known ids dump their fixed struct, then any trailing structs, advancing body as they go. */
+         switch(cmd_id) {
+         case SVGA_3D_CMD_SURFACE_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+            {
+               const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+               dump_SVGA3dCmdDefineSurface(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dSize) <= next) {
+                  dump_SVGA3dSize((const SVGA3dSize *)body);
+                  body += sizeof(SVGA3dSize);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+            {
+               const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+               dump_SVGA3dCmdDestroySurface(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_COPY:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+            {
+               const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+               dump_SVGA3dCmdSurfaceCopy(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyBox) <= next) {
+                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+                  body += sizeof(SVGA3dCopyBox);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+            {
+               const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+               dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_DMA:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+            {
+               const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+               dump_SVGA3dCmdSurfaceDMA(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyBox) <= next) {
+                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+                  body += sizeof(SVGA3dCopyBox);
+               }
+               while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+                  dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+                  body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_CONTEXT_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+            {
+               const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+               dump_SVGA3dCmdDefineContext(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_CONTEXT_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+            {
+               const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+               dump_SVGA3dCmdDestroyContext(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETTRANSFORM:
+            debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+            {
+               const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+               dump_SVGA3dCmdSetTransform(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETZRANGE:
+            debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+            {
+               const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+               dump_SVGA3dCmdSetZRange(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETRENDERSTATE:
+            debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+            {
+               const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+               dump_SVGA3dCmdSetRenderState(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dRenderState) <= next) {
+                  dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+                  body += sizeof(SVGA3dRenderState);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETRENDERTARGET:
+            debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+            {
+               const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+               dump_SVGA3dCmdSetRenderTarget(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETTEXTURESTATE:
+            debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+            {
+               const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+               dump_SVGA3dCmdSetTextureState(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dTextureState) <= next) {
+                  dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+                  body += sizeof(SVGA3dTextureState);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETMATERIAL:
+            debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+            {
+               const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+               dump_SVGA3dCmdSetMaterial(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETLIGHTDATA:
+            debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+            {
+               const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+               dump_SVGA3dCmdSetLightData(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETLIGHTENABLED:
+            debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+            {
+               const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+               dump_SVGA3dCmdSetLightEnabled(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETVIEWPORT:
+            debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+            {
+               const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+               dump_SVGA3dCmdSetViewport(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETCLIPPLANE:
+            debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+            {
+               const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+               dump_SVGA3dCmdSetClipPlane(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_CLEAR:
+            debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+            {
+               const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+               dump_SVGA3dCmdClear(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dRect) <= next) {
+                  dump_SVGA3dRect((const SVGA3dRect *)body);
+                  body += sizeof(SVGA3dRect);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_PRESENT:
+            debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+            {
+               const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+               dump_SVGA3dCmdPresent(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyRect) <= next) {
+                  dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+                  body += sizeof(SVGA3dCopyRect);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SHADER_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+            {
+               const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+               dump_SVGA3dCmdDefineShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+               sh_svga_dump((const uint32_t *)body,
+                            (unsigned)(next - body)/sizeof(uint32_t),
+                            FALSE );
+               body = next;
+            }
+            break;
+         case SVGA_3D_CMD_SHADER_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+            {
+               const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+               dump_SVGA3dCmdDestroyShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SET_SHADER:
+            debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+            {
+               const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+               dump_SVGA3dCmdSetShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SET_SHADER_CONST:
+            debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+            {
+               const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+               dump_SVGA3dCmdSetShaderConst(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_DRAW_PRIMITIVES:
+            debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+            {
+               const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+               unsigned i, j;
+               dump_SVGA3dCmdDrawPrimitives(cmd);
+               body = (const uint8_t *)&cmd[1];
+               for(i = 0; i < cmd->numVertexDecls && body + sizeof(SVGA3dVertexDecl) <= next; ++i) {
+                  dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+                  body += sizeof(SVGA3dVertexDecl);
+               }
+               for(j = 0; j < cmd->numRanges && body + sizeof(SVGA3dPrimitiveRange) <= next; ++j) {
+                  dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+                  body += sizeof(SVGA3dPrimitiveRange);
+               }
+               while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+                  dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+                  body += sizeof(SVGA3dVertexDivisor);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETSCISSORRECT:
+            debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+            {
+               const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+               dump_SVGA3dCmdSetScissorRect(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_BEGIN_QUERY:
+            debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+            {
+               const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+               dump_SVGA3dCmdBeginQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_END_QUERY:
+            debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+            {
+               const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+               dump_SVGA3dCmdEndQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_WAIT_FOR_QUERY:
+            debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+            {
+               const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+               dump_SVGA3dCmdWaitForQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         default:
+            debug_printf("\t0x%08x\n", cmd_id);
+            break;
+         }
+         /* Whatever the case left unconsumed is printed raw: whole dwords first, then stray bytes. */
+         while(body + sizeof(uint32_t) <= next) {
+            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            body += sizeof(uint32_t);
+         }
+         while(body < next)
+            debug_printf("\t\t0x%02x\n", *body++);
+      }
+      else if(cmd_id == SVGA_CMD_FENCE && next + 2*sizeof(uint32_t) <= last) {
+         debug_printf("\tSVGA_CMD_FENCE\n");
+         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         next += 2*sizeof(uint32_t);
+      }
+      else {
+         debug_printf("\t0x%08x\n", cmd_id);
+         next += sizeof(uint32_t);
+      }
+   }
+}
+
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
new file mode 100644
index 0000000000..69a8702087
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -0,0 +1,34 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DUMP_H_
+#define SVGA_DUMP_H_
+
+#include "pipe/p_compiler.h"
+/* Print a decoded listing of the `size`-byte SVGA command buffer at `commands` via debug_printf. */
+void
+svga_dump_commands(const void *commands, uint32_t size);
+
+#endif /* SVGA_DUMP_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
new file mode 100755
index 0000000000..3cb29c395b
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python
+'''
+Generates dumper for the SVGA 3D command stream using pygccxml.
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+# License banner in C comment syntax. NOTE(review): presumably printed at the top of the generated C; its use is not visible here.
+copyright = '''
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+ '''
+
+import os
+import sys
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True  # True: visit_enumeration emits a switch naming each enumerator; False: enum members are printed with %i only
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+
+    def __init__(self, instance = '', decl = None):
+        decl_visitor.decl_visitor_t.__init__(self)
+        self._instance = instance
+        self.decl = decl
+
+    def clone(self):
+        return decl_dumper_t(self._instance, self.decl)
+
+    def visit_class(self):
+        class_ = self.decl
+        assert self.decl.class_type in ('struct', 'union')
+
+        for variable in class_.variables():
+            if variable.name != '':
+                #print 'variable = %r' % variable.name
+                dump_type(self._instance + '.' + variable.name, variable.type)
+
+    def visit_enumeration(self):
+        if enums:
+            print '   switch(%s) {' % ("(*cmd)" + self._instance,)
+            for name, value in self.decl.values:
+                print '   case %s:' % (name,)
+                print '      debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name)
+                print '      break;'
+            print '   default:'
+            print '      debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+            print '      break;'
+            print '   }'
+        else:
+            print '   debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+
+
+def dump_decl(instance, decl):
+    dumper = decl_dumper_t(instance, decl)
+    algorithm.apply_visitor(dumper, decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+
+    def __init__(self, instance, type_):
+        type_visitor.type_visitor_t.__init__(self)
+        self.instance = instance
+        self.type = type_
+
+    def clone(self):
+        return type_dumper_t(self.instance, self.type)
+
+    def visit_char(self):
+        self.print_instance('%i')
+        
+    def visit_unsigned_char(self):
+        self.print_instance('%u')
+
+    def visit_signed_char(self):
+        self.print_instance('%i')
+    
+    def visit_wchar(self):
+        self.print_instance('%i')
+        
+    def visit_short_int(self):
+        self.print_instance('%i')
+        
+    def visit_short_unsigned_int(self):
+        self.print_instance('%u')
+        
+    def visit_bool(self):
+        self.print_instance('%i')
+        
+    def visit_int(self):
+        self.print_instance('%i')
+        
+    def visit_unsigned_int(self):
+        self.print_instance('%u')
+        
+    def visit_long_int(self):
+        self.print_instance('%li')
+        
+    def visit_long_unsigned_int(self):
+        self.print_instance('%lu')
+        
+    def visit_long_long_int(self):
+        self.print_instance('%lli')
+        
+    def visit_long_long_unsigned_int(self):
+        self.print_instance('%llu')
+        
+    def visit_float(self):
+        self.print_instance('%f')
+        
+    def visit_double(self):
+        self.print_instance('%f')
+        
+    def visit_array(self):
+        for i in range(type_traits.array_size(self.type)):
+            dump_type(self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+    def visit_pointer(self):
+        self.print_instance('%p')
+
+    def visit_declarated(self):
+        #print 'decl = %r' % self.type.decl_string
+        decl = type_traits.remove_declarated(self.type)
+        dump_decl(self.instance, decl)
+
+    def print_instance(self, format):
+        print '   debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance)
+
+
+def dump_type(instance, type_):
+    type_ = type_traits.remove_alias(type_)
+    visitor = type_dumper_t(instance, type_)
+    algorithm.apply_visitor(visitor, type_)
+
+
+def dump_struct(decls, class_):
+    print 'static void'
+    print 'dump_%s(const %s *cmd)' % (class_.name, class_.name)
+    print '{'
+    dump_decl('', class_)
+    print '}'
+    print ''
+
+
+cmds = [  # each entry: (command id, header struct, ((counted struct, count field of the header), ...), repeated trailing struct or None)
+    ('SVGA_3D_CMD_SURFACE_DEFINE', 'SVGA3dCmdDefineSurface', (), 'SVGA3dSize'),
+    ('SVGA_3D_CMD_SURFACE_DESTROY', 'SVGA3dCmdDestroySurface', (), None),
+    ('SVGA_3D_CMD_SURFACE_COPY', 'SVGA3dCmdSurfaceCopy', (), 'SVGA3dCopyBox'),
+    ('SVGA_3D_CMD_SURFACE_STRETCHBLT', 'SVGA3dCmdSurfaceStretchBlt', (), None),
+    ('SVGA_3D_CMD_SURFACE_DMA', 'SVGA3dCmdSurfaceDMA', (), 'SVGA3dCopyBox'),
+    ('SVGA_3D_CMD_CONTEXT_DEFINE', 'SVGA3dCmdDefineContext', (), None),
+    ('SVGA_3D_CMD_CONTEXT_DESTROY', 'SVGA3dCmdDestroyContext', (), None),
+    ('SVGA_3D_CMD_SETTRANSFORM', 'SVGA3dCmdSetTransform', (), None),
+    ('SVGA_3D_CMD_SETZRANGE', 'SVGA3dCmdSetZRange', (), None),
+    ('SVGA_3D_CMD_SETRENDERSTATE', 'SVGA3dCmdSetRenderState', (), 'SVGA3dRenderState'),
+    ('SVGA_3D_CMD_SETRENDERTARGET', 'SVGA3dCmdSetRenderTarget', (), None),
+    ('SVGA_3D_CMD_SETTEXTURESTATE', 'SVGA3dCmdSetTextureState', (), 'SVGA3dTextureState'),
+    ('SVGA_3D_CMD_SETMATERIAL', 'SVGA3dCmdSetMaterial', (), None),
+    ('SVGA_3D_CMD_SETLIGHTDATA', 'SVGA3dCmdSetLightData', (), None),
+    ('SVGA_3D_CMD_SETLIGHTENABLED', 'SVGA3dCmdSetLightEnabled', (), None),
+    ('SVGA_3D_CMD_SETVIEWPORT', 'SVGA3dCmdSetViewport', (), None),
+    ('SVGA_3D_CMD_SETCLIPPLANE', 'SVGA3dCmdSetClipPlane', (), None),
+    ('SVGA_3D_CMD_CLEAR', 'SVGA3dCmdClear', (), 'SVGA3dRect'),
+    ('SVGA_3D_CMD_PRESENT', 'SVGA3dCmdPresent', (), 'SVGA3dCopyRect'),
+    ('SVGA_3D_CMD_SHADER_DEFINE', 'SVGA3dCmdDefineShader', (), None),
+    ('SVGA_3D_CMD_SHADER_DESTROY', 'SVGA3dCmdDestroyShader', (), None),
+    ('SVGA_3D_CMD_SET_SHADER', 'SVGA3dCmdSetShader', (), None),
+    ('SVGA_3D_CMD_SET_SHADER_CONST', 'SVGA3dCmdSetShaderConst', (), None),
+    ('SVGA_3D_CMD_DRAW_PRIMITIVES', 'SVGA3dCmdDrawPrimitives', (('SVGA3dVertexDecl', 'numVertexDecls'), ('SVGA3dPrimitiveRange', 'numRanges')), 'SVGA3dVertexDivisor'),
+    ('SVGA_3D_CMD_SETSCISSORRECT', 'SVGA3dCmdSetScissorRect', (), None),
+    ('SVGA_3D_CMD_BEGIN_QUERY', 'SVGA3dCmdBeginQuery', (), None),
+    ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None),
+    ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None),
+    #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None),  # disabled entry: no header struct is given for this command
+]
+
+def dump_cmds():
+    print r'''
+void            
+svga_dump_commands(const void *commands, uint32_t size)
+{
+   const uint8_t *next = commands;
+   const uint8_t *last = next + size;
+   
+   assert(size % sizeof(uint32_t) == 0);
+   
+   while(next < last) {
+      const uint32_t cmd_id = *(const uint32_t *)next;
+
+      if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
+         const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+         const uint8_t *body = (const uint8_t *)&header[1];
+
+         next = (const uint8_t *)body + header->size;
+         if(next > last)
+            break;
+'''
+
+    print '         switch(cmd_id) {'
+    indexes = 'ijklmn'
+    for id, header, body, footer in cmds:
+        print '         case %s:' % id
+        print '            debug_printf("\\t%s\\n");' % id
+        print '            {'
+        print '               const %s *cmd = (const %s *)body;' % (header, header)
+        if len(body):
+            print '               unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+        print '               dump_%s(cmd);' % header
+        print '               body = (const uint8_t *)&cmd[1];'
+        for i in range(len(body)):
+            struct, count = body[i]
+            idx = indexes[i]
+            print '               for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+            print '                  dump_%s((const %s *)body);' % (struct, struct)
+            print '                  body += sizeof(%s);' % struct
+            print '               }'
+        if footer is not None:
+            print '               while(body + sizeof(%s) <= next) {' % footer
+            print '                  dump_%s((const %s *)body);' % (footer, footer)
+            print '                  body += sizeof(%s);' % footer
+            print '               }'
+        if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+            print '               sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));'
+            print '               body = next;'
+        print '            }'
+        print '            break;'
+    print '         default:'
+    print '            debug_printf("\\t0x%08x\\n", cmd_id);'
+    print '            break;'
+    print '         }'
+            
+    print r'''
+         while(body + sizeof(uint32_t) <= next) {
+            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            body += sizeof(uint32_t);
+         }
+         while(body + sizeof(uint32_t) <= next)
+            debug_printf("\t\t0x%02x\n", *body++);
+      }
+      else if(cmd_id == SVGA_CMD_FENCE) {
+         debug_printf("\tSVGA_CMD_FENCE\n");
+         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         next += 2*sizeof(uint32_t);
+      }
+      else {
+         debug_printf("\t0x%08x\n", cmd_id);
+         next += sizeof(uint32_t);
+      }
+   }
+}
+'''
+
+def main():
+    print copyright.strip()
+    print
+    print '/**'
+    print ' * @file'
+    print ' * Dump SVGA commands.'
+    print ' *'
+    print ' * Generated automatically from svga3d_reg.h by svga_dump.py.'
+    print ' */'
+    print
+    print '#include "svga_types.h"'
+    print '#include "shader_dump/st_shader_dump.h"'
+    print '#include "svga3d_reg.h"'
+    print
+    print '#include "pipe/p_debug.h"'
+    print '#include "svga_dump.h"'
+    print
+
+    config = parser.config_t(
+        include_paths = ['include'],
+        compiler = 'gcc',
+    )
+
+    headers = [
+        'include/svga_types.h', 
+        'include/svga3d_reg.h', 
+    ]
+
+    decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+    global_ns = declarations.get_global_namespace(decls)
+
+    names = set()
+    for id, header, body, footer in cmds:
+        names.add(header)
+        for struct, count in body:
+            names.add(struct)
+        if footer is not None:
+            names.add(footer)
+
+    for class_ in global_ns.classes(lambda decl: decl.name in names):
+        dump_struct(decls, class_)
+
+    dump_cmds()
+
+
+if __name__ == '__main__':  # generate only when run as a script, not on import
+    main()
diff --git a/src/gallium/winsys/drm/vmware/Makefile b/src/gallium/winsys/drm/vmware/Makefile
new file mode 100644
index 0000000000..2ae6dead5c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/Makefile
@@ -0,0 +1,12 @@
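+# src/gallium/winsys/drm/vmware/Makefile -- forwards default/install/clean to core/ and to each $(GALLIUM_STATE_TRACKERS_DIRS) entry that exists here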
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = core $(GALLIUM_STATE_TRACKERS_DIRS)
+
+default install clean:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) $@) || exit 1; \
+		fi \
+	done
diff --git a/src/gallium/winsys/drm/vmware/SConscript b/src/gallium/winsys/drm/vmware/SConscript
new file mode 100644
index 0000000000..06e6d5be9c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/SConscript
@@ -0,0 +1,11 @@
+Import('*')
+
+SConscript(['core/SConscript',])
+
+if 'mesa' in env['statetrackers']:
+    # dri/ is only built when the 'mesa' state tracker is enabled.
+    SConscript(['dri/SConscript'])
+
+if 'xorg' in env['statetrackers']:
+    # xorg/ is only built when the 'xorg' state tracker is enabled.
+    SConscript(['xorg/SConscript'])
diff --git a/src/gallium/winsys/drm/vmware/core/Makefile b/src/gallium/winsys/drm/vmware/core/Makefile
new file mode 100644
index 0000000000..755dc45935
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/Makefile
@@ -0,0 +1,47 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = svgadrm
+
+C_SOURCES = \
+        vmw_buffer.c \
+        vmw_context.c  \
+        vmw_fence.c  \
+        vmw_screen.c  \
+        vmw_screen_dri.c  \
+        vmw_screen_ioctl.c  \
+        vmw_screen_pools.c  \
+        vmw_screen_svga.c  \
+        vmw_surface.c
+
+LIBRARY_INCLUDES = \
+       -I$(TOP)/src/gallium/drivers/svga \
+       -I$(TOP)/src/gallium/drivers/svga/include \
+       -I$(GALLIUM)/src/mesa/drivers/dri/common \
+       -I$(GALLIUM)/include \
+       -I$(GALLIUM)/include/GL/internal \
+       -I$(GALLIUM)/src/mesa \
+       -I$(GALLIUM)/src/mesa/main \
+       -I$(GALLIUM)/src/mesa/glapi \
+       -I$(GALLIUM)/src/egl/main \
+       -I$(GALLIUM)/src/egl/drivers/dri \
+       $(shell pkg-config libdrm --cflags-only-I)
+
+LIBRARY_DEFINES = \
+       -DHAVE_STDINT_H -D_FILE_OFFSET_BITS=64 \
+       $(shell pkg-config libdrm --cflags-only-other)
+
+CC = gcc -fvisibility=hidden -msse -msse2
+
+# Use the gnu99 dialect: the vmware headers rely on anonymous structs.
+# Note that -Werror below turns every compiler warning into a build failure.
+CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \
+       $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS)
+
+include ../../../../Makefile.template
+
+
+symlinks:
+
+
+include depend
diff --git a/src/gallium/winsys/drm/vmware/core/SConscript b/src/gallium/winsys/drm/vmware/core/SConscript
new file mode 100644
index 0000000000..1875b659ac
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/SConscript
@@ -0,0 +1,39 @@
+Import('*')
+
+env = env.Clone()
+
+if env['gcc']:
+	env.Append(CCFLAGS = ['-fvisibility=hidden', '-Werror'])
+	env.Append(CPPDEFINES = [
+		'HAVE_STDINT_H', 
+		'HAVE_SYS_TYPES_H',
+                '-D_FILE_OFFSET_BITS=64',
+	])
+	
+env.Prepend(CPPPATH = [
+	'include',
+        '#/src/gallium/drivers/svga',
+        '#/src/gallium/drivers/svga/include',
+])
+
+env.Append(CPPDEFINES = [
+])
+
+sources = [
+        'vmw_buffer.c',
+        'vmw_context.c',
+        'vmw_fence.c',
+        'vmw_screen.c',
+        'vmw_screen_dri.c',
+        'vmw_screen_ioctl.c',
+        'vmw_screen_pools.c',
+        'vmw_screen_svga.c',
+        'vmw_surface.c',
+]
+
+svgadrm = env.ConvenienceLibrary(
+	target = 'svgadrm',
+	source = sources,
+)
+
+Export('svgadrm')
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.c b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c
new file mode 100644
index 0000000000..b812fb59d3
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c
@@ -0,0 +1,274 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA buffer manager for Guest Memory Regions (GMRs).
+ * 
+ * GMRs are used for pixel and vertex data upload/download to/from the virtual
+ * SVGA hardware. There is a limited number of GMRs available, and 
+ * creating/destroying them is also a slow operation so we must suballocate 
+ * them.
+ * 
+ * This file implements a pipebuffer library's buffer manager, so that we can
+ * use pipepbuffer's suballocation, fencing, and debugging facilities with GMRs. 
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+#include "svga_winsys.h"
+
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+
+
+struct vmw_gmr_bufmgr;
+
+
+struct vmw_gmr_buffer
+{
+   struct pb_buffer base;        /* first member, so a pb_buffer* can be cast to this type */
+   
+   struct vmw_gmr_bufmgr *mgr;   /* manager that created the buffer */
+   
+   struct vmw_region *region;    /* region obtained from vmw_ioctl_region_create() */
+   void *map;                    /* address returned by vmw_ioctl_region_map() at creation */
+   
+#ifdef DEBUG
+   struct pipe_fence_handle *last_fence;   /* last non-NULL fence seen by vmw_gmr_buffer_fence() */
+#endif
+};
+
+
+extern const struct pb_vtbl vmw_gmr_buffer_vtbl;
+
+
+static INLINE struct vmw_gmr_buffer *
+vmw_gmr_buffer(struct pb_buffer *buf)
+{
+   assert(buf);                                /* debug-only NULL check */
+   assert(buf->vtbl == &vmw_gmr_buffer_vtbl);  /* only buffers carrying the GMR vtable qualify */
+   return (struct vmw_gmr_buffer *)buf;        /* valid because base is the first member */
+}
+
+
+struct vmw_gmr_bufmgr
+{
+   struct pb_manager base;          /* first member, so a pb_manager* can be cast to this type */
+   
+   struct vmw_winsys_screen *vws;   /* screen passed to vmw_ioctl_region_create() for each buffer */
+};
+
+
+static INLINE struct vmw_gmr_bufmgr *
+vmw_gmr_bufmgr(struct pb_manager *mgr)
+{
+   assert(mgr);                          /* debug-only NULL check */
+   return (struct vmw_gmr_bufmgr *)mgr;  /* valid because base is the first member */
+}
+
+
+static void
+vmw_gmr_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct vmw_gmr_buffer *gmr = vmw_gmr_buffer(_buf);
+   struct vmw_region *region = gmr->region;
+#ifdef DEBUG
+   /* Debug check: fence_signalled() must report 0 for the last fence. */
+   if(gmr->last_fence != NULL) {
+      struct svga_winsys_screen *screen = &gmr->mgr->vws->base;
+      assert(screen->fence_signalled(screen, gmr->last_fence, 0) == 0);
+   }
+#endif
+
+   /* Undo creation in reverse order: mapping, region, then the wrapper. */
+   vmw_ioctl_region_unmap(region);
+   vmw_ioctl_region_destroy(region);
+   FREE(gmr);
+}
+
+
+static void *
+vmw_gmr_buffer_map(struct pb_buffer *_buf,
+                   unsigned flags)
+{
+   (void)flags; /* unused: the mapping made at creation is simply handed out */
+   return vmw_gmr_buffer(_buf)->map;
+}
+
+
+static void
+vmw_gmr_buffer_unmap(struct pb_buffer *_buf)
+{
+   /* Do nothing: the mapping is only released in vmw_gmr_buffer_destroy() */
+   (void)_buf;
+}
+
+
+static void
+vmw_gmr_buffer_get_base_buffer(struct pb_buffer *buf,
+                               struct pb_buffer **base_buf,
+                               unsigned *offset)
+{
+   *offset = 0;       /* a GMR buffer starts at the beginning of its region */
+   *base_buf = buf;   /* ...and is reported as its own base buffer */
+}
+
+
+static enum pipe_error
+vmw_gmr_buffer_validate( struct pb_buffer *_buf, 
+                         struct pb_validate *vl,
+                         unsigned flags )
+{
+   /* Always pinned: nothing to do, so validation cannot fail */
+   return PIPE_OK;
+}
+
+
+static void
+vmw_gmr_buffer_fence( struct pb_buffer *_buf, 
+                      struct pipe_fence_handle *fence )
+{
+   /* We don't need to do anything, as the pipebuffer library
+    * will take care of delaying the destruction of fenced buffers */  
+#ifdef DEBUG
+   struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf);
+   if(fence)                     /* a NULL fence keeps the previous value */
+      buf->last_fence = fence;   /* checked by the assert in vmw_gmr_buffer_destroy() */
+#endif
+}
+
+
+const struct pb_vtbl vmw_gmr_buffer_vtbl = {   /* positional initializers: order must match the members of struct pb_vtbl */
+   vmw_gmr_buffer_destroy,
+   vmw_gmr_buffer_map,
+   vmw_gmr_buffer_unmap,
+   vmw_gmr_buffer_validate,
+   vmw_gmr_buffer_fence,
+   vmw_gmr_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+vmw_gmr_bufmgr_create_buffer(struct pb_manager *_mgr,
+                             pb_size size,
+                             const struct pb_desc *desc) 
+{
+   struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr);
+   struct vmw_gmr_buffer *gmr;
+
+   /* Allocate the wrapper first; nothing to undo if this fails. */
+   gmr = CALLOC_STRUCT(vmw_gmr_buffer);
+   if(gmr == NULL)
+      return NULL;
+
+   /* Generic pipebuffer bookkeeping. */
+   pipe_reference_init(&gmr->base.base.reference, 1);
+   gmr->base.base.alignment = desc->alignment;
+   gmr->base.base.usage = desc->usage;
+   gmr->base.base.size = size;
+   gmr->base.vtbl = &vmw_gmr_buffer_vtbl;
+   gmr->mgr = mgr;
+
+   /* Each buffer gets its own region, mapped here once and for all. */
+   gmr->region = vmw_ioctl_region_create(mgr->vws, size);
+   if(gmr->region == NULL) {
+      FREE(gmr);
+      return NULL;
+   }
+
+   gmr->map = vmw_ioctl_region_map(gmr->region);
+   if(gmr->map == NULL) {
+      vmw_ioctl_region_destroy(gmr->region);
+      FREE(gmr);
+      return NULL;
+   }
+
+   return &gmr->base;
+}
+
+
+static void
+vmw_gmr_bufmgr_flush(struct pb_manager *mgr) 
+{
+   /* No-op: this manager holds no state besides its screen pointer */
+}
+
+
+static void
+vmw_gmr_bufmgr_destroy(struct pb_manager *_mgr) 
+{
+   /* Only the manager struct itself is freed; vws is left untouched. */
+   FREE(vmw_gmr_bufmgr(_mgr));
+}
+
+
+struct pb_manager *
+vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws) 
+{
+   struct vmw_gmr_bufmgr *gmr_mgr = CALLOC_STRUCT(vmw_gmr_bufmgr);
+
+   if(gmr_mgr == NULL)
+      return NULL;
+
+   /* Screen used by create_buffer to allocate the backing regions. */
+   gmr_mgr->vws = vws;
+
+   /* Hook up the pb_manager interface. */
+   gmr_mgr->base.create_buffer = vmw_gmr_bufmgr_create_buffer;
+   gmr_mgr->base.flush = vmw_gmr_bufmgr_flush;
+   gmr_mgr->base.destroy = vmw_gmr_bufmgr_destroy;
+   return &gmr_mgr->base;
+}
+
+
+boolean
+vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, 
+                          struct SVGAGuestPtr *ptr)
+{
+   struct pb_buffer *base_buf = NULL;
+   unsigned offset = 0;
+   /* Fills *ptr with the guest pointer of buf; FALSE if it has no base. */
+   /* Resolve buf to the buffer it lives in plus its offset inside it. */
+   pb_get_base_buffer( buf, &base_buf, &offset );
+   
+   /* Check before the cast: vmw_gmr_buffer() asserts on NULL, which made
+    * this FALSE return unreachable in DEBUG builds. */
+   if(!base_buf)
+      return FALSE;
+   
+   *ptr = vmw_ioctl_region_ptr(vmw_gmr_buffer(base_buf)->region);
+   ptr->offset += offset;
+   
+   return TRUE;
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.h b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h
new file mode 100644
index 0000000000..634bdcabd2
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h
@@ -0,0 +1,65 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#ifndef VMW_BUFFER_H_
+#define VMW_BUFFER_H_
+
+
+#include "pipe/p_compiler.h"
+
+struct SVGAGuestPtr;
+struct pb_buffer;
+struct pb_manager;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+struct vmw_winsys_screen;
+
+
+static INLINE struct pb_buffer *
+vmw_pb_buffer(struct svga_winsys_buffer *buffer)
+{
+   assert(buffer);                      /* debug-only NULL check */
+   return (struct pb_buffer *)buffer;   /* plain pointer cast: the handle is treated as a pb_buffer */
+}
+
+
+static INLINE struct svga_winsys_buffer *
+vmw_svga_winsys_buffer(struct pb_buffer *buffer)
+{
+   assert(buffer);                              /* debug-only NULL check */
+   return (struct svga_winsys_buffer *)buffer;  /* inverse of vmw_pb_buffer(): plain pointer cast */
+}
+
+
+struct pb_manager *
+vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws);
+
+boolean
+vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, 
+                          struct SVGAGuestPtr *ptr);
+
+
+#endif /* VMW_BUFFER_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c
new file mode 100644
index 0000000000..b6997588de
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c
@@ -0,0 +1,297 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "svga_cmd.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_debug_stack.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_validate.h"
+
+#include "svga_winsys.h"
+#include "vmw_context.h"
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+#include "vmw_surface.h"
+#include "vmw_fence.h"
+
+#define VMW_COMMAND_SIZE (64*1024)
+#define VMW_SURFACE_RELOCS (1024)
+
+#define VMW_MUST_FLUSH_STACK 8
+
+struct vmw_svga_winsys_context
+{
+   struct svga_winsys_context base;   /* first member: svga_winsys_context* is cast to this type */
+
+   struct vmw_winsys_screen *vws;     /* screen this context was created on */
+
+#ifdef DEBUG
+   boolean must_flush;                /* set when reserve runs out of space, cleared by flush */
+   struct debug_stack_frame must_flush_stack[VMW_MUST_FLUSH_STACK];   /* backtrace captured by that failed reserve */
+#endif
+
+   struct {
+      uint8_t buffer[VMW_COMMAND_SIZE];   /* commands queued since the last flush */
+      uint32_t size;                      /* capacity in bytes, set to VMW_COMMAND_SIZE at creation */
+      uint32_t used;                      /* bytes committed so far */
+      uint32_t reserved;                  /* bytes handed out by reserve, not yet committed */
+   } command;
+
+   struct {
+      struct vmw_svga_winsys_surface *handles[VMW_SURFACE_RELOCS];   /* surfaces referenced by the queued commands */
+      uint32_t size;       /* capacity in entries, set to VMW_SURFACE_RELOCS at creation */
+      uint32_t used;       /* entries committed so far */
+      uint32_t staged;     /* entries added since the last reserve, not yet committed */
+      uint32_t reserved;   /* entries requested by the last reserve */
+   } surface;
+
+   struct pb_validate *validate;   /* buffers added by region relocations; validated and fenced at flush */
+
+   uint32_t last_fence;   /* fence number written by vmw_ioctl_command() at flush */
+};
+
+
+static INLINE struct vmw_svga_winsys_context *
+vmw_svga_winsys_context(struct svga_winsys_context *swc)
+{
+   assert(swc);                                    /* debug-only NULL check */
+   return (struct vmw_svga_winsys_context *)swc;   /* valid because base is the first member */
+}
+
+
+static enum pipe_error
+vmw_swc_flush(struct svga_winsys_context *swc,
+              struct pipe_fence_handle **pfence)
+{
+   /* Submits the queued commands and drops all per-batch state; *pfence
+    * (optional) gets the batch fence, or NULL if validation failed. */
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct pipe_fence_handle *fence = NULL;
+   unsigned i;
+   enum pipe_error ret;
+
+   ret = pb_validate_validate(vswc->validate);
+   assert(ret == PIPE_OK);
+   if(ret == PIPE_OK) {
+      if (vswc->command.used)
+         vmw_ioctl_command(vswc->vws,
+                           vswc->command.buffer,
+                           vswc->command.used,
+                           &vswc->last_fence);
+
+      fence = vmw_pipe_fence(vswc->last_fence);
+      pb_validate_fence(vswc->validate, fence);
+   }
+
+   vswc->command.used = 0;
+   vswc->command.reserved = 0;
+
+   /* Release committed and still-staged surface relocations alike. */
+   for(i = 0; i < vswc->surface.used + vswc->surface.staged; ++i) {
+      struct vmw_svga_winsys_surface *vsurf = vswc->surface.handles[i];
+      p_atomic_dec(&vsurf->validated);
+      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+   }
+
+   /* staged must be cleared too: its handles are NULL now, and a stale
+    * count would make the next flush dereference them. */
+   vswc->surface.used = 0;
+   vswc->surface.staged = 0;
+   vswc->surface.reserved = 0;
+#ifdef DEBUG
+   vswc->must_flush = FALSE;
+#endif
+   if(pfence)
+      *pfence = fence;
+   return ret;
+}
+
+
+static void *
+vmw_swc_reserve(struct svga_winsys_context *swc,
+                uint32_t nr_bytes, uint32_t nr_relocs )
+{
+   /* Returns space for nr_bytes of commands, or NULL when the caller
+    * has to flush first (or the request can never fit). */
+   struct vmw_svga_winsys_context *ctx = vmw_svga_winsys_context(swc);
+   boolean cmd_full, reloc_full;
+#ifdef DEBUG
+   /* A previous reservation failed and nobody flushed since then. */
+   if(ctx->must_flush) {
+      debug_printf("Forgot to flush:\n");
+      debug_backtrace_dump(ctx->must_flush_stack, VMW_MUST_FLUSH_STACK);
+      assert(!ctx->must_flush);
+   }
+#endif
+
+   assert(nr_bytes <= ctx->command.size);
+   if(nr_bytes > ctx->command.size)
+      return NULL;
+
+   cmd_full = ctx->command.used + nr_bytes > ctx->command.size;
+   reloc_full = ctx->surface.used + nr_relocs > ctx->surface.size;
+   if(cmd_full || reloc_full) {
+#ifdef DEBUG
+      ctx->must_flush = TRUE;
+      debug_backtrace_capture(ctx->must_flush_stack, 1,
+                              VMW_MUST_FLUSH_STACK);
+#endif
+      return NULL;
+   }
+
+   /* Record the pending reservation; vmw_swc_commit() makes it final. */
+   ctx->command.reserved = nr_bytes;
+   ctx->surface.reserved = nr_relocs;
+   ctx->surface.staged = 0;
+   return ctx->command.buffer + ctx->command.used;
+}
+
+
+static void
+vmw_swc_surface_relocation(struct svga_winsys_context *swc,
+                           uint32 *where,
+                           struct svga_winsys_surface *surface,
+                           unsigned flags)
+{
+   struct vmw_svga_winsys_context *ctx = vmw_svga_winsys_context(swc);
+   struct vmw_svga_winsys_surface *vsurf;
+   uint32_t slot;
+
+   /* A NULL surface is written as the invalid id and needs no tracking. */
+   if(surface == NULL) {
+      *where = SVGA3D_INVALID_ID;
+      return;
+   }
+
+   assert(ctx->surface.staged < ctx->surface.reserved);
+   vsurf = vmw_svga_winsys_surface(surface);
+   *where = vsurf->sid;
+   /* Keep the surface referenced in the next free slot of this batch. */
+   slot = ctx->surface.used + ctx->surface.staged++;
+   vmw_svga_winsys_surface_reference(&ctx->surface.handles[slot], vsurf);
+   p_atomic_inc(&vsurf->validated);
+}
+
+
+static void
+vmw_swc_region_relocation(struct svga_winsys_context *swc,
+                          struct SVGAGuestPtr *where,
+                          struct svga_winsys_buffer *buffer,
+                          uint32 offset,
+                          unsigned flags)
+{
+   struct vmw_svga_winsys_context *ctx = vmw_svga_winsys_context(swc);
+   struct pb_buffer *pb = vmw_pb_buffer(buffer);
+   struct SVGAGuestPtr guest_ptr;
+   enum pipe_error err;
+
+   /* Translate the buffer into a guest pointer, then apply the offset. */
+   if(!vmw_gmr_bufmgr_region_ptr(pb, &guest_ptr))
+      assert(0);
+   guest_ptr.offset += offset;
+   *where = guest_ptr;
+
+   /* Add the buffer to the list that vmw_swc_flush() validates and fences. */
+   err = pb_validate_add_buffer(ctx->validate, pb, flags);
+   /* TODO: Update pipebuffer to reserve buffers and not fail here */
+   assert(err == PIPE_OK);
+}
+
+
+static void
+vmw_swc_commit(struct svga_winsys_context *swc)
+{
+   struct vmw_svga_winsys_context *ctx = vmw_svga_winsys_context(swc);
+
+   /* Sanity-check the pending reservation before making it permanent. */
+   assert(ctx->command.reserved);
+   assert(ctx->command.used + ctx->command.reserved <= ctx->command.size);
+   assert(ctx->surface.staged <= ctx->surface.reserved);
+   assert(ctx->surface.used + ctx->surface.staged <= ctx->surface.size);
+
+   /* Fold the reserved bytes and staged relocations into the batch. */
+   ctx->command.used += ctx->command.reserved;
+   ctx->surface.used += ctx->surface.staged;
+   ctx->command.reserved = ctx->surface.reserved = ctx->surface.staged = 0;
+}
+
+
+static void
+vmw_swc_destroy(struct svga_winsys_context *swc)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   unsigned i;
+   for(i = 0; i < vswc->surface.used; ++i) {   /* NOTE(review): staged entries are skipped here, unlike in vmw_swc_flush() -- confirm */
+      p_atomic_dec(&vswc->surface.handles[i]->validated);
+      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+   }
+   pb_validate_destroy(vswc->validate);              /* drop the validation list */
+   vmw_ioctl_context_destroy(vswc->vws, swc->cid);   /* release the id obtained at creation */
+   FREE(vswc);
+}
+
+
+struct svga_winsys_context *
+vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct vmw_svga_winsys_context *vswc;
+   vswc = CALLOC_STRUCT(vmw_svga_winsys_context);
+   if(!vswc)
+      return NULL;
+
+   /* svga_winsys_context entry points implemented in this file. */
+   vswc->base.destroy = vmw_swc_destroy;
+   vswc->base.reserve = vmw_swc_reserve;
+   vswc->base.surface_relocation = vmw_swc_surface_relocation;
+   vswc->base.region_relocation = vmw_swc_region_relocation;
+   vswc->base.commit = vmw_swc_commit;
+   vswc->base.flush = vmw_swc_flush;
+
+   vswc->base.cid = vmw_ioctl_context_create(vws);
+   vswc->vws = vws;
+
+   /* Capacities of the embedded command buffer and relocation table. */
+   vswc->command.size = VMW_COMMAND_SIZE;
+   vswc->surface.size = VMW_SURFACE_RELOCS;
+   vswc->validate = pb_validate_create();
+   if(!vswc->validate) {
+      /* Release the context id created above instead of leaking it. */
+      vmw_ioctl_context_destroy(vws, vswc->base.cid);
+      FREE(vswc);
+      return NULL;
+   }
+   return &vswc->base;
+}
+
+
+struct pipe_context *
+vmw_svga_context_create(struct pipe_screen *screen)
+{
+   return svga_context_create(screen);   /* thin wrapper: forwards unchanged to svga_context_create() */
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.h b/src/gallium/winsys/drm/vmware/core/vmw_context.h
new file mode 100644
index 0000000000..305ce9b5be
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_context.h
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_CONTEXT_H_
+#define VMW_CONTEXT_H_
+
+#include "pipe/p_compiler.h"
+
+struct svga_winsys_screen;
+struct svga_winsys_context;
+struct pipe_context;
+struct pipe_screen;
+
+#define VMW_DEBUG 0   /* set to non-zero to route the two macros below to debug_printf */
+
+#if VMW_DEBUG
+#define vmw_printf debug_printf
+#define VMW_FUNC  debug_printf("%s\n", __FUNCTION__)
+#else
+#define VMW_FUNC                /* expands to nothing */
+#define vmw_printf(...)         /* swallows its arguments */
+#endif
+
+
+struct svga_winsys_context *
+vmw_svga_winsys_context_create(struct svga_winsys_screen *sws);
+
+struct pipe_context *
+vmw_svga_context_create(struct pipe_screen *screen);
+
+
+#endif /* VMW_CONTEXT_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.c b/src/gallium/winsys/drm/vmware/core/vmw_fence.c
new file mode 100644
index 0000000000..873dd51166
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.c
@@ -0,0 +1,108 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer_fenced.h"
+
+#include "vmw_screen.h"
+#include "vmw_fence.h"
+
+
+
+struct vmw_fence_ops 
+{
+   struct pb_fence_ops base;        /* first member, so a pb_fence_ops* can be cast to this type */
+
+   struct vmw_winsys_screen *vws;   /* screen the signalled/finish queries are forwarded to */
+};
+
+
+static INLINE struct vmw_fence_ops *
+vmw_fence_ops(struct pb_fence_ops *ops)
+{
+   assert(ops);                         /* debug-only NULL check */
+   return (struct vmw_fence_ops *)ops;  /* valid because base is the first member */
+}
+
+
+static void
+vmw_fence_ops_fence_reference(struct pb_fence_ops *ops,
+                              struct pipe_fence_handle **ptr,
+                              struct pipe_fence_handle *fence)
+{
+   *ptr = fence;   /* plain copy, no reference counting is done here */
+}
+
+
+static int
+vmw_fence_ops_fence_signalled(struct pb_fence_ops *ops,
+                              struct pipe_fence_handle *fence,
+                              unsigned flag)
+{
+   uint32_t fence_no = vmw_fence(fence);   /* the handle value is the fence number */
+   (void)flag;
+   return vmw_ioctl_fence_signalled(vmw_fence_ops(ops)->vws, fence_no);
+}
+
+
+static int
+vmw_fence_ops_fence_finish(struct pb_fence_ops *ops,
+                           struct pipe_fence_handle *fence,
+                           unsigned flag)
+{
+   uint32_t fence_no = vmw_fence(fence);   /* the handle value is the fence number */
+   (void)flag;
+   return vmw_ioctl_fence_finish(vmw_fence_ops(ops)->vws, fence_no);
+}
+
+
+static void
+vmw_fence_ops_destroy(struct pb_fence_ops *ops)
+{
+   FREE(ops);   /* frees the vmw_fence_ops allocation; the screen pointer is not touched */
+}
+
+
+struct pb_fence_ops *
+vmw_fence_ops_create(struct vmw_winsys_screen *vws) 
+{
+   struct vmw_fence_ops *fence_ops = CALLOC_STRUCT(vmw_fence_ops);
+
+   if(fence_ops == NULL)
+      return NULL;
+
+   /* Screen the signalled/finish callbacks forward their queries to. */
+   fence_ops->vws = vws;
+
+   /* pb_fence_ops interface; a function name already decays to a pointer. */
+   fence_ops->base.fence_reference = vmw_fence_ops_fence_reference;
+   fence_ops->base.fence_signalled = vmw_fence_ops_fence_signalled;
+   fence_ops->base.fence_finish = vmw_fence_ops_fence_finish;
+   fence_ops->base.destroy = vmw_fence_ops_destroy;
+   return &fence_ops->base;
+}
+
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.h b/src/gallium/winsys/drm/vmware/core/vmw_fence.h
new file mode 100644
index 0000000000..5357b4f61d
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.h
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#ifndef VMW_FENCE_H_
+#define VMW_FENCE_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+struct pipe_fence_handle;
+struct pb_fence_ops;
+struct vmw_winsys_screen;
+
+
+/** Cast from a pipe_fence_handle pointer into a SVGA fence */
+static INLINE uint32_t
+vmw_fence( struct pipe_fence_handle *fence )
+{
+   return (uint32_t)(uintptr_t)fence;
+}
+
+
+/** Cast from a SVGA fence number to pipe_fence_handle pointer */
+static INLINE struct pipe_fence_handle *
+vmw_pipe_fence( uint32_t fence )
+{
+   return (struct pipe_fence_handle *)(uintptr_t)fence;
+}
+
+
+struct pb_fence_ops *
+vmw_fence_ops_create(struct vmw_winsys_screen *vws); 
+
+
+#endif /* VMW_FENCE_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.c b/src/gallium/winsys/drm/vmware/core/vmw_screen.c
new file mode 100644
index 0000000000..911eec5e25
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.c
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "vmw_screen.h"
+
+#include "vmw_context.h"
+
+#include "util/u_memory.h"
+#include "pipe/p_compiler.h"
+
+
+/* Called from vmw_drm_create_screen(), creates and initializes the
+ * vmw_winsys_screen structure, which is the main entity in this
+ * module.
+ */
+struct vmw_winsys_screen *
+vmw_winsys_create( int fd )
+{
+   struct vmw_winsys_screen *vws = CALLOC_STRUCT(vmw_winsys_screen);
+   if (!vws)
+      goto out_no_vws;
+
+   vws->ioctl.drm_fd = fd;
+
+   if (!vmw_ioctl_init(vws))
+      goto out_no_ioctl;
+
+   if(!vmw_pools_init(vws))
+      goto out_no_pools;
+
+   if (!vmw_winsys_screen_init_svga(vws))
+      goto out_no_svga;
+
+   return vws;
+out_no_svga:
+   vmw_pools_cleanup(vws);
+out_no_pools:
+   vmw_ioctl_cleanup(vws);
+out_no_ioctl:
+   FREE(vws);
+out_no_vws:
+   return NULL;
+}
+
+void
+vmw_winsys_destroy(struct vmw_winsys_screen *vws)
+{
+   vmw_pools_cleanup(vws);
+   vmw_ioctl_cleanup(vws);
+   FREE(vws);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
new file mode 100644
index 0000000000..a875107370
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
@@ -0,0 +1,134 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Common definitions for the VMware SVGA winsys.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_SCREEN_H_
+#define VMW_SCREEN_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "svga_winsys.h"
+
+struct pb_manager;
+struct vmw_region;
+
+
+struct vmw_winsys_screen
+{
+   struct svga_winsys_screen base;
+
+   struct {
+      volatile uint32_t *fifo_map;
+      uint64_t last_fence;
+      int drm_fd;
+   } ioctl;
+
+   struct {
+      struct pb_manager *gmr;
+      struct pb_manager *gmr_mm;
+      struct pb_manager *gmr_fenced;
+   } pools;
+};
+
+
+static INLINE struct vmw_winsys_screen *
+vmw_winsys_screen(struct svga_winsys_screen *base)
+{
+   return (struct vmw_winsys_screen *)base;
+}
+
+/* Wrappers around the vmwgfx DRM ioctls (see vmw_screen_ioctl.c). */
+uint32
+vmw_ioctl_context_create(struct vmw_winsys_screen *vws);
+
+void
+vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws,
+                          uint32 cid);
+
+uint32
+vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
+                              SVGA3dSurfaceFlags flags,
+                              SVGA3dSurfaceFormat format,
+                              SVGA3dSize size,
+                              uint32 numFaces,
+                              uint32 numMipLevels);
+
+void
+vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws,
+                          uint32 sid);
+
+void
+vmw_ioctl_command(struct vmw_winsys_screen *vws,
+                       void *commands,
+                       uint32_t size,
+                       uint32_t *fence);
+
+struct vmw_region *
+vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size);
+
+void
+vmw_ioctl_region_destroy(struct vmw_region *region);
+
+struct SVGAGuestPtr
+vmw_ioctl_region_ptr(struct vmw_region *region);
+
+void *
+vmw_ioctl_region_map(struct vmw_region *region);
+void
+vmw_ioctl_region_unmap(struct vmw_region *region);
+
+
+int
+vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
+                       uint32_t fence);
+
+int
+vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
+                          uint32_t fence);
+
+
+/* Initialize parts of vmw_winsys_screen at startup:
+ */
+boolean vmw_ioctl_init(struct vmw_winsys_screen *vws);
+boolean vmw_pools_init(struct vmw_winsys_screen *vws);
+boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws);
+
+void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws);
+void vmw_pools_cleanup(struct vmw_winsys_screen *vws);
+
+struct vmw_winsys_screen *vmw_winsys_create(int fd);
+void vmw_winsys_destroy(struct vmw_winsys_screen *sws);
+
+
+#endif /* VMW_SCREEN_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
new file mode 100644
index 0000000000..5995eee34b
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
@@ -0,0 +1,371 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "vmw_screen.h"
+
+#include "trace/tr_drm.h"
+
+#include "vmw_screen.h"
+#include "vmw_surface.h"
+#include "vmw_fence.h"
+#include "vmw_context.h"
+
+#include <state_tracker/dri1_api.h>
+#include <state_tracker/drm_api.h>
+#include <vmwgfx_drm.h>
+#include <xf86drm.h>
+
+#include <stdio.h>
+
+static struct dri1_api dri1_api_hooks;
+static struct dri1_api_version ddx_required = { 0, 1, 0 };
+static struct dri1_api_version ddx_compat = { 0, 0, 0 };
+static struct dri1_api_version dri_required = { 4, 0, 0 };
+static struct dri1_api_version dri_compat = { 4, 0, 0 };
+static struct dri1_api_version drm_required = { 0, 1, 0 };
+static struct dri1_api_version drm_compat = { 0, 0, 0 };
+
+static boolean
+vmw_dri1_check_version(const struct dri1_api_version *cur,
+		       const struct dri1_api_version *required,
+		       const struct dri1_api_version *compat,
+		       const char component[])
+{
+   if (cur->major > required->major && cur->major <= compat->major)
+      return TRUE;
+   if (cur->major == required->major && cur->minor >= required->minor)
+      return TRUE;
+
+   fprintf(stderr, "%s version failure.\n", component);
+   fprintf(stderr, "%s version is %d.%d.%d and this driver can only work\n"
+	   "with versions %d.%d.x through %d.x.x.\n",
+	   component,
+	   cur->major,
+	   cur->minor,
+	   cur->patch_level, required->major, required->minor, compat->major);
+   return FALSE;
+}
+
+/* This is actually the entrypoint to the entire driver, called by the
+ * libGL (or EGL, or ...) code via the drm_api_hooks table at the
+ * bottom of the file.
+ */
+static struct pipe_screen *
+vmw_drm_create_screen(struct drm_api *drm_api,
+                      int fd,
+                      struct drm_create_screen_arg *arg)
+{
+   struct vmw_winsys_screen *vws;
+   struct pipe_screen *screen;
+   struct dri1_create_screen_arg *dri1;
+
+   if (arg != NULL) {
+      switch (arg->mode) {
+      case DRM_CREATE_NORMAL:
+	 break;
+      case DRM_CREATE_DRI1:
+	 dri1 = (struct dri1_create_screen_arg *)arg;
+	 if (!vmw_dri1_check_version(&dri1->ddx_version, &ddx_required,
+				     &ddx_compat, "ddx - driver api"))
+	    return NULL;
+	 if (!vmw_dri1_check_version(&dri1->dri_version, &dri_required,
+				     &dri_compat, "dri info"))
+	    return NULL;
+	 if (!vmw_dri1_check_version(&dri1->drm_version, &drm_required,
+				     &drm_compat, "vmwgfx drm driver"))
+	    return NULL;
+	 dri1->api = &dri1_api_hooks;
+	 break;
+      default:
+	 return NULL;
+      }
+   }
+
+   vws = vmw_winsys_create( fd );
+   if (!vws)
+      goto out_no_vws;
+
+   screen = svga_screen_create( &vws->base );
+   if (!screen)
+      goto out_no_screen;
+
+   return screen;
+
+   /* Failure cases:
+    */
+out_no_screen:
+   vmw_winsys_destroy( vws );
+
+out_no_vws:
+   return NULL;
+}
+
+static INLINE boolean
+vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst,
+			    int dst_x,
+			    int dst_y,
+			    const struct drm_clip_rect *src,
+			    const struct drm_clip_rect *bbox)
+{
+   int xy1;
+   int xy2;
+
+   xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 :
+      (int)bbox->x1 + dst_x;
+   xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 :
+      (int)bbox->x2 + dst_x;
+   if (xy1 >= xy2 || xy1 < 0)
+      return FALSE;
+
+   dst->x1 = xy1;
+   dst->x2 = xy2;
+
+   xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 :
+      (int)bbox->y1 + dst_y;
+   xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 :
+      (int)bbox->y2 + dst_y;
+   if (xy1 >= xy2 || xy1 < 0)
+      return FALSE;
+
+   dst->y1 = xy1;
+   dst->y2 = xy2;
+   return TRUE;
+}
+
+/**
+ * No fancy get-surface-from-sarea stuff here.
+ * Just use the present blit.
+ */
+
+static void
+vmw_dri1_present_locked(struct pipe_context *locked_pipe,
+			struct pipe_surface *surf,
+			const struct drm_clip_rect *rect,
+			unsigned int num_clip,
+			int x_draw, int y_draw,
+			const struct drm_clip_rect *bbox,
+			struct pipe_fence_handle **p_fence)
+{
+   struct svga_winsys_surface *srf =
+      svga_screen_texture_get_winsys_surface(surf->texture);
+   struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf);
+   struct vmw_winsys_screen *vws =
+      vmw_winsys_screen(svga_winsys_screen(locked_pipe->screen));
+   struct drm_clip_rect clip;
+   int i;
+   struct
+   {
+      SVGA3dCmdHeader header;
+      SVGA3dCmdPresent body;
+      SVGA3dCopyRect rect;
+   } cmd;
+   boolean visible = FALSE;
+   uint32_t fence_seq = 0;
+
+   VMW_FUNC;
+   cmd.header.id = SVGA_3D_CMD_PRESENT;
+   cmd.header.size = sizeof cmd.body + sizeof cmd.rect;
+   cmd.body.sid = vsrf->sid;
+
+   for (i = 0; i < num_clip; ++i) {
+      if (!vmw_dri1_intersect_src_bbox(&clip, x_draw, y_draw, rect++, bbox))
+	 continue;
+
+      cmd.rect.x = clip.x1;
+      cmd.rect.y = clip.y1;
+      cmd.rect.w = clip.x2 - clip.x1;
+      cmd.rect.h = clip.y2 - clip.y1;
+      cmd.rect.srcx = (int)clip.x1 - x_draw;
+      cmd.rect.srcy = (int)clip.y1 - y_draw;
+
+      vmw_printf("%s: Clip %d x %d y %d w %d h %d srcx %d srcy %d\n",
+		   __FUNCTION__,
+		   i,
+		   cmd.rect.x,
+		   cmd.rect.y,
+		   cmd.rect.w, cmd.rect.h, cmd.rect.srcx, cmd.rect.srcy);
+
+      vmw_ioctl_command(vws, &cmd, sizeof cmd.header + cmd.header.size,
+                        &fence_seq);
+      visible = TRUE;
+   }
+
+   *p_fence = (visible) ? vmw_pipe_fence(fence_seq) : NULL;
+   vmw_svga_winsys_surface_reference(&vsrf, NULL);
+}
+
+/**
+ * FIXME: We'd probably want to cache these buffers in the
+ * screen, based on handle.
+ */
+
+static struct pipe_buffer *
+vmw_drm_buffer_from_handle(struct drm_api *drm_api,
+                           struct pipe_screen *screen,
+			   const char *name,
+			   unsigned handle)
+{
+    struct vmw_svga_winsys_surface *vsrf;
+    struct svga_winsys_surface *ssrf;
+    struct vmw_winsys_screen *vws =
+	vmw_winsys_screen(svga_winsys_screen(screen));
+    struct pipe_buffer *buf;
+    union drm_vmw_surface_reference_arg arg;
+    struct drm_vmw_surface_arg *req = &arg.req;
+    struct drm_vmw_surface_create_req *rep = &arg.rep;
+    int ret;
+    int i;
+
+    /**
+     * The vmware device specific handle is the hardware SID.
+     * FIXME: We probably want to move this to the ioctl implementations.
+     */
+
+    memset(&arg, 0, sizeof(arg));
+    req->sid = handle;
+
+    ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_REF_SURFACE,
+			      &arg, sizeof(arg));
+
+    if (ret) {
+	fprintf(stderr, "Failed referencing shared surface. SID %d.\n"
+		"Error %d (%s).\n",
+		handle, ret, strerror(-ret));
+	return NULL;
+    }
+
+    if (rep->mip_levels[0] != 1) {
+	fprintf(stderr, "Incorrect number of mipmap levels on shared surface."
+		" SID %d, levels %d\n",
+		handle, rep->mip_levels[0]);
+	goto out_mip;
+    }
+
+    for (i=1; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+	if (rep->mip_levels[i] != 0) {
+	    fprintf(stderr, "Incorrect number of faces levels on shared surface."
+		    " SID %d, face %d present.\n",
+		    handle, i);
+	    goto out_mip;
+	}
+    }
+
+    vsrf = CALLOC_STRUCT(vmw_svga_winsys_surface);
+    if (!vsrf)
+	goto out_mip;
+
+    pipe_reference_init(&vsrf->refcnt, 1);
+    p_atomic_set(&vsrf->validated, 0);
+    vsrf->sid = handle;
+    ssrf = svga_winsys_surface(vsrf);
+    buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf);
+    if (!buf)
+	vmw_svga_winsys_surface_reference(&vsrf, NULL);
+
+    return buf;
+  out_mip:
+    vmw_ioctl_surface_destroy(vws, handle);
+    return NULL;
+}
+
+static struct pipe_texture *
+vmw_drm_texture_from_handle(struct drm_api *drm_api,
+			    struct pipe_screen *screen,
+			    struct pipe_texture *templat,
+			    const char *name,
+			    unsigned stride,
+			    unsigned handle)
+{
+    struct pipe_buffer *buffer;
+    buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle);
+
+    if (!buffer)
+	return NULL;
+
+    return screen->texture_blanket(screen, templat, &stride, buffer);
+}
+
+static boolean
+vmw_drm_handle_from_buffer(struct drm_api *drm_api,
+                           struct pipe_screen *screen,
+			   struct pipe_buffer *buffer,
+			   unsigned *handle)
+{
+    struct svga_winsys_surface *surface =
+	svga_screen_buffer_get_winsys_surface(buffer);
+    struct vmw_svga_winsys_surface *vsrf;
+
+    if (!surface)
+	return FALSE;
+
+    vsrf = vmw_svga_winsys_surface(surface);
+    *handle = vsrf->sid;
+    vmw_svga_winsys_surface_reference(&vsrf, NULL);
+    return TRUE;
+}
+
+static boolean
+vmw_drm_handle_from_texture(struct drm_api *drm_api,
+			    struct pipe_screen *screen,
+			    struct pipe_texture *texture,
+			    unsigned *stride,
+			    unsigned *handle)
+{
+    struct pipe_buffer *buffer;
+
+    if (!svga_screen_buffer_from_texture(texture, &buffer, stride))
+	return FALSE;
+
+    return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle);
+}
+
+static struct pipe_context*
+vmw_drm_create_context(struct drm_api *drm_api,
+                       struct pipe_screen *screen)
+{
+   return vmw_svga_context_create(screen);
+}
+
+static struct dri1_api dri1_api_hooks = {
+   .front_srf_locked = NULL,
+   .present_locked = vmw_dri1_present_locked
+};
+
+static struct drm_api vmw_drm_api_hooks = {
+   .create_screen = vmw_drm_create_screen,
+   .create_context = vmw_drm_create_context,
+   .texture_from_shared_handle = vmw_drm_texture_from_handle,
+   .shared_handle_from_texture = vmw_drm_handle_from_texture,
+   .local_handle_from_texture = vmw_drm_handle_from_texture,
+};
+
+struct drm_api* drm_api_create()
+{
+   return trace_drm_create(&vmw_drm_api_hooks);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
new file mode 100644
index 0000000000..b3515732a2
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
@@ -0,0 +1,503 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ *
+ * Wrappers for DRM ioctl functionality used by the rest of the vmw
+ * drm winsys.
+ *
+ * Based on svgaicd_escape.c
+ */
+
+
+#include "svga_cmd.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "svgadump/svga_dump.h"
+#include "vmw_screen.h"
+#include "vmw_context.h"
+#include "xf86drm.h"
+#include "vmwgfx_drm.h"
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <unistd.h>
+
+struct vmw_region
+{
+   SVGAGuestPtr ptr;
+   uint32_t handle;
+   uint64_t map_handle;
+   void *data;
+   uint32_t map_count;
+   int drm_fd;
+   uint32_t size;
+};
+
+static void
+vmw_check_last_cmd(struct vmw_winsys_screen *vws)
+{
+   static uint32_t buffer[16384];   /* 16384 * 4 bytes == the 65536 passed as debug_buffer_size */
+   struct drm_vmw_fifo_debug_arg arg;
+   int ret;
+   /* Debug aid: fetch the FIFO debug buffer via DRM_VMW_FIFO_DEBUG and dump its commands. */
+   return;   /* currently disabled: everything below this line is unreachable */
+   memset(&arg, 0, sizeof(arg));
+   arg.debug_buffer = (unsigned long)buffer;
+   arg.debug_buffer_size = 65536;
+
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FIFO_DEBUG,
+			     &arg, sizeof(arg));
+
+   if (ret) {
+      debug_printf("%s Ioctl error: \"%s\".\n", __FUNCTION__, strerror(-ret));
+      return;
+   }
+
+   if (arg.did_not_fit) {   /* reported only; the partial buffer is still dumped below */
+      debug_printf("%s Command did not fit completely.\n", __FUNCTION__);
+   }
+
+   svga_dump_commands(buffer, arg.used_size);
+}
+
+static void
+vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping)
+{
+   VMW_FUNC;
+   (void)munmap(mapping, getpagesize());
+}
+
+
+static void *
+vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws,
+                   uint32_t fifo_offset )
+{
+   void *map;
+
+   VMW_FUNC;
+
+   map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
+	      vws->ioctl.drm_fd, fifo_offset);
+
+   if (map == MAP_FAILED) {
+      debug_printf("Map failed %s\n", strerror(errno));
+      return NULL;
+   }
+
+   vmw_printf("Fifo (min) is 0x%08x\n", ((uint32_t *) map)[SVGA_FIFO_MIN]);
+
+   return map;
+}
+
+uint32
+vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
+{
+   struct drm_vmw_context_arg c_arg;
+   int ret;
+
+   VMW_FUNC;
+
+   ret = drmCommandRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_CONTEXT,
+			&c_arg, sizeof(c_arg));
+
+   if (ret)
+      return -1;
+
+   vmw_check_last_cmd(vws);
+   vmw_printf("Context id is %d\n", c_arg.cid);
+
+   return c_arg.cid;
+}
+
+void
+vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
+{
+   struct drm_vmw_context_arg c_arg;
+
+   VMW_FUNC;
+
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.cid = cid;
+
+   (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_CONTEXT,
+			 &c_arg, sizeof(c_arg));
+
+   vmw_check_last_cmd(vws);
+}
+
+uint32
+vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
+			      SVGA3dSurfaceFlags flags,
+			      SVGA3dSurfaceFormat format,
+			      SVGA3dSize size,
+			      uint32_t numFaces, uint32_t numMipLevels)
+{
+   union drm_vmw_surface_create_arg s_arg;
+   struct drm_vmw_surface_create_req *req = &s_arg.req;
+   struct drm_vmw_surface_arg *rep = &s_arg.rep;
+   struct drm_vmw_size sizes[DRM_VMW_MAX_SURFACE_FACES*
+			     DRM_VMW_MAX_MIP_LEVELS];
+   struct drm_vmw_size *cur_size;
+   uint32_t iFace;
+   uint32_t iMipLevel;
+   int ret;
+   /* Create a shareable surface via DRM_VMW_CREATE_SURFACE; returns its sid, or -1 on error. */
+   vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
+
+   memset(&s_arg, 0, sizeof(s_arg));
+   req->flags = (uint32_t) flags;
+   req->format = (uint32_t) format;
+   req->shareable = 1;
+   /* sizes[] holds exactly FACES * MIP_LEVELS entries, so filling it completely is valid. */
+   assert(numFaces * numMipLevels <= DRM_VMW_MAX_SURFACE_FACES*
+	  DRM_VMW_MAX_MIP_LEVELS);
+   cur_size = sizes;   /* one entry per (face, mip level), written face by face */
+   for (iFace = 0; iFace < numFaces; ++iFace) {
+      SVGA3dSize mipSize = size;
+      /* Each mip level is half the previous one, clamped to 1 in every dimension. */
+      req->mip_levels[iFace] = numMipLevels;
+      for (iMipLevel = 0; iMipLevel < numMipLevels; ++iMipLevel) {
+	 cur_size->width = mipSize.width;
+	 cur_size->height = mipSize.height;
+	 cur_size->depth = mipSize.depth;
+	 mipSize.width = MAX2(mipSize.width >> 1, 1);
+	 mipSize.height = MAX2(mipSize.height >> 1, 1);
+	 mipSize.depth = MAX2(mipSize.depth >> 1, 1);
+	 cur_size++;
+      }
+   }
+   for (iFace = numFaces; iFace < DRM_VMW_MAX_SURFACE_FACES; ++iFace) {
+      req->mip_levels[iFace] = 0;   /* unused faces carry no mip levels */
+   }
+
+   req->size_addr = (unsigned long)&sizes;   /* the kernel reads the size table through this address */
+
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_SURFACE,
+			     &s_arg, sizeof(s_arg));
+
+   if (ret)
+      return -1;   /* -1 converted to the unsigned return type signals failure */
+
+   vmw_printf("Surface id is %d\n", rep->sid);
+   vmw_check_last_cmd(vws);
+
+   return rep->sid;
+}
+
+void
+vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid)
+{
+   struct drm_vmw_surface_arg s_arg;
+
+   VMW_FUNC;
+
+   memset(&s_arg, 0, sizeof(s_arg));
+   s_arg.sid = sid;
+
+   (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_SURFACE,
+			 &s_arg, sizeof(s_arg));
+   vmw_check_last_cmd(vws);
+
+}
+
+void
+vmw_ioctl_command(struct vmw_winsys_screen *vws, void *commands, uint32_t size,
+		       uint32_t * pfence)
+{
+   struct drm_vmw_execbuf_arg arg;
+   struct drm_vmw_fence_rep rep;
+   int ret;
+
+#ifdef DEBUG
+   {
+      static boolean firsttime = TRUE;
+      static boolean debug = FALSE;
+      static boolean skip = FALSE;
+      if (firsttime) {
+         debug = debug_get_bool_option("SVGA_DUMP_CMD", FALSE);
+         skip = debug_get_bool_option("SVGA_SKIP_CMD", FALSE);
+      }
+      if (debug) {
+         VMW_FUNC;
+         svga_dump_commands(commands, size);
+      }
+      firsttime = FALSE;
+      if (skip) {
+         size = 0;
+      }
+   }
+#endif
+
+   memset(&arg, 0, sizeof(arg));
+   memset(&rep, 0, sizeof(rep));
+
+   rep.error = -EFAULT;
+   arg.fence_rep = (unsigned long)&rep;
+   arg.commands = (unsigned long)commands;
+   arg.command_size = size;
+
+   do {
+       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+   } while(ret == -ERESTART);
+   if (ret) {
+      debug_printf("%s error %s.\n", __FUNCTION__, strerror(-ret));
+   }
+   if (rep.error) {
+
+      /*
+       * Kernel has synced and put the last fence sequence in the FIFO
+       * register.
+       */
+
+      if (rep.error == -EFAULT)
+	 rep.fence_seq = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+
+      debug_printf("%s Fence error %s.\n", __FUNCTION__,
+		   strerror(-rep.error));
+   }
+
+   vws->ioctl.last_fence = rep.fence_seq;
+
+   if (pfence)
+      *pfence = rep.fence_seq;
+   vmw_check_last_cmd(vws);
+
+}
+
+/* Allocate a buffer of `size` bytes via DRM_VMW_ALLOC_DMABUF and wrap it in a vmw_region; NULL on failure. */
+struct vmw_region *
+vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size)
+{
+   struct vmw_region *region;
+   union drm_vmw_alloc_dmabuf_arg arg;
+   struct drm_vmw_alloc_dmabuf_req *req = &arg.req;
+   struct drm_vmw_dmabuf_rep *rep = &arg.rep;
+   int ret;
+
+   vmw_printf("%s: size = %u\n", __FUNCTION__, size);
+
+   region = CALLOC_STRUCT(vmw_region);
+   if (!region)
+      return NULL;   /* nothing acquired yet, so nothing to unwind */
+
+   memset(&arg, 0, sizeof(arg));
+   req->size = size;
+   do {   /* reissue the ioctl for as long as it reports -ERESTART */
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_ALLOC_DMABUF, &arg,
+				sizeof(arg));
+   } while (ret == -ERESTART);
+
+   if (ret) {
+      debug_printf("IOCTL failed %d: %s\n", ret, strerror(-ret));
+      goto out_err1;
+   }
+
+   region->ptr.gmrId = rep->cur_gmr_id;
+   region->ptr.offset = rep->cur_gmr_offset;
+   region->data = NULL;   /* the mapping is created on first vmw_ioctl_region_map() */
+   region->handle = rep->handle;
+   region->map_handle = rep->map_handle;
+   region->map_count = 0;
+   region->size = size;
+   region->drm_fd = vws->ioctl.drm_fd;   /* kept so destroy/map work without the screen */
+
+   vmw_printf("   gmrId = %u, offset = %u\n",
+              region->ptr.gmrId, region->ptr.offset);
+
+   return region;
+ out_err1:
+   FREE(region);   /* ioctl failed: release the struct allocated above instead of leaking it */
+   return NULL;
+}
+
+void
+vmw_ioctl_region_destroy(struct vmw_region *region)
+{
+   struct drm_vmw_unref_dmabuf_arg arg;
+
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+
+   if (region->data) {
+      munmap(region->data, region->size);
+      region->data = NULL;
+   }
+
+   memset(&arg, 0, sizeof(arg));
+   arg.handle = region->handle;
+   drmCommandWrite(region->drm_fd, DRM_VMW_UNREF_DMABUF, &arg, sizeof(arg));
+
+   FREE(region);
+}
+
+SVGAGuestPtr
+vmw_ioctl_region_ptr(struct vmw_region *region)
+{
+   return region->ptr;
+}
+
+void *
+vmw_ioctl_region_map(struct vmw_region *region)
+{
+   void *map;
+
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+
+   if (region->data == NULL) {
+      map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		 region->drm_fd, region->map_handle);
+      if (map == MAP_FAILED) {
+	 debug_printf("%s: Map failed.\n", __FUNCTION__);
+	 return NULL;
+      }
+
+      region->data = map;
+   }
+
+   ++region->map_count;
+
+   return region->data;
+}
+
+void
+vmw_ioctl_region_unmap(struct vmw_region *region)
+{
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+   --region->map_count;
+}
+
+
+int
+vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
+                          uint32_t fence)
+{
+   uint32_t expected;
+   uint32_t current;
+   /* Returns 0 once the fence has passed, -1 while it is still pending. */
+   assert(fence);
+   if(!fence)
+      return 0;   /* when the assert is compiled out, a zero fence is reported as passed */
+   /* Read the current fence value from the mapped FIFO registers. */
+   expected = fence;
+   current = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+   /* The signed difference keeps the ordering test valid across 32-bit wraparound. */
+   if ((int32)(current - expected) >= 0)
+      return 0; /* fence passed */
+   else
+      return -1; /* fence still pending */
+}
+
+
+static void
+vmw_ioctl_sync(struct vmw_winsys_screen *vws, 
+		    uint32_t fence)
+{
+   uint32_t cur_fence;
+   struct drm_vmw_fence_wait_arg arg;
+   int ret;
+
+   vmw_printf("%s: fence = %lu\n", __FUNCTION__,
+              (unsigned long)fence);
+
+   cur_fence = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+   vmw_printf("%s: Fence id read is 0x%08x\n", __FUNCTION__,
+              (unsigned int)cur_fence);
+
+   if ((cur_fence - fence) < (1 << 24))
+      return;
+
+   memset(&arg, 0, sizeof(arg));
+   arg.sequence = fence;
+
+   do {
+       ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_WAIT, &arg,
+				 sizeof(arg));
+   } while (ret == -ERESTART);
+}
+
+
+int
+vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
+                       uint32_t fence)
+{
+   assert(fence);
+   
+   if(fence) {
+      if(vmw_ioctl_fence_signalled(vws, fence) != 0) {
+         vmw_ioctl_sync(vws, fence);
+      }
+   }
+   
+   return 0;
+}
+
+
+boolean
+vmw_ioctl_init(struct vmw_winsys_screen *vws)
+{
+   struct drm_vmw_getparam_arg gp_arg;
+   int ret;
+
+   VMW_FUNC;
+
+   memset(&gp_arg, 0, sizeof(gp_arg));
+   gp_arg.param = DRM_VMW_PARAM_FIFO_OFFSET;
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+			     &gp_arg, sizeof(gp_arg));
+
+   if (ret) {
+      debug_printf("GET_PARAM on %d returned %d: %s\n",
+		   vws->ioctl.drm_fd, ret, strerror(-ret));
+      goto out_err1;
+   }
+
+   vmw_printf("Offset to map is 0x%08llx\n",
+              (unsigned long long)gp_arg.value);
+
+   vws->ioctl.fifo_map = vmw_ioctl_fifo_map(vws, gp_arg.value);
+   if (vws->ioctl.fifo_map == NULL)
+      goto out_err1;
+
+   vmw_printf("%s OK\n", __FUNCTION__);
+   return TRUE;
+
+ out_err1:
+   debug_printf("%s Failed\n", __FUNCTION__);
+   return FALSE;
+}
+
+
+
+void
+vmw_ioctl_cleanup(struct vmw_winsys_screen *vws)
+{
+   VMW_FUNC;
+
+   vmw_ioctl_fifo_unmap(vws, (void *)vws->ioctl.fifo_map);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
new file mode 100644
index 0000000000..b1c24b0cb6
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
@@ -0,0 +1,79 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "vmw_screen.h"
+
+#include "vmw_buffer.h"
+#include "vmw_fence.h"
+
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+void
+vmw_pools_cleanup(struct vmw_winsys_screen *vws)
+{
+   /* gmr_fenced->destroy also frees gmr_mm; else release gmr_mm here. */
+   if(vws->pools.gmr_fenced)
+      vws->pools.gmr_fenced->destroy(vws->pools.gmr_fenced);
+   else if(vws->pools.gmr_mm)
+      vws->pools.gmr_mm->destroy(vws->pools.gmr_mm);
+   if(vws->pools.gmr)
+      vws->pools.gmr->destroy(vws->pools.gmr);
+}
+
+
+boolean
+vmw_pools_init(struct vmw_winsys_screen *vws)
+{
+   vws->pools.gmr = vmw_gmr_bufmgr_create(vws);
+   if(!vws->pools.gmr)
+      goto error;
+   /* Layer the managers: gmr_mm sits on gmr, gmr_fenced on gmr_mm. */
+   vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr,
+                                        16*1024*1024,
+                                        12 /* 4096 alignment */);
+   if(!vws->pools.gmr_mm)
+      goto error;
+
+   vws->pools.gmr_fenced = fenced_bufmgr_create(
+      vws->pools.gmr_mm,
+      vmw_fence_ops_create(vws));
+   /* In DEBUG builds the fenced manager is wrapped before the NULL check. */
+#ifdef DEBUG
+   vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced,
+						   4096,
+						   4096);
+#endif
+   if(!vws->pools.gmr_fenced)
+      goto error;
+
+   return TRUE;
+   /* Any failure funnels through vmw_pools_cleanup(). */
+error:
+   vmw_pools_cleanup(vws);
+   return FALSE;
+}
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c
new file mode 100644
index 0000000000..d7d008859b
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c
@@ -0,0 +1,295 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * This file implements the SVGA interface into this winsys, defined
+ * in drivers/svga/svga_winsys.h.
+ *
+ * @author Keith Whitwell
+ * @author Jose Fonseca
+ */
+
+
+#include "svga_cmd.h"
+#include "svga3d_caps.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+#include "svga_winsys.h"
+#include "vmw_context.h"
+#include "vmw_screen.h"
+#include "vmw_surface.h"
+#include "vmw_buffer.h"
+#include "vmw_fence.h"
+
+
+static struct svga_winsys_buffer *
+vmw_svga_winsys_buffer_create(struct svga_winsys_screen *sws,
+                              unsigned alignment,
+                              unsigned usage,
+                              unsigned size)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct pb_desc desc;
+   struct pb_manager *provider;
+   struct pb_buffer *buffer;
+
+   memset(&desc, 0, sizeof desc);
+   desc.alignment = alignment;
+   desc.usage = usage;
+
+   provider = vws->pools.gmr_fenced;
+
+   assert(provider);
+   buffer = provider->create_buffer(provider, size, &desc);
+   if(!buffer)
+      return NULL;
+
+   return vmw_svga_winsys_buffer(buffer);
+}
+
+
+static void *
+vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws,
+                           struct svga_winsys_buffer *buf,
+                           unsigned flags)
+{
+   (void)sws;
+   return pb_map(vmw_pb_buffer(buf), flags);
+}
+
+
+static void
+vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws,
+                             struct svga_winsys_buffer *buf)
+{
+   (void)sws;
+   pb_unmap(vmw_pb_buffer(buf));
+}
+
+
+static void
+vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws,
+                               struct svga_winsys_buffer *buf)
+{
+   struct pb_buffer *pbuf = vmw_pb_buffer(buf);
+   (void)sws;
+   pb_reference(&pbuf, NULL);
+}
+
+
+static void
+vmw_svga_winsys_fence_reference(struct svga_winsys_screen *sws,
+                                struct pipe_fence_handle **pdst,
+                                struct pipe_fence_handle *src)
+{
+   (void)sws;
+   *pdst = src;
+}
+
+
+static int
+vmw_svga_winsys_fence_signalled(struct svga_winsys_screen *sws,
+                                struct pipe_fence_handle *fence,
+                                unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   (void)flag;
+   return vmw_ioctl_fence_signalled(vws, vmw_fence(fence));
+}
+
+
+static int
+vmw_svga_winsys_fence_finish(struct svga_winsys_screen *sws,
+                             struct pipe_fence_handle *fence,
+                             unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   (void)flag;
+   return vmw_ioctl_fence_finish(vws, vmw_fence(fence));
+}
+
+
+
+static struct svga_winsys_surface *
+vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
+                               SVGA3dSurfaceFlags flags,
+                               SVGA3dSurfaceFormat format,
+                               SVGA3dSize size,
+                               uint32 numFaces,
+                               uint32 numMipLevels)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct vmw_svga_winsys_surface *surface;
+
+   surface = CALLOC_STRUCT(vmw_svga_winsys_surface);
+   if(!surface)
+      goto no_surface;
+
+   pipe_reference_init(&surface->refcnt, 1);
+   p_atomic_set(&surface->validated, 0);
+   surface->screen = vws;
+   surface->sid = vmw_ioctl_surface_create(vws,
+                                           flags, format, size,
+                                           numFaces, numMipLevels);
+   if(surface->sid == SVGA3D_INVALID_ID)
+      goto no_sid;
+
+   return svga_winsys_surface(surface);
+
+no_sid:
+   FREE(surface);
+no_surface:
+   return NULL;
+}
+
+
+static boolean
+vmw_svga_winsys_surface_is_flushed(struct svga_winsys_screen *sws,
+                                   struct svga_winsys_surface *surface)
+{
+   struct vmw_svga_winsys_surface *vsurf = vmw_svga_winsys_surface(surface);
+   return (p_atomic_read(&vsurf->validated) == 0);
+}
+
+
+static void
+vmw_svga_winsys_surface_ref(struct svga_winsys_screen *sws,
+			    struct svga_winsys_surface **pDst,
+			    struct svga_winsys_surface *src)
+{
+   struct vmw_svga_winsys_surface *d_vsurf = vmw_svga_winsys_surface(*pDst);
+   struct vmw_svga_winsys_surface *s_vsurf = vmw_svga_winsys_surface(src);
+
+   vmw_svga_winsys_surface_reference(&d_vsurf, s_vsurf);
+   *pDst = svga_winsys_surface(d_vsurf);
+}
+
+
+static void
+vmw_svga_winsys_destroy(struct svga_winsys_screen *sws)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+
+   vmw_winsys_destroy(vws);
+}
+
+
+static boolean
+vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws,
+                        SVGA3dDevCapIndex index,
+                        SVGA3dDevCapResult *result)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   const uint32 *capsBlock;
+   const SVGA3dCapsRecord *capsRecord = NULL;
+   uint32 offset;
+   const SVGA3dCapPair *capArray;
+   int numCaps, first, last;
+   /* Look up 'index' in the FIFO caps block; FALSE if it is not found. */
+   if(!vws->ioctl.fifo_map)
+      return FALSE;
+
+   if(vws->ioctl.fifo_map[SVGA_FIFO_3D_HWVERSION] < SVGA3D_HWVERSION_WS6_B1)
+      return FALSE;
+
+   /*
+    * Find the highest-typed DEVCAPS record; never scan past the caps block.
+    */
+   capsBlock = (const uint32 *)&vws->ioctl.fifo_map[SVGA_FIFO_3D_CAPS];
+   for (offset = 0; offset < SVGA_FIFO_3D_CAPS_SIZE &&
+                    capsBlock[offset] != 0; offset += capsBlock[offset]) {
+      const SVGA3dCapsRecord *record;
+      record = (const SVGA3dCapsRecord *) (capsBlock + offset);
+      if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) &&
+          (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) &&
+          (!capsRecord || (record->header.type > capsRecord->header.type))) {
+         capsRecord = record;
+      }
+   }
+
+   if(!capsRecord)
+      return FALSE;
+
+   /*
+    * Calculate the number of caps from the size of the record.
+    */
+   capArray = (const SVGA3dCapPair *) capsRecord->data;
+   numCaps = (int) ((capsRecord->header.length * sizeof(uint32) -
+                     sizeof capsRecord->header) / (2 * sizeof(uint32)));
+
+   /*
+    * Binary-search for the cap with the specified index.
+    */
+   for (first = 0, last = numCaps - 1; first <= last; ) {
+      int mid = (first + last) / 2;
+
+      if ((SVGA3dDevCapIndex) capArray[mid][0] == index) {
+         /*
+          * Found it.
+          */
+         result->u = capArray[mid][1];
+         return TRUE;
+      }
+
+      /*
+       * Divide and conquer.
+       */
+      if ((SVGA3dDevCapIndex) capArray[mid][0] > index) {
+         last = mid - 1;
+      } else {
+         first = mid + 1;
+      }
+   }
+
+   return FALSE;
+}
+
+
+boolean
+vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws)
+{
+   vws->base.destroy = vmw_svga_winsys_destroy;
+   vws->base.get_cap = vmw_svga_winsys_get_cap;
+   vws->base.context_create = vmw_svga_winsys_context_create;
+   vws->base.surface_create = vmw_svga_winsys_surface_create;
+   vws->base.surface_is_flushed = vmw_svga_winsys_surface_is_flushed;
+   vws->base.surface_reference = vmw_svga_winsys_surface_ref;
+   vws->base.buffer_create = vmw_svga_winsys_buffer_create;
+   vws->base.buffer_map = vmw_svga_winsys_buffer_map;
+   vws->base.buffer_unmap = vmw_svga_winsys_buffer_unmap;
+   vws->base.buffer_destroy = vmw_svga_winsys_buffer_destroy;
+   vws->base.fence_reference = vmw_svga_winsys_fence_reference;
+   vws->base.fence_signalled = vmw_svga_winsys_fence_signalled;
+   vws->base.fence_finish = vmw_svga_winsys_fence_finish;
+
+   return TRUE;
+}
+
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
new file mode 100644
index 0000000000..9ec4bf9272
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "svga_cmd.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+
+#include "vmw_surface.h"
+#include "vmw_screen.h"
+
+void
+vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
+                                  struct vmw_svga_winsys_surface *src)
+{
+   struct pipe_reference *src_ref;
+   struct pipe_reference *dst_ref;
+   struct vmw_svga_winsys_surface *dst = pdst ? *pdst : NULL;
+   /* Retarget *pdst to src, adjusting refcounts; NULL pdst is a no-op. */
+   if(pdst == NULL || dst == src)
+      return;
+   
+   src_ref = src ? &src->refcnt : NULL;
+   dst_ref = dst ? &dst->refcnt : NULL;
+   /* pipe_reference() reports when the old surface must be destroyed. */
+   if (pipe_reference(&dst_ref, src_ref)) {
+      vmw_ioctl_surface_destroy(dst->screen, dst->sid);
+#ifdef DEBUG
+      /* to detect dangling pointers */
+      assert(p_atomic_read(&dst->validated) == 0);
+      dst->sid = SVGA3D_INVALID_ID;
+#endif
+      FREE(dst);
+   }
+
+   *pdst = src;
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.h b/src/gallium/winsys/drm/vmware/core/vmw_surface.h
new file mode 100644
index 0000000000..340cc1532e
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.h
@@ -0,0 +1,79 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Surfaces for VMware SVGA winsys.
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_SURFACE_H_
+#define VMW_SURFACE_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_atomic.h"
+#include "pipe/p_refcnt.h"
+
+#define VMW_MAX_PRESENTS 3
+
+
+
+struct vmw_svga_winsys_surface
+{
+   struct pipe_atomic validated;
+   struct pipe_reference refcnt;
+
+   struct vmw_winsys_screen *screen;
+   uint32_t sid;
+
+   /* FIXME: make this thread safe */
+   unsigned next_present_no;
+   uint32_t present_fences[VMW_MAX_PRESENTS];
+};
+
+
+static INLINE struct svga_winsys_surface *
+svga_winsys_surface(struct vmw_svga_winsys_surface *surf)
+{
+   assert(!surf || surf->sid != SVGA3D_INVALID_ID);
+   return (struct svga_winsys_surface *)surf;
+}
+
+
+static INLINE struct vmw_svga_winsys_surface *
+vmw_svga_winsys_surface(struct svga_winsys_surface *surf)
+{
+   return (struct vmw_svga_winsys_surface *)surf;
+}
+
+
+void
+vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
+                                  struct vmw_svga_winsys_surface *src);
+
+#endif /* VMW_SURFACE_H_ */
diff --git a/src/gallium/winsys/drm/vmware/dri/Makefile b/src/gallium/winsys/drm/vmware/dri/Makefile
new file mode 100644
index 0000000000..8a39e23da6
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/dri/Makefile
@@ -0,0 +1,18 @@
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vmwgfx_dri.so
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/state_trackers/dri/libdridrm.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a
+
+C_SOURCES = \
+	$(COMMON_GALLIUM_SOURCES)
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript
new file mode 100644
index 0000000000..adf2bf16d1
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/dri/SConscript
@@ -0,0 +1,63 @@
+import os
+import os.path
+
+Import('*')
+
+if env['platform'] == 'linux':
+
+   if env['dri']:
+      env = env.Clone()
+
+      sources = [
+        '#/src/mesa/drivers/dri/common/utils.c',
+        '#/src/mesa/drivers/dri/common/vblank.c',
+        '#/src/mesa/drivers/dri/common/dri_util.c',
+        '#/src/mesa/drivers/dri/common/xmlconfig.c',
+         ]
+   
+      
+      env.ParseConfig('pkg-config --cflags --libs libdrm')
+      
+      env.Prepend(CPPPATH = [
+            '#/src/mesa/state_tracker',
+            '#/src/mesa/drivers/dri/common',
+            '#/src/mesa/main',
+            '#/src/mesa/glapi',
+            '#/src/mesa',
+            '#/include',
+            '#/src/gallium/drivers/svga',
+            '#/src/gallium/drivers/svga/include',
+            ])
+      
+      env.Append(CPPDEFINES = [
+            'HAVE_STDINT_H', 
+            'HAVE_SYS_TYPES_H',
+            ])
+
+      env.Append(CFLAGS = [
+            '-Werror',
+            '-std=gnu99',
+            '-D_FILE_OFFSET_BITS=64',
+            ])
+      
+      env.Prepend(LIBPATH = [
+            ])
+      
+      env.Prepend(LIBS = [
+            trace,
+            st_dri,
+            svgadrm,
+            svga,
+            mesa,
+            auxiliaries,
+            ])
+      
+      # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+      env.LoadableModule(
+         target ='vmwgfx_dri.so',
+         source = sources,
+         LIBS = env['LIBS'],
+         SHLIBPREFIX = '',
+         )
+      
+
diff --git a/src/gallium/winsys/drm/vmware/egl/Makefile b/src/gallium/winsys/drm/vmware/egl/Makefile
new file mode 100644
index 0000000000..8e2980c318
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/egl/Makefile
@@ -0,0 +1,18 @@
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = EGL_svga.so
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/state_trackers/egl/libegldrm.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a
+
+C_SOURCES = \
+	$(COMMON_GALLIUM_SOURCES)
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/winsys/drm/vmware/xorg/Makefile b/src/gallium/winsys/drm/vmware/xorg/Makefile
new file mode 100644
index 0000000000..e152263256
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/Makefile
@@ -0,0 +1,54 @@
+TARGET     = vmwgfx_drv.so
+CFILES     = $(wildcard ./*.c)
+OBJECTS    = $(patsubst ./%.c,./%.o,$(CFILES))
+TOP        = ../../../../../..
+
+include $(TOP)/configs/current
+
+INCLUDES = \
+	$(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
+	-I../gem \
+	-I$(TOP)/src/gallium/include \
+	-I$(TOP)/src/gallium/drivers \
+	-I$(TOP)/src/gallium/auxiliary \
+	-I$(TOP)/src/gallium
+
+LIBS = \
+	$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a \
+	$(GALLIUM_AUXILIARIES)
+
+DRIVER_DEFINES = \
+	-DHAVE_CONFIG_H
+
+
+#############################################
+
+
+
+all default: $(TARGET)
+
+$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS)
+	$(TOP)/bin/mklib -noprefix -o $@ \
+	$(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel
+
+clean:
+	rm -rf $(OBJECTS) $(TARGET)
+
+install:
+	$(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+
+
+##############################################
+
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@
+
+
+##############################################
+
+.PHONY: all clean install
diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript
new file mode 100644
index 0000000000..41a489774c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/SConscript
@@ -0,0 +1,55 @@
+import os.path
+
+Import('*')
+
+if env['platform'] == 'linux':
+
+	env = env.Clone()
+
+	env.ParseConfig('pkg-config --cflags --libs libdrm xorg-server')
+
+	env.Prepend(CPPPATH = [
+		'#/include',
+		'#/src/gallium',
+		'#/src/mesa',
+		'#/src/gallium/drivers/svga',
+		'#/src/gallium/drivers/svga/include',
+	])
+
+	env.Append(CPPDEFINES = [
+	])
+
+	if env['gcc']:
+		env.Append(CPPDEFINES = [
+			'HAVE_STDINT_H',
+			'HAVE_SYS_TYPES_H',
+		])
+		env.Append(CFLAGS = ['-Werror'])
+
+	env.Append(CFLAGS = [
+		'-std=gnu99',
+		'-D_FILE_OFFSET_BITS=64',
+	])
+
+	env.Prepend(LIBPATH = [
+	])
+
+	env.Prepend(LIBS = [
+		trace,
+		st_xorg,
+		svgadrm,
+		svga,
+                auxiliaries,
+	])
+
+	sources = [
+		'vmw_xorg.c',
+	]
+
+	# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+	env.LoadableModule(
+		target ='vmwgfx_drv.so',
+		source = sources,
+		LIBS = env['LIBS'],
+		SHLIBPREFIX = '',
+	)
diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c
new file mode 100644
index 0000000000..3acc110ae7
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c
@@ -0,0 +1,150 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Glue file for Xorg State Tracker.
+ *
+ * @author Alan Hourihane <alanh@tungstengraphics.com>
+ * @author Jakob Bornecrantz <wallbraker@gmail.com>
+ */
+
+#include "state_trackers/xorg/xorg_winsys.h"
+
+static void vmw_xorg_identify(int flags);
+static Bool vmw_xorg_pci_probe(DriverPtr driver,
+			       int entity_num,
+			       struct pci_device *device,
+			       intptr_t match_data);
+
+static const struct pci_id_match vmw_xorg_device_match[] = {
+    {0x15ad, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0},
+};
+
+static SymTabRec vmw_xorg_chipsets[] = {
+    {PCI_MATCH_ANY, "VMware SVGA Device"},
+    {-1, NULL}
+};
+
+static PciChipsets vmw_xorg_pci_devices[] = {
+    {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+    {-1, -1, NULL}
+};
+
+static XF86ModuleVersionInfo vmw_xorg_version = {
+    "vmwgfx",
+    MODULEVENDORSTRING,
+    MODINFOSTRING1,
+    MODINFOSTRING2,
+    XORG_VERSION_CURRENT,
+    0, 1, 0, /* major, minor, patch */
+    ABI_CLASS_VIDEODRV,
+    ABI_VIDEODRV_VERSION,
+    MOD_CLASS_VIDEODRV,
+    {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+_X_EXPORT DriverRec vmwgfx = {
+    1,
+    "vmwgfx",
+    vmw_xorg_identify,
+    NULL,
+    xorg_tracker_available_options,
+    NULL,
+    0,
+    NULL,
+    vmw_xorg_device_match,
+    vmw_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(vmw_xorg_setup);
+
+_X_EXPORT XF86ModuleData vmwgfxModuleData = {
+    &vmw_xorg_version,
+    vmw_xorg_setup,
+    NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+static pointer
+vmw_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+    static Bool setupDone = 0;
+
+    /* This module should be loaded only once, but check to be sure.
+     */
+    if (!setupDone) {
+	setupDone = 1;
+	xf86AddDriver(&vmwgfx, module, HaveDriverFuncs);
+
+	/*
+	 * The return value must be non-NULL on success even though there
+	 * is no TearDownProc.
+	 */
+	return (pointer) 1;
+    } else {
+	if (errmaj)
+	    *errmaj = LDR_ONCEONLY;
+	return NULL;
+    }
+}
+
+static void
+vmw_xorg_identify(int flags)
+{
+    xf86PrintChipsets("vmwgfx", "Driver for VMware SVGA device",
+		      vmw_xorg_chipsets);
+}
+
+static Bool
+vmw_xorg_pci_probe(DriverPtr driver,
+	  int entity_num, struct pci_device *device, intptr_t match_data)
+{
+    ScrnInfoPtr scrn = NULL;
+
+    /* Returns TRUE when xf86ConfigPciEntity() yields a screen for this
+     * entity; that screen is then wired to the xorg state tracker. */
+    scrn = xf86ConfigPciEntity(scrn, 0, entity_num, vmw_xorg_pci_devices,
+			       NULL, NULL, NULL, NULL, NULL);
+    if (scrn != NULL) {
+	scrn->driverVersion = 1;
+	scrn->driverName = "vmwgfx";
+	scrn->name = "vmwgfx";
+	scrn->Probe = NULL;
+
+	/* Use all the functions from the xorg tracker */
+	xorg_tracker_set_functions(scrn);
+    }
+
+    return scrn != NULL;
+}
-- 
cgit v1.2.3


From 49289f1d25d42a6b3eb5da5f85b2dd6a14cda8e7 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Tue, 17 Nov 2009 19:49:56 +0100
Subject: nouveau: nv30: Add missing include to fix warning

---
 src/gallium/drivers/nv30/nv30_fragprog.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index cc0385426c..0ce702d6f8 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -4,6 +4,7 @@
 #include "pipe/p_inlines.h"
 
 #include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 
@@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);	
+		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
 		break;
 	case NV30SR_NONE:
 		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
@@ -666,7 +667,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
 		{
 			struct tgsi_full_immediate *imm;
 			float vals[4];
-			
+
 			imm = &p.FullToken.FullImmediate;
 			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
 			assert(fpc->nr_imm < MAX_IMM);
@@ -754,7 +755,7 @@ nv30_fragprog_translate(struct nv30_context *nv30,
 	fp->insn[fpc->inst_offset + 1] = 0x00000000;
 	fp->insn[fpc->inst_offset + 2] = 0x00000000;
 	fp->insn[fpc->inst_offset + 3] = 0x00000000;
-	
+
 	fp->translated = TRUE;
 	fp->on_hw = FALSE;
 out_err:
@@ -838,7 +839,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
 update_constants:
 	if (fp->nr_consts) {
 		float *map;
-		
+
 		map = pipe_buffer_map(pscreen, constbuf,
 				      PIPE_BUFFER_USAGE_CPU_READ);
 		for (i = 0; i < fp->nr_consts; i++) {
-- 
cgit v1.2.3


From b353106467d386b48877d6ae1048cca3feaf99ff Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Tue, 17 Nov 2009 19:50:37 +0100
Subject: nouveau: nv30: Check for NULL front (happens with DRI2)

---
 src/gallium/drivers/nv30/nv30_screen.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 221ae1b5f8..7cd36902eb 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -116,7 +116,10 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_Z24X8_UNORM:
 			return TRUE;
 		case PIPE_FORMAT_Z16_UNORM:
-			return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
+			if (front) {
+				return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
+			}
+			return TRUE;
 		default:
 			break;
 		}
-- 
cgit v1.2.3


From 8c5a108dc321c4760e6d70b1104493b5bd54e6de Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 17 Nov 2009 09:07:15 +0100
Subject: svga: Remove -Werror for now as GCC 4.4.x raises a bunch of warnings

---
 src/gallium/drivers/svga/Makefile             | 2 +-
 src/gallium/drivers/svga/SConscript           | 3 ---
 src/gallium/winsys/drm/vmware/core/SConscript | 2 +-
 3 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
index 05ab4ab9b3..fe1d6d7384 100644
--- a/src/gallium/drivers/svga/Makefile
+++ b/src/gallium/drivers/svga/Makefile
@@ -57,7 +57,7 @@ CC = gcc -fvisibility=hidden -msse -msse2
 
 # Set the gnu99 standard to enable anonymous structs in vmware headers.
 #
-CFLAGS = -Wall -Werror -Wmissing-prototypes -std=gnu99 -ffast-math \
+CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \
 	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS)
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index 0fa745c9b8..ff9645fc03 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -10,9 +10,6 @@ if env['gcc']:
 		'HAVE_STDINT_H', 
 		'HAVE_SYS_TYPES_H',
 	])
-	if env['platform'] not in ['windows']:
-		# The Windows headers cause many gcc warnings
-		env.Append(CCFLAGS = ['-Werror'])
 	
 env.Prepend(CPPPATH = [
 	'include',
diff --git a/src/gallium/winsys/drm/vmware/core/SConscript b/src/gallium/winsys/drm/vmware/core/SConscript
index 1875b659ac..edaf9458be 100644
--- a/src/gallium/winsys/drm/vmware/core/SConscript
+++ b/src/gallium/winsys/drm/vmware/core/SConscript
@@ -3,7 +3,7 @@ Import('*')
 env = env.Clone()
 
 if env['gcc']:
-	env.Append(CCFLAGS = ['-fvisibility=hidden', '-Werror'])
+	env.Append(CCFLAGS = ['-fvisibility=hidden'])
 	env.Append(CPPDEFINES = [
 		'HAVE_STDINT_H', 
 		'HAVE_SYS_TYPES_H',
-- 
cgit v1.2.3


From d299ee771b577a8ce839861d1af336fc316e0a1d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sun, 15 Nov 2009 11:47:57 -0800
Subject: i965g: rename offset_edge to offset_edgeflag

---
 src/gallium/drivers/i965/brw_clip.h          |  2 +-
 src/gallium/drivers/i965/brw_clip_unfilled.c | 10 +++++-----
 src/gallium/drivers/i965/brw_clip_util.c     |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 772c34be88..9bec9643d7 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -132,7 +132,7 @@ struct brw_clip_compile {
    GLuint offset_bfc0;
    GLuint offset_bfc1;
    
-   GLuint offset_edge;
+   GLuint offset_edgeflag;
 };
 
 #define ATTR_SIZE  (4*4)
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
index 1cb86dd25b..0fab3a5f1a 100644
--- a/src/gallium/drivers/i965/brw_clip_unfilled.c
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -214,12 +214,12 @@ static void merge_edgeflags( struct brw_clip_compile *c )
    {   
       brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
       brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8));
-      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edge), brw_imm_f(0));
+      brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset_edgeflag), brw_imm_f(0));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
 
       brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ);
       brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9));
-      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edge), brw_imm_f(0));
+      brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset_edgeflag), brw_imm_f(0));
       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
    }
    brw_ENDIF(p, is_poly);
@@ -290,7 +290,7 @@ static void emit_lines(struct brw_clip_compile *c,
       /* draw edge if edgeflag != 0 */
       brw_CMP(p, 
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
-	      deref_1f(v0, c->offset_edge),
+	      deref_1f(v0, c->offset_edgeflag),
 	      brw_imm_f(0));
       draw_edge = brw_IF(p, BRW_EXECUTE_1);
       {
@@ -329,7 +329,7 @@ static void emit_points(struct brw_clip_compile *c,
        */
       brw_CMP(p, 
 	      vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, 
-	      deref_1f(v0, c->offset_edge),
+	      deref_1f(v0, c->offset_edgeflag),
 	      brw_imm_f(0));
       draw_point = brw_IF(p, BRW_EXECUTE_1);
       {
@@ -446,7 +446,7 @@ void brw_emit_unfilled_clip( struct brw_clip_compile *c )
    brw_clip_tri_init_vertices(c);
    brw_clip_init_ff_sync(c);
 
-   assert(c->offset_edge);
+   assert(c->offset_edgeflag);
 
    if (c->key.fill_ccw == CLIP_CULL &&
        c->key.fill_cw == CLIP_CULL) {
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index f8f98c8037..018511e699 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -146,7 +146,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
       if (c->chipset.is_igdng)
           delta = i * 16 + 32 * 3;
 
-      if (delta == c->offset_edge) {
+      if (delta == c->offset_edgeflag) {
 	 if (force_edgeflag) 
 	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
 	 else
-- 
cgit v1.2.3


From 1877e6cd2d76143ef8a9c516122afe614ae3b4a4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 17 Nov 2009 14:46:23 -0800
Subject: i965g: handle special vs outputs specially

Where vs output semantic tags indicate an output is significant for
fixed function processing (such as clipping, unfilled modes, etc),
retain information about that output so that we can get to it easily
later on.

Fix up the unfilled processing, but hard-wire edgeflag to one for now.

With this change, trivial/tri-unfilled works.
---
 src/gallium/drivers/i965/brw_clip.c          | 45 ++++++++++++++++++++--------
 src/gallium/drivers/i965/brw_clip.h          | 15 +++++++---
 src/gallium/drivers/i965/brw_clip_line.c     |  5 ++--
 src/gallium/drivers/i965/brw_clip_tri.c      | 12 ++++----
 src/gallium/drivers/i965/brw_clip_unfilled.c |  9 +++---
 src/gallium/drivers/i965/brw_clip_util.c     |  2 +-
 src/gallium/drivers/i965/brw_context.h       | 19 +++++++++---
 src/gallium/drivers/i965/brw_pipe_shader.c   | 38 ++++++++++++++++++++---
 src/gallium/drivers/i965/brw_vs.c            | 13 +++++++-
 src/gallium/drivers/i965/brw_vs_emit.c       | 42 ++++++++++++++++----------
 src/gallium/drivers/i965/brw_wm_pass2.c      |  2 +-
 11 files changed, 147 insertions(+), 55 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 35e1d2fdbd..4ec7b823e8 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -58,7 +58,6 @@ compile_clip_prog( struct brw_context *brw,
    const GLuint *program;
    GLuint program_size;
    GLuint delta;
-   GLuint i;
 
    memset(&c, 0, sizeof(c));
    
@@ -82,16 +81,26 @@ compile_clip_prog( struct brw_context *brw,
    else
        delta = REG_SIZE;
 
-   /* XXX: c.offset is now pretty redundant:
-    */
-   for (i = 0; i < c.key.nr_attrs; i++) {
-      c.offset[i] = delta;
-      delta += ATTR_SIZE;
-   }
-
    /* XXX: c.nr_attrs is very redundant:
     */
    c.nr_attrs = c.key.nr_attrs;
+
+   c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
+
+   if (c.key.output_color0)
+      c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
+
+   if (c.key.output_color1)
+      c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
+
+   if (c.key.output_bfc0)
+      c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
+
+   if (c.key.output_bfc1)
+      c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
+
+   if (c.key.output_edgeflag)
+      c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
    
    if (BRW_IS_IGDNG(brw))
        c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
@@ -158,21 +167,33 @@ compile_clip_prog( struct brw_context *brw,
 static enum pipe_error
 upload_clip_prog(struct brw_context *brw)
 {
-   enum pipe_error ret;
+   const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
    struct brw_clip_prog_key key;
+   enum pipe_error ret;
 
    /* Populate the key, starting from the almost-complete version from
     * the rast state. 
     */
 
    /* PIPE_NEW_RAST */
-   memcpy(&key, &brw->curr.rast->clip_key, sizeof key);
-
+   key = brw->curr.rast->clip_key;
+   
    /* BRW_NEW_REDUCED_PRIMITIVE */
    key.primitive = brw->reduced_primitive;
 
+   /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove
+    * dependency on CACHE_NEW_VS_PROG
+    */
+   /* CACHE_NEW_VS_PROG */
+   key.nr_attrs        = brw->vs.prog_data->nr_outputs;
+   key.output_edgeflag = brw->vs.prog_data->output_edgeflag;
+
    /* PIPE_NEW_VS */
-   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1;
+   key.output_hpos     = vs->output_hpos;
+   key.output_color0   = vs->output_color0;
+   key.output_color1   = vs->output_color1;
+   key.output_bfc0     = vs->output_bfc0;
+   key.output_bfc1     = vs->output_bfc1;
 
    /* PIPE_NEW_CLIP */
    key.nr_userclip = brw->curr.ucp.nr;
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 9bec9643d7..8729efa47b 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -42,7 +42,7 @@
  * up polygon offset and flatshading at this point:
  */
 struct brw_clip_prog_key {
-   GLuint nr_attrs:5;
+   GLuint nr_attrs:6;
    GLuint primitive:4;
    GLuint nr_userclip:3;
    GLuint do_flat_shading:1;
@@ -54,7 +54,14 @@ struct brw_clip_prog_key {
    GLuint copy_bfc_cw:1;
    GLuint copy_bfc_ccw:1;
    GLuint clip_mode:3;
-   GLuint pad1:7;
+   GLuint output_hpos:6;        /* not always zero? */
+
+   GLuint output_color0:6;
+   GLuint output_color1:6;
+   GLuint output_bfc0:6;
+   GLuint output_bfc1:6;
+   GLuint output_edgeflag:6;
+   GLuint pad1:2;
    
    GLfloat offset_factor;
    GLfloat offset_units;
@@ -123,7 +130,6 @@ struct brw_clip_compile {
    GLuint last_mrf;
 
    GLuint header_position_offset;
-   GLuint offset[PIPE_MAX_SHADER_OUTPUTS];
    GLboolean need_ff_sync;
 
    GLuint nr_color_attrs;
@@ -131,7 +137,8 @@ struct brw_clip_compile {
    GLuint offset_color1;
    GLuint offset_bfc0;
    GLuint offset_bfc1;
-   
+
+   GLuint offset_hpos;
    GLuint offset_edgeflag;
 };
 
diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c
index a4790bda95..54282d975e 100644
--- a/src/gallium/drivers/i965/brw_clip_line.c
+++ b/src/gallium/drivers/i965/brw_clip_line.c
@@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
    struct brw_instruction *is_neg2 = NULL;
    struct brw_instruction *not_culled;
    struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD);
-   const int hpos = 0;		/* XXX: position not always first element */
 
    brw_MOV(p, get_addr_reg(vtx0),      brw_address(c->reg.vertex[0]));
    brw_MOV(p, get_addr_reg(vtx1),      brw_address(c->reg.vertex[1]));
@@ -173,12 +172,12 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
 
 	 /* dp = DP4(vtx->position, plane) 
 	  */
-	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation);
+	 brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation);
 
 	 /* if (IS_NEGATIVE(dp1)) 
 	  */
 	 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
-	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation);
+	 brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation);
 	 is_negative = brw_IF(p, BRW_EXECUTE_1);
 	 {
              /*
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index 5486f4fa89..fa00f6044f 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -249,13 +249,13 @@ void brw_clip_tri( struct brw_clip_compile *c )
 
 	    /* IS_NEGATIVE(prev) */
 	    brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
-	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	    brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation);
 	    prev_test = brw_IF(p, BRW_EXECUTE_1);
 	    {
 	       /* IS_POSITIVE(next)
 		*/
 	       brw_set_conditionalmod(p, BRW_CONDITIONAL_GE);
-	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
 	       next_test = brw_IF(p, BRW_EXECUTE_1);
 	       {
 
@@ -297,7 +297,7 @@ void brw_clip_tri( struct brw_clip_compile *c )
 	       /* IS_NEGATIVE(next)
 		*/
 	       brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
-	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation);
+	       brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation);
 	       next_test = brw_IF(p, BRW_EXECUTE_1);
 	       {
 		  /* Going out of bounds.  Avoid division by zero as we
@@ -462,9 +462,9 @@ static void brw_clip_test( struct brw_clip_compile *c )
     brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
     brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
     brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
-    brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
-    brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
-    brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
+    brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos));
+    brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos));
+    brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos));
     brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
 
     /* test nearz, xmin, ymin plane */
diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c
index 0fab3a5f1a..aec835b8ce 100644
--- a/src/gallium/drivers/i965/brw_clip_unfilled.c
+++ b/src/gallium/drivers/i965/brw_clip_unfilled.c
@@ -45,9 +45,9 @@ static void compute_tri_direction( struct brw_clip_compile *c )
    struct brw_compile *p = &c->func;
    struct brw_reg e = c->reg.tmp0;
    struct brw_reg f = c->reg.tmp1;
-   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); 
-   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); 
-   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); 
+   struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); 
+   struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); 
+   struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); 
 
 
    struct brw_reg v0n = get_tmp(c);
@@ -123,7 +123,8 @@ static void copy_bfc( struct brw_clip_compile *c )
 
    /* Do we have any colors to copy? 
     */
-   if (c->nr_color_attrs == 0)
+   if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) &&
+       (c->offset_color1 == 0 || c->offset_bfc1 == 0))
       return;
 
    /* In some wierd degnerate cases we can end up testing the
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 018511e699..872042c9a9 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -106,7 +106,7 @@ static void brw_clip_project_vertex( struct brw_clip_compile *c,
    /* Fixup position.  Extract from the original vertex and re-project
     * to screen space:
     */
-   brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS]));
+   brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos));
    brw_clip_project_position(c, tmp);
    brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp);
 	 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 31f3cf3685..31e04b6e14 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -152,13 +152,23 @@ struct brw_rasterizer_state;
 
 struct brw_vertex_shader {
    const struct tgsi_token *tokens;
+   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
+
    struct tgsi_shader_info info;
 
-   unsigned  has_flow_control:1;
+   GLuint has_flow_control:1;
+   GLuint use_const_buffer:1;
+
+   /* Offsets of special vertex shader outputs required for clipping.
+    */
+   GLuint output_hpos:6;        /* not always zero? */
+   GLuint output_color0:6;
+   GLuint output_color1:6;
+   GLuint output_bfc0:6;
+   GLuint output_bfc1:6;
+   GLuint output_edgeflag:6;
 
    unsigned id;
-   struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
-   GLboolean use_const_buffer;
 };
 
 struct brw_fs_signature {
@@ -317,7 +327,8 @@ struct brw_vs_prog_data {
 
    GLuint nr_params;       /**< number of TGSI_FILE_CONSTANT's */
 
-   GLboolean copy_edgeflag;
+   GLuint output_edgeflag;
+
    GLboolean writes_psiz;
 
    /* Used for calculating urb partitions:
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 7febf9e0c2..02bc8fa130 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -124,21 +124,51 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
 				  const struct pipe_shader_state *shader )
 {
    struct brw_context *brw = brw_context(pipe);
+   struct brw_vertex_shader *vs;
+   unsigned i;
 
-   struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader);
+   vs = CALLOC_STRUCT(brw_vertex_shader);
    if (vs == NULL)
       return NULL;
 
    /* Duplicate tokens, scan shader
     */
-   vs->id = brw->program_id++;
-   vs->has_flow_control = has_flow_control(&vs->info);
-
    vs->tokens = tgsi_dup_tokens(shader->tokens);
    if (vs->tokens == NULL)
       goto fail;
 
    tgsi_scan_shader(vs->tokens, &vs->info);
+
+   vs->id = brw->program_id++;
+   vs->has_flow_control = has_flow_control(&vs->info);
+
+   for (i = 0; i < vs->info.num_outputs; i++) {
+      int index = vs->info.output_semantic_index[i];
+      switch (vs->info.output_semantic_name[i]) {
+      case TGSI_SEMANTIC_POSITION:
+         vs->output_hpos = i;
+         break;
+      case TGSI_SEMANTIC_COLOR:
+         if (index == 0)
+            vs->output_color0 = i;
+         else
+            vs->output_color1 = i;
+         break;
+      case TGSI_SEMANTIC_BCOLOR:
+         if (index == 0)
+            vs->output_bfc0 = i;
+         else
+            vs->output_bfc1 = i;
+         break;
+#if 0
+      case TGSI_SEMANTIC_EDGEFLAG:
+         vs->output_edgeflag = i;
+         break;
+#endif
+      }
+   }
+
+
    
    /* Done:
     */
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 05a62ed974..2668392919 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -57,7 +57,18 @@ static enum pipe_error do_vs_prog( struct brw_context *brw,
 
    c.prog_data.nr_outputs = vp->info.num_outputs;
    c.prog_data.nr_inputs = vp->info.num_inputs;
-   c.prog_data.copy_edgeflag = c.key.copy_edgeflag;
+
+   /* XXX: we want edgeflag handling to be integrated to the vertex
+    * shader, but are currently faking the edgeflag output:
+    */
+   if (c.key.copy_edgeflag) {
+      c.prog_data.output_edgeflag = c.prog_data.nr_outputs;
+      c.prog_data.nr_outputs++;
+   }
+   else {
+      c.prog_data.output_edgeflag = ~0;
+   }
+
 
    if (1)
       tgsi_dump(c.vp->tokens, 0);
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 933c9c4d63..bcaeaca62d 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -70,11 +70,17 @@ static boolean is_position_output( struct brw_vs_compile *c,
                                    unsigned vs_output )
 {
    struct brw_vertex_shader *vs = c->vp;
-   unsigned semantic = vs->info.output_semantic_name[vs_output];
-   unsigned index = vs->info.output_semantic_index[vs_output];
 
-   return (semantic == TGSI_SEMANTIC_POSITION &&
-           index == 0);
+   if (vs_output == c->prog_data.output_edgeflag) {
+      return FALSE;
+   }
+   else {
+      unsigned semantic = vs->info.output_semantic_name[vs_output];
+      unsigned index = vs->info.output_semantic_index[vs_output];
+      
+      return (semantic == TGSI_SEMANTIC_POSITION &&
+              index == 0);
+   }
 }
 
 
@@ -83,15 +89,22 @@ static boolean find_output_slot( struct brw_vs_compile *c,
                                   unsigned *fs_input_slot )
 {
    struct brw_vertex_shader *vs = c->vp;
-   unsigned semantic = vs->info.output_semantic_name[vs_output];
-   unsigned index = vs->info.output_semantic_index[vs_output];
-   unsigned i;
 
-   for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
-      if (c->key.fs_signature.input[i].semantic == semantic &&
+   if (vs_output == c->prog_data.output_edgeflag) {
+      *fs_input_slot = c->key.fs_signature.nr_inputs;
+      return TRUE;
+   }
+   else {
+      unsigned semantic = vs->info.output_semantic_name[vs_output];
+      unsigned index = vs->info.output_semantic_index[vs_output];
+      unsigned i;
+
+      for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+         if (c->key.fs_signature.input[i].semantic == semantic &&
           c->key.fs_signature.input[i].semantic_index == index) {
-         *fs_input_slot = i;
-         return TRUE;
+            *fs_input_slot = i;
+            return TRUE;
+         }
       }
    }
 
@@ -219,7 +232,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 
    /* XXX: need to access vertex output semantics here:
     */
-   for (i = 0; i < c->prog_data.nr_outputs; i++) {
+   for (i = 0; i < c->nr_outputs; i++) {
       unsigned slot;
 
       /* XXX: Put output position in slot zero always.  Clipper, etc,
@@ -1116,10 +1129,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    GLuint len_vertext_header = 2;
 
    if (c->key.copy_edgeflag) {
-      assert(0);
       brw_MOV(p, 
-	      get_reg(c, TGSI_FILE_OUTPUT, 0),
-	      get_reg(c, TGSI_FILE_INPUT, 0));
+              get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag),
+              brw_imm_f(1));
    }
 
    /* Build ndc coords */
diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index 2a879863ab..56f39d036b 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -93,7 +93,7 @@ static void init_registers( struct brw_wm_compile *c )
    assert(c->key.vp_nr_outputs >= 1);
 
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
-   c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2;
+   c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2;
    c->prog_data.curb_read_length = c->nr_creg * 2;
 
    /* Note this allocation:
-- 
cgit v1.2.3


From 845ddbc9aa62d1c9142822608370d96b2d68cec0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 17 Nov 2009 16:14:54 -0700
Subject: i915g: remove trailing commas in enum lists to silence warnings

---
 src/gallium/drivers/i915/intel_winsys.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h
index 2c8dc63f3f..c6bf6e6f7f 100644
--- a/src/gallium/drivers/i915/intel_winsys.h
+++ b/src/gallium/drivers/i915/intel_winsys.h
@@ -42,21 +42,21 @@ enum intel_buffer_usage
    INTEL_USAGE_2D_TARGET = 0x04,
    INTEL_USAGE_2D_SOURCE = 0x08,
    /* use on vertex */
-   INTEL_USAGE_VERTEX    = 0x10,
+   INTEL_USAGE_VERTEX    = 0x10
 };
 
 enum intel_buffer_type
 {
    INTEL_NEW_TEXTURE,
    INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */
-   INTEL_NEW_VERTEX,
+   INTEL_NEW_VERTEX
 };
 
 enum intel_buffer_tile
 {
    INTEL_TILE_NONE,
    INTEL_TILE_X,
-   INTEL_TILE_Y,
+   INTEL_TILE_Y
 };
 
 struct intel_batchbuffer {
-- 
cgit v1.2.3


From 7e3955d8e80c364d9b4c9eee1ec9758ff3ab8a1d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 17 Nov 2009 16:15:29 -0700
Subject: i915g: replace //-style comments

---
 src/gallium/drivers/i915/i915_state.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 7d48e6e84d..71f00bc346 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap)
       return TEXCOORDMODE_CLAMP_EDGE;
    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       return TEXCOORDMODE_CLAMP_BORDER;
-//   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
-//      return TEXCOORDMODE_MIRROR;
+/*
+   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+      return TEXCOORDMODE_MIRROR;
+*/
    default:
       return TEXCOORDMODE_WRAP;
    }
-- 
cgit v1.2.3


From 1dbf3642b9c1c37f72e2212ce78056cf8959a957 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Fri, 20 Nov 2009 18:08:29 +0000
Subject: Fix memory leak.

---
 src/gallium/drivers/softpipe/sp_state_fs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 256faa94b8..b41f7e8ab7 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
    struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs;
 
    draw_delete_vertex_shader(softpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
 
-- 
cgit v1.2.3


From 6b1ede0110f855218119a7a3b26fe3b26aee1bbd Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 18 Nov 2009 14:40:43 -0800
Subject: i965g: remove more references to vp_nr_outputs in wm compilation

We're really more concerned about how many inputs the fragment shader
is expecting.
---
 src/gallium/drivers/i965/brw_wm_pass2.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c
index 56f39d036b..19248b4519 100644
--- a/src/gallium/drivers/i965/brw_wm_pass2.c
+++ b/src/gallium/drivers/i965/brw_wm_pass2.c
@@ -87,11 +87,9 @@ static void init_registers( struct brw_wm_compile *c )
 
    /* XXX: currently just hope the VS outputs line up with FS inputs:
     */
-   for (j = 0; j < c->key.vp_nr_outputs; j++)
+   for (j = 0; j < c->key.nr_inputs; j++)
       prealloc_reg(c, &c->payload.input_interp[j], reg++);
 
-   assert(c->key.vp_nr_outputs >= 1);
-
    c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
    c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2;
    c->prog_data.curb_read_length = c->nr_creg * 2;
-- 
cgit v1.2.3


From f56b95e40796ea3859b1cb83341730bf74a6f85f Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 19 Nov 2009 08:18:58 +0100
Subject: identity: Add missing screen methods.

---
 src/gallium/drivers/identity/id_objects.c | 39 +++++++++++++++++++++++++++++++
 src/gallium/drivers/identity/id_objects.h | 25 ++++++++++++++++++++
 src/gallium/drivers/identity/id_public.h  |  2 +-
 src/gallium/drivers/identity/id_screen.c  | 33 ++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index e893e59940..bc9bc7121d 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer *id_transfer)
    screen->tex_transfer_destroy(id_transfer->transfer);
    FREE(id_transfer);
 }
+
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+                              struct pipe_video_surface *video_surface)
+{
+   struct identity_video_surface *id_video_surface;
+
+   if (!video_surface) {
+      goto error;
+   }
+
+   assert(video_surface->screen == id_screen->screen);
+
+   id_video_surface = CALLOC_STRUCT(identity_video_surface);
+   if (!id_video_surface) {
+      goto error;
+   }
+
+   memcpy(&id_video_surface->base,
+          video_surface,
+          sizeof(struct pipe_video_surface));
+
+   pipe_reference_init(&id_video_surface->base.reference, 1);
+   id_video_surface->base.screen = &id_screen->base;
+   id_video_surface->video_surface = video_surface;
+
+   return &id_video_surface->base;
+
+error:
+   pipe_video_surface_reference(&video_surface, NULL);
+   return NULL;
+}
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface)
+{
+   pipe_video_surface_reference(&id_video_surface->video_surface, NULL);
+   FREE(id_video_surface);
+}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index ce58faa3c7..77cc719079 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -31,6 +31,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
 
 #include "id_screen.h"
 
@@ -67,6 +68,14 @@ struct identity_transfer
 };
 
 
+struct identity_video_surface
+{
+   struct pipe_video_surface base;
+
+   struct pipe_video_surface *video_surface;
+};
+
+
 static INLINE struct identity_buffer *
 identity_buffer(struct pipe_buffer *_buffer)
 {
@@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer)
    return (struct identity_transfer *)_transfer;
 }
 
+static INLINE struct identity_video_surface *
+identity_video_surface(struct pipe_video_surface *_video_surface)
+{
+   if (!_video_surface) {
+      return NULL;
+   }
+   (void)identity_screen(_video_surface->screen);
+   return (struct identity_video_surface *)_video_surface;
+}
 
 static INLINE struct pipe_buffer *
 identity_buffer_unwrap(struct pipe_buffer *_buffer)
@@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture,
 void
 identity_transfer_destroy(struct identity_transfer *id_transfer);
 
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+                              struct pipe_video_surface *video_surface);
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface);
+
 
 #endif /* ID_OBJECTS_H */
diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h
index cac14cfd60..3d2862eaa0 100644
--- a/src/gallium/drivers/identity/id_public.h
+++ b/src/gallium/drivers/identity/id_public.h
@@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen);
 struct pipe_context *
 identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe);
 
-#endif /* PT_PUBLIC_H */
+#endif /* ID_PUBLIC_H */
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 26439637d0..53eae3ef54 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer)
    identity_buffer_destroy(identity_buffer(_buffer));
 }
 
+static struct pipe_video_surface *
+identity_screen_video_surface_create(struct pipe_screen *_screen,
+                                     enum pipe_video_chroma_format chroma_format,
+                                     unsigned width,
+                                     unsigned height)
+{
+   struct identity_screen *id_screen = identity_screen(_screen);
+   struct pipe_screen *screen = id_screen->screen;
+   struct pipe_video_surface *result;
+
+   result = screen->video_surface_create(screen,
+                                         chroma_format,
+                                         width,
+                                         height);
+
+   if (result) {
+      return identity_video_surface_create(id_screen, result);
+   }
+   return NULL;
+}
+
+static void
+identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc)
+{
+   identity_video_surface_destroy(identity_video_surface(_vsfc));
+}
+
 static void
 identity_screen_flush_frontbuffer(struct pipe_screen *_screen,
                                   struct pipe_surface *_surface,
@@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen)
    if (screen->buffer_unmap)
       id_screen->base.buffer_unmap = identity_screen_buffer_unmap;
    id_screen->base.buffer_destroy = identity_screen_buffer_destroy;
+   if (screen->video_surface_create) {
+      id_screen->base.video_surface_create = identity_screen_video_surface_create;
+   }
+   if (screen->video_surface_destroy) {
+      id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy;
+   }
    id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer;
    id_screen->base.fence_reference = identity_screen_fence_reference;
    id_screen->base.fence_signalled = identity_screen_fence_signalled;
-- 
cgit v1.2.3


From 367cfca808e74101689dd0acb247f3ec38fc4c7f Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 19 Nov 2009 11:37:50 -0700
Subject: softpipe: add missing check in softpipe_is_texture_referenced()

Check if the named texture is referenced by the texture cache.
---
 src/gallium/drivers/softpipe/sp_context.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 94d000a5ac..d325499bf8 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -120,7 +120,7 @@ softpipe_destroy( struct pipe_context *pipe )
  * if (the texture is being used as a framebuffer surface)
  *    return PIPE_REFERENCED_FOR_WRITE
  * else if (the texture is a bound texture source)
- *    return PIPE_REFERENCED_FOR_READ  XXX not done yet
+ *    return PIPE_REFERENCED_FOR_READ
  * else
  *    return PIPE_UNREFERENCED
  */
@@ -132,6 +132,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
    struct softpipe_context *softpipe = softpipe_context( pipe );
    unsigned i;
 
+   /* check if any of the bound drawing surfaces are this texture */
    if (softpipe->dirty_render_cache) {
       for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
          if (softpipe->framebuffer.cbufs[i] && 
@@ -145,7 +146,12 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
       }
    }
    
-   /* FIXME: we also need to do the same for the texture cache */
+   /* check if any of the tex_cache textures are this texture */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      if (softpipe->tex_cache[i] &&
+          softpipe->tex_cache[i]->texture == texture)
+         return PIPE_REFERENCED_FOR_READ;
+   }
    
    return PIPE_UNREFERENCED;
 }
-- 
cgit v1.2.3


From 3f4016650099642f900fc169c078b1d78128899a Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 19 Nov 2009 14:02:06 -0700
Subject: softpipe: whitespace/indentation fixes

---
 src/gallium/drivers/softpipe/sp_context.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index d325499bf8..5f60139968 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -249,9 +249,9 @@ softpipe_create( struct pipe_screen *screen )
 
 
    /* setup quad rendering stages */
-      softpipe->quad.shade = sp_quad_shade_stage(softpipe);
-      softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
-      softpipe->quad.blend = sp_quad_blend_stage(softpipe);
+   softpipe->quad.shade = sp_quad_shade_stage(softpipe);
+   softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
+   softpipe->quad.blend = sp_quad_blend_stage(softpipe);
 
 
    /*
@@ -281,7 +281,6 @@ softpipe_create( struct pipe_screen *screen )
    draw_set_render(softpipe->draw, softpipe->vbuf_backend);
 
 
-
    /* plug in AA line/point stages */
    draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
    draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
@@ -297,4 +296,3 @@ softpipe_create( struct pipe_screen *screen )
    softpipe_destroy(&softpipe->pipe);
    return NULL;
 }
-
-- 
cgit v1.2.3


From 683e35f726a182ed9fc6b6d5cb07146eebe14dea Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 14:39:34 -0800
Subject: gallium: don't use arrays for texture width,height,depth

---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c    | 10 +--
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c  |  6 +-
 src/gallium/auxiliary/util/u_blit.c              | 26 +++----
 src/gallium/auxiliary/util/u_gen_mipmap.c        | 25 +++---
 src/gallium/auxiliary/util/u_math.h              |  4 +-
 src/gallium/auxiliary/util/u_surface.c           |  6 +-
 src/gallium/auxiliary/vl/vl_compositor.c         | 12 +--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 22 +++---
 src/gallium/drivers/i915/i915_state_sampler.c    |  2 +-
 src/gallium/drivers/i915/i915_texture.c          | 96 +++++++++++-------------
 src/gallium/drivers/softpipe/sp_tex_sample.c     | 58 +++++++-------
 src/gallium/drivers/softpipe/sp_tex_tile_cache.c |  7 +-
 src/gallium/drivers/softpipe/sp_texture.c        | 49 ++++++------
 src/gallium/drivers/trace/tr_dump_state.c        |  6 +-
 src/gallium/drivers/trace/tr_rbug.c              |  6 +-
 src/gallium/include/pipe/p_state.h               |  6 +-
 src/mesa/state_tracker/st_atom_framebuffer.c     |  5 +-
 src/mesa/state_tracker/st_atom_pixeltransfer.c   |  2 +-
 src/mesa/state_tracker/st_cb_drawpixels.c        |  4 +-
 src/mesa/state_tracker/st_cb_fbo.c               |  6 +-
 src/mesa/state_tracker/st_cb_readpixels.c        |  2 +-
 src/mesa/state_tracker/st_cb_texture.c           | 12 +--
 src/mesa/state_tracker/st_gen_mipmap.c           | 39 +++++-----
 src/mesa/state_tracker/st_texture.c              | 36 ++++-----
 24 files changed, 226 insertions(+), 221 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 9f956715a2..31de84b272 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline)
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
    texTemp.last_level = MAX_TEXTURE_LEVEL;
-   texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
-   texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.depth0 = 1;
    pf_get_block(texTemp.format, &texTemp.block);
 
    aaline->texture = screen->texture_create(screen, &texTemp);
@@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline)
     */
    for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
       struct pipe_transfer *transfer;
-      const uint size = aaline->texture->width[level];
+      const uint size = u_minify(aaline->texture->width0, level);
       ubyte *data;
       uint i, j;
 
-      assert(aaline->texture->width[level] == aaline->texture->height[level]);
+      assert(aaline->texture->width0 == aaline->texture->height0);
 
       /* This texture is new, no need to flush. 
        */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 283502cdf3..27d89721b1 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip)
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
    texTemp.last_level = 0;
-   texTemp.width[0] = 32;
-   texTemp.height[0] = 32;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = 32;
+   texTemp.height0 = 32;
+   texTemp.depth0 = 1;
    pf_get_block(texTemp.format, &texTemp.block);
 
    pstip->texture = screen->texture_create(screen, &texTemp);
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 5038642599..5372df5735 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       texTemp.target = PIPE_TEXTURE_2D;
       texTemp.format = src->format;
       texTemp.last_level = 0;
-      texTemp.width[0] = srcW;
-      texTemp.height[0] = srcH;
-      texTemp.depth[0] = 1;
+      texTemp.width0 = srcW;
+      texTemp.height0 = srcH;
+      texTemp.depth0 = 1;
       pf_get_block(src->format, &texTemp.block);
 
       tex = screen->texture_create(screen, &texTemp);
@@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    }
    else {
       pipe_texture_reference(&tex, src->texture);
-      s0 = srcX0 / (float)tex->width[0];
-      s1 = srcX1 / (float)tex->width[0];
-      t0 = srcY0 / (float)tex->height[0];
-      t1 = srcY1 / (float)tex->height[0];
+      s0 = srcX0 / (float)tex->width0;
+      s1 = srcX1 / (float)tex->width0;
+      t0 = srcY0 / (float)tex->height0;
+      t1 = srcY1 / (float)tex->height0;
    }
 
 
@@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
    assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
           filter == PIPE_TEX_MIPFILTER_LINEAR);
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = srcX0 / (float)tex->width[0];
-   s1 = srcX1 / (float)tex->width[0];
-   t0 = srcY0 / (float)tex->height[0];
-   t1 = srcY1 / (float)tex->height[0];
+   s0 = srcX0 / (float)tex->width0;
+   s1 = srcX1 / (float)tex->width0;
+   t0 = srcY0 / (float)tex->height0;
+   t1 = srcY1 / (float)tex->height0;
 
    assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
                                                  PIPE_TEXTURE_2D,
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index aa823aa218..84db14576e 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -45,6 +45,7 @@
 #include "util/u_draw_quad.h"
 #include "util/u_gen_mipmap.h"
 #include "util/u_simple_shaders.h"
+#include "util/u_math.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
        * Setup framebuffer / dest surface
        */
       fb.cbufs[0] = surf;
-      fb.width = pt->width[dstLevel];
-      fb.height = pt->height[dstLevel];
+      fb.width = u_minify(pt->width0, dstLevel);
+      fb.height = u_minify(pt->height0, dstLevel);
       cso_set_framebuffer(ctx->cso, &fb);
 
       /*
@@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
       offset = set_vertex_data(ctx,
                                pt->target,
                                face,
-                               (float) pt->width[dstLevel],
-                               (float) pt->height[dstLevel]);
+                               (float) u_minify(pt->width0, dstLevel),
+                               (float) u_minify(pt->height0, dstLevel));
 
       util_draw_vertex_buffer(ctx->pipe, 
                               ctx->vbuf,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 75b075f160..7a598efc3a 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -507,9 +507,9 @@ align(int value, int alignment)
 }
 
 static INLINE unsigned
-minify(unsigned value)
+u_minify(unsigned value, unsigned levels)
 {
-    return MAX2(1, value >> 1);
+    return MAX2(1, value >> levels);
 }
 
 #ifndef COPY_4V
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 85e443204e..de8c266db8 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen,
    templ.target = target;
    templ.format = format;
    templ.last_level = 0;
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    pf_get_block(format, &templ.block);
    templ.tex_usage = usage;
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cda6dc134a..592dd17421 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    assert(dst_area);
    assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
-   compositor->fb_state.width = dst_surface->width[0];
-   compositor->fb_state.height = dst_surface->height[0];
+   compositor->fb_state.width = dst_surface->width0;
+   compositor->fb_state.height = dst_surface->height0;
    compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
    (
       compositor->pipe->screen,
@@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    vs_consts->dst_trans.z = 0;
    vs_consts->dst_trans.w = 0;
 
-   vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
-   vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
+   vs_consts->src_scale.x = src_area->w / (float)src_surface->width0;
+   vs_consts->src_scale.y = src_area->h / (float)src_surface->height0;
    vs_consts->src_scale.z = 1;
    vs_consts->src_scale.w = 1;
-   vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
-   vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
+   vs_consts->src_trans.x = src_area->x / (float)src_surface->width0;
+   vs_consts->src_trans.y = src_area->y / (float)src_surface->height0;
    vs_consts->src_trans.z = 0;
    vs_consts->src_trans.w = 0;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c4ba69817f..1934965995 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
       (
          r->pipe->screen, r->textures.all[i],
          0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
-         r->textures.all[i]->width[0], r->textures.all[i]->height[0]
+         r->textures.all[i]->width0, r->textures.all[i]->height0
       );
 
       r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
@@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
    template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
-   template.width[0] = r->pot_buffers ?
+   template.width0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_width) : r->picture_width;
-   template.height[0] = r->pot_buffers ?
+   template.height0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_height) : r->picture_height;
-   template.depth[0] = 1;
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
 
    r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
 
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      template.width[0] = r->pot_buffers ?
+      template.width0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_width / 2) :
          r->picture_width / 2;
-      template.height[0] = r->pot_buffers ?
+      template.height0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_height / 2) :
          r->picture_height / 2;
    }
    else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
-      template.height[0] = r->pot_buffers ?
+      template.height0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_height / 2) :
          r->picture_height / 2;
 
@@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
    );
 
-   vs_consts->denorm.x = r->surface->width[0];
-   vs_consts->denorm.y = r->surface->height[0];
+   vs_consts->denorm.x = r->surface->width0;
+   vs_consts->denorm.y = r->surface->height0;
 
    pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
 
@@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       renderer->past = past;
       renderer->future = future;
       renderer->fence = fence;
-      renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
-      renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
+      renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
+      renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
    }
 
    while (num_macroblocks) {
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index c5e9084d12..cbac4175c8 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915,
 {
    const struct pipe_texture *pt = &tex->base;
    uint format, pitch;
-   const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+   const uint width = pt->width0, height = pt->height0, depth = pt->depth0;
    const uint num_levels = pt->last_level;
    unsigned max_lod = num_levels * 4;
    unsigned tiled = MS3_USE_FENCE_REGS;
diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
index 286c9ace8e..c7b86dd4c5 100644
--- a/src/gallium/drivers/i915/i915_texture.c
+++ b/src/gallium/drivers/i915/i915_texture.c
@@ -105,10 +105,6 @@ i915_miptree_set_level_info(struct i915_texture *tex,
 
    assert(level < PIPE_MAX_TEXTURE_LEVELS);
 
-   pt->width[level] = w;
-   pt->height[level] = h;
-   pt->depth[level] = d;
-   
    pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
    pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
 
@@ -168,16 +164,16 @@ i915_scanout_layout(struct i915_texture *tex)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width[0],
-                               tex->base.height[0],
+                               tex->base.width0,
+                               tex->base.height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
-   if (tex->base.width[0] >= 240) {
+   if (tex->base.width0 >= 240) {
       tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
       tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
       tex->hw_tiled = INTEL_TILE_X;
-   } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) {
+   } else if (tex->base.width0 == 64 && tex->base.height0 == 64) {
       tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
       tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
    } else {
@@ -185,7 +181,7 @@ i915_scanout_layout(struct i915_texture *tex)
    }
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width[0], tex->base.height[0], pt->block.size,
+      tex->base.width0, tex->base.height0, pt->block.size,
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -203,12 +199,12 @@ i915_display_target_layout(struct i915_texture *tex)
       return FALSE;
 
    /* fallback to normal textures for small textures */
-   if (tex->base.width[0] < 240)
+   if (tex->base.width0 < 240)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width[0],
-                               tex->base.height[0],
+                               tex->base.width0,
+                               tex->base.height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
@@ -217,7 +213,7 @@ i915_display_target_layout(struct i915_texture *tex)
    tex->hw_tiled = INTEL_TILE_X;
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width[0], tex->base.height[0], pt->block.size,
+      tex->base.width0, tex->base.height0, pt->block.size,
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -228,8 +224,8 @@ i915_miptree_layout_2d(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
 
@@ -254,8 +250,8 @@ i915_miptree_layout_2d(struct i915_texture *tex)
 
       tex->total_nblocksy += nblocksy;
 
-      width = minify(width);
-      height = minify(height);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -267,9 +263,9 @@ i915_miptree_layout_3d(struct i915_texture *tex)
    struct pipe_texture *pt = &tex->base;
    unsigned level;
 
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
    unsigned stack_nblocksy = 0;
@@ -285,36 +281,34 @@ i915_miptree_layout_3d(struct i915_texture *tex)
 
       stack_nblocksy += MAX2(2, nblocksy);
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
 
    /* Fixup depth image_offsets: 
     */
-   depth = pt->depth[0];
    for (level = 0; level <= pt->last_level; level++) {
       unsigned i;
       for (i = 0; i < depth; i++) 
          i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy);
 
-      depth = minify(depth);
+      depth = u_minify(depth, 1);
    }
 
    /* Multiply slice size by texture depth for total size.  It's
     * remarkable how wasteful of memory the i915 texture layouts
     * are.  They are largely fixed in the i945.
     */
-   tex->total_nblocksy = stack_nblocksy * pt->depth[0];
+   tex->total_nblocksy = stack_nblocksy * pt->depth0;
 }
 
 static void
 i915_miptree_layout_cube(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
-   unsigned width = pt->width[0], height = pt->height[0];
+   unsigned width = pt->width0, height = pt->height0;
    const unsigned nblocks = pt->nblocksx[0];
    unsigned level;
    unsigned face;
@@ -383,8 +377,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
    unsigned level;
    unsigned x = 0;
    unsigned y = 0;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
 
@@ -407,8 +401,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
     */
    if (pt->last_level > 0) {
       unsigned mip1_nblocksx 
-         = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
-         + pf_get_nblocksx(&pt->block, minify(minify(width)));
+         = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x)
+         + pf_get_nblocksx(&pt->block, u_minify(width, 2));
 
       if (mip1_nblocksx > nblocksx)
          tex->stride = mip1_nblocksx * pt->block.size;
@@ -439,8 +433,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
          y += nblocksy;
       }
 
-      width  = minify(width);
-      height = minify(height);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -450,9 +444,9 @@ static void
 i945_miptree_layout_3d(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
    unsigned pack_x_pitch, pack_x_nr;
@@ -495,9 +489,9 @@ i945_miptree_layout_3d(struct i915_texture *tex)
          pack_y_pitch >>= 1;
       }
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -511,11 +505,11 @@ i945_miptree_layout_cube(struct i915_texture *tex)
 
    const unsigned nblocks = pt->nblocksx[0];
    unsigned face;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
 
    /*
-   printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]);
+   printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0);
    */
 
    assert(width == height); /* cubemap images are square */
@@ -651,8 +645,8 @@ i915_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&tex->base.reference, 1);
    tex->base.screen = screen;
 
-   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
-   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
+   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0);
+   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0);
    
    if (is->is_i945) {
       if (!i945_miptree_layout(tex))
@@ -667,7 +661,7 @@ i915_texture_create(struct pipe_screen *screen,
 
 
    /* for scanouts and cursors, cursors arn't scanouts */
-   if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64)
+   if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64)
       buf_usage = INTEL_NEW_SCANOUT;
    else
       buf_usage = INTEL_NEW_TEXTURE;
@@ -710,7 +704,7 @@ i915_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -724,7 +718,7 @@ i915_texture_blanket(struct pipe_screen * screen,
 
    tex->stride = stride[0];
 
-   i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+   i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
    pipe_buffer_reference(&tex->buffer, buffer);
@@ -788,8 +782,8 @@ i915_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = offset;
       ps->usage = flags;
    }
@@ -919,7 +913,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -933,7 +927,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
 
    tex->stride = stride;
 
-   i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+   i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
    tex->buffer = buffer;
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index c22ee86b66..e26153b1d9 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -521,7 +521,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp,
    const struct pipe_sampler_state *sampler = samp->sampler;
    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float rho = MAX2(dsdx, dsdy) * texture->width[0];
+   float rho = MAX2(dsdx, dsdy) * texture->width0;
    float lambda;
 
    lambda = util_fast_log2(rho);
@@ -545,8 +545,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
-   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
+   float maxx = MAX2(dsdx, dsdy) * texture->width0;
+   float maxy = MAX2(dtdx, dtdy) * texture->height0;
    float rho  = MAX2(maxx, maxy);
    float lambda;
 
@@ -573,9 +573,9 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
-   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
-   float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
+   float maxx = MAX2(dsdx, dsdy) * texture->width0;
+   float maxy = MAX2(dtdx, dtdy) * texture->height0;
+   float maxz = MAX2(dpdx, dpdy) * texture->depth0;
    float rho, lambda;
 
    rho = MAX2(maxx, maxy);
@@ -644,8 +644,8 @@ get_texel_2d(const struct sp_sampler_varient *samp,
    const struct pipe_texture *texture = samp->texture;
    unsigned level = addr.bits.level;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level)) {
       return samp->sampler->border_color;
    }
    else {
@@ -737,9 +737,9 @@ get_texel_3d(const struct sp_sampler_varient *samp,
    const struct pipe_texture *texture = samp->texture;
    unsigned level = addr.bits.level;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level) ||
+       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
       return samp->sampler->border_color;
    }
    else {
@@ -925,7 +925,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
+   width = u_minify(texture->width0, level0);
 
    assert(width > 0);
 
@@ -961,8 +961,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1008,8 +1008,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1046,9 +1046,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1088,7 +1088,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
+   width = u_minify(texture->width0, level0);
 
    assert(width > 0);
 
@@ -1127,8 +1127,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1174,8 +1174,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1221,9 +1221,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    addr.value = 0;
    addr.bits.level = level0;
@@ -1778,8 +1778,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
 
    samp->texture = texture;
    samp->cache = tex_cache;
-   samp->xpot = util_unsigned_logbase2( texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( texture->height[0] );
+   samp->xpot = util_unsigned_logbase2( texture->width0 );
+   samp->ypot = util_unsigned_logbase2( texture->height0 );
    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 407a22a9f4..e50a76a73b 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -35,6 +35,7 @@
 #include "pipe/p_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_tile.h"
+#include "util/u_math.h"
 #include "sp_context.h"
 #include "sp_surface.h"
 #include "sp_texture.h"
@@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
                                      addr.bits.level, 
                                      addr.bits.z, 
                                      PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
-
+                                     u_minify(tc->texture->width0, addr.bits.level),
+                                     u_minify(tc->texture->height0, addr.bits.level));
+         
          tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
 
          tc->tex_face = addr.bits.face;
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 7caf2928b4..ac5f61e46f 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -52,16 +52,17 @@ softpipe_texture_layout(struct pipe_screen *screen,
 {
    struct pipe_texture *pt = &spt->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    unsigned buffer_size = 0;
 
+   pt->width0 = width;
+   pt->height0 = height;
+   pt->depth0 = depth;
+
    for (level = 0; level <= pt->last_level; level++) {
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);  
       spt->stride[level] = pt->nblocksx[level]*pt->block.size;
@@ -72,9 +73,9 @@ softpipe_texture_layout(struct pipe_screen *screen,
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       spt->stride[level]);
 
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 
    spt->buffer = screen->buffer_create(screen, 32,
@@ -96,12 +97,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
                      PIPE_BUFFER_USAGE_GPU_READ_WRITE);
    unsigned tex_usage = spt->base.tex_usage;
 
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);  
+   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
+   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
 
    spt->buffer = screen->surface_buffer_create( screen, 
-                                                spt->base.width[0], 
-                                                spt->base.height[0],
+                                                spt->base.width0, 
+                                                spt->base.height0,
                                                 spt->base.format,
                                                 usage,
                                                 tex_usage,
@@ -126,9 +127,9 @@ softpipe_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&spt->base.reference, 1);
    spt->base.screen = screen;
 
-   spt->pot = (util_is_power_of_two(template->width[0]) &&
-               util_is_power_of_two(template->height[0]) &&
-               util_is_power_of_two(template->depth[0]));
+   spt->pot = (util_is_power_of_two(template->width0) &&
+               util_is_power_of_two(template->height0) &&
+               util_is_power_of_two(template->depth0));
 
    if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
                               PIPE_TEXTURE_USAGE_PRIMARY)) {
@@ -163,7 +164,7 @@ softpipe_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -174,8 +175,8 @@ softpipe_texture_blanket(struct pipe_screen * screen,
    spt->base = *base;
    pipe_reference_init(&spt->base.reference, 1);
    spt->base.screen = screen;
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);  
+   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
+   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
    spt->stride[0] = stride[0];
 
    pipe_buffer_reference(&spt->buffer, buffer);
@@ -213,8 +214,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = spt->level_offset[level];
       ps->usage = usage;
 
@@ -434,9 +435,9 @@ softpipe_video_surface_create(struct pipe_screen *screen,
    template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
    template.last_level = 0;
    /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
-   template.width[0] = util_next_power_of_two(width);
-   template.height[0] = util_next_power_of_two(height);
-   template.depth[0] = 1;
+   template.width0 = util_next_power_of_two(width);
+   template.height0 = util_next_power_of_two(height);
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index bcf6751af4..6d58209294 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -83,15 +83,15 @@ void trace_dump_template(const struct pipe_texture *templat)
    trace_dump_member(format, templat, format);
 
    trace_dump_member_begin("width");
-   trace_dump_array(uint, templat->width, 1);
+   trace_dump_uint(templat->width0);
    trace_dump_member_end();
 
    trace_dump_member_begin("height");
-   trace_dump_array(uint, templat->height, 1);
+   trace_dump_uint(templat->height0);
    trace_dump_member_end();
 
    trace_dump_member_begin("depth");
-   trace_dump_array(uint, templat->depth, 1);
+   trace_dump_uint(templat->depth0);
    trace_dump_member_end();
 
    trace_dump_member_begin("block");
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index 81e0a6f3b0..b59458c0e3 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -200,9 +200,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header,
    t = tr_tex->texture;
    rbug_send_texture_info_reply(tr_rbug->con, serial,
                                t->target, t->format,
-                               t->width, t->last_level + 1,
-                               t->height, t->last_level + 1,
-                               t->depth, t->last_level + 1,
+                               &t->width0, 1,
+                               &t->height0, 1,
+                               &t->depth0, 1,
                                t->block.width, t->block.height, t->block.size,
                                t->last_level,
                                t->nr_samples,
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 287b424e4a..9766e86620 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -343,9 +343,9 @@ struct pipe_texture
    enum pipe_texture_target target; /**< PIPE_TEXTURE_x */
    enum pipe_format format;         /**< PIPE_FORMAT_x */
 
-   unsigned width[PIPE_MAX_TEXTURE_LEVELS];
-   unsigned height[PIPE_MAX_TEXTURE_LEVELS];
-   unsigned depth[PIPE_MAX_TEXTURE_LEVELS];
+   unsigned width0;
+   unsigned height0;
+   unsigned depth0;
 
    struct pipe_format_block block;
    unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */
diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c
index e18c0f6e0a..8ca4335e33 100644
--- a/src/mesa/state_tracker/st_atom_framebuffer.c
+++ b/src/mesa/state_tracker/st_atom_framebuffer.c
@@ -40,6 +40,7 @@
 #include "pipe/p_inlines.h"
 #include "cso_cache/cso_context.h"
 #include "util/u_rect.h"
+#include "util/u_math.h"
 
 
@@ -64,8 +65,8 @@ update_renderbuffer_surface(struct st_context *st,
       GLuint level;
       /* find matching mipmap level size */
       for (level = 0; level <= texture->last_level; level++) {
-         if (texture->width[level] == rtt_width &&
-             texture->height[level] == rtt_height) {
+         if (u_minify(texture->width0, level) == rtt_width &&
+             u_minify(texture->height0, level) == rtt_height) {
 
             pipe_surface_reference(&strb->surface, NULL);
 
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index babfcc87b4..4b35f59cc2 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -145,7 +145,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
    const GLuint gSize = ctx->PixelMaps.GtoG.Size;
    const GLuint bSize = ctx->PixelMaps.BtoB.Size;
    const GLuint aSize = ctx->PixelMaps.AtoA.Size;
-   const uint texSize = pt->width[0];
+   const uint texSize = pt->width0;
    uint *dest;
    uint i, j;
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 1d33e81c2c..7ec4599280 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -637,8 +637,8 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    y1 = y + height * ctx->Pixel.ZoomY;
 
    draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
-	     (GLfloat) width / pt->width[0],
-	     (GLfloat) height / pt->height[0]);
+	     (GLfloat) width / pt->width0,
+	     (GLfloat) height / pt->height0);
 
    /* restore state */
    cso_restore_rasterizer(cso);
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 65ce12ccd4..0469fb9978 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -128,9 +128,9 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       template.target = PIPE_TEXTURE_2D;
       template.format = format;
       pf_get_block(format, &template.block);
-      template.width[0] = width;
-      template.height[0] = height;
-      template.depth[0] = 1;
+      template.width0 = width;
+      template.height0 = height;
+      template.depth0 = 1;
       template.last_level = 0;
       template.nr_samples = rb->NumSamples;
       if (pf_is_depth_stencil(format)) {
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index 772bb3bb69..103861d6f9 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -243,7 +243,7 @@ st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb,
       GLint row, col, dy, dstStride;
 
       if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) {
-         y = strb->texture->height[0] - y - height;
+         y = strb->texture->height0 - y - height;
       }
 
       trans = st_cond_flush_get_tex_transfer(st_context(ctx), strb->texture,
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 9186db76e1..72892b7c8c 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -451,9 +451,9 @@ compress_with_blit(GLcontext * ctx,
    templ.target = PIPE_TEXTURE_2D;
    templ.format = st_mesa_format_to_pipe_format(mesa_format);
    pf_get_block(templ.format, &templ.block);
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    templ.last_level = 0;
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
    src_tex = screen->texture_create(screen, &templ);
@@ -1813,9 +1813,9 @@ st_finalize_texture(GLcontext *ctx,
       if (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) ||
           stObj->pt->format != fmt ||
           stObj->pt->last_level < stObj->lastLevel ||
-          stObj->pt->width[0] != firstImage->base.Width2 ||
-          stObj->pt->height[0] != firstImage->base.Height2 ||
-          stObj->pt->depth[0] != firstImage->base.Depth2 ||
+          stObj->pt->width0 != firstImage->base.Width2 ||
+          stObj->pt->height0 != firstImage->base.Height2 ||
+          stObj->pt->depth0 != firstImage->base.Depth2 ||
           /* Nominal bytes per pixel: */
           stObj->pt->block.size / stObj->pt->block.width != cpp)
       {
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index 16ca2771b0..f8068fa12b 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -38,6 +38,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "util/u_gen_mipmap.h"
+#include "util/u_math.h"
 
 #include "cso_cache/cso_cache.h"
 #include "cso_cache/cso_context.h"
@@ -133,14 +134,14 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
       srcTrans = st_cond_flush_get_tex_transfer(st_context(ctx), pt, face,
 						srcLevel, zslice,
 						PIPE_TRANSFER_READ, 0, 0,
-						pt->width[srcLevel],
-						pt->height[srcLevel]);
+						u_minify(pt->width0, srcLevel),
+						u_minify(pt->height0, srcLevel));
 
       dstTrans = st_cond_flush_get_tex_transfer(st_context(ctx), pt, face,
 						dstLevel, zslice,
 						PIPE_TRANSFER_WRITE, 0, 0,
-						pt->width[dstLevel],
-						pt->height[dstLevel]);
+						u_minify(pt->width0, dstLevel),
+						u_minify(pt->height0, dstLevel));
 
       srcData = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstData = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -149,13 +150,17 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
       dstStride = dstTrans->stride / dstTrans->block.size;
 
       _mesa_generate_mipmap_level(target, datatype, comps,
-                   0 /*border*/,
-                   pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel],
-                   srcData,
-                   srcStride, /* stride in texels */
-                   pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel],
-                   dstData,
-                   dstStride); /* stride in texels */
+                                  0 /*border*/,
+                                  u_minify(pt->width0, srcLevel),
+                                  u_minify(pt->height0, srcLevel),
+                                  u_minify(pt->depth0, srcLevel),
+                                  srcData,
+                                  srcStride, /* stride in texels */
+                                  u_minify(pt->width0, dstLevel),
+                                  u_minify(pt->height0, dstLevel),
+                                  u_minify(pt->depth0, dstLevel),
+                                  dstData,
+                                  dstStride); /* stride in texels */
 
       screen->transfer_unmap(screen, srcTrans);
       screen->transfer_unmap(screen, dstTrans);
@@ -232,9 +237,9 @@ st_generate_mipmap(GLcontext *ctx, GLenum target,
                                     oldTex->target,
                                     oldTex->format,
                                     lastLevel,
-                                    oldTex->width[0],
-                                    oldTex->height[0],
-                                    oldTex->depth[0],
+                                    oldTex->width0,
+                                    oldTex->height0,
+                                    oldTex->depth0,
                                     oldTex->tex_usage);
 
       /* The texture isn't in a "complete" state yet so set the expected
@@ -269,9 +274,9 @@ st_generate_mipmap(GLcontext *ctx, GLenum target,
          = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
       struct gl_texture_image *dstImage;
       struct st_texture_image *stImage;
-      uint dstWidth = pt->width[dstLevel];
-      uint dstHeight = pt->height[dstLevel];
-      uint dstDepth = pt->depth[dstLevel];
+      uint dstWidth = u_minify(pt->width0, dstLevel);
+      uint dstHeight = u_minify(pt->height0, dstLevel);
+      uint dstDepth = u_minify(pt->depth0, dstLevel); 
       uint border = srcImage->Border;
 
       dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel);
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 3945822f66..aa88fdcd78 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -44,6 +44,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "util/u_rect.h"
+#include "util/u_math.h"
 
 
 #define DBG if(0) printf
@@ -100,9 +101,9 @@ st_texture_create(struct st_context *st,
    pt.target = target;
    pt.format = format;
    pt.last_level = last_level;
-   pt.width[0] = width0;
-   pt.height[0] = height0;
-   pt.depth[0] = depth0;
+   pt.width0 = width0;
+   pt.height0 = height0;
+   pt.depth0 = depth0;
    pf_get_block(format, &pt.block);
    pt.tex_usage = usage;
 
@@ -135,9 +136,9 @@ st_texture_match_image(const struct pipe_texture *pt,
    /* Test if this image's size matches what's expected in the
     * established texture.
     */
-   if (image->Width != pt->width[level] ||
-       image->Height != pt->height[level] ||
-       image->Depth != pt->depth[level])
+   if (image->Width != u_minify(pt->width0, level) ||
+       image->Height != u_minify(pt->height0, level) ||
+       image->Depth != u_minify(pt->depth0, level))
       return GL_FALSE;
 
    return GL_TRUE;
@@ -265,7 +266,7 @@ st_texture_image_data(struct st_context *st,
 {
    struct pipe_context *pipe = st->pipe;
    struct pipe_screen *screen = pipe->screen;
-   GLuint depth = dst->depth[level];
+   GLuint depth = u_minify(dst->depth0, level);
    GLuint i;
    const GLubyte *srcUB = src;
    struct pipe_transfer *dst_transfer;
@@ -275,15 +276,16 @@ st_texture_image_data(struct st_context *st,
    for (i = 0; i < depth; i++) {
       dst_transfer = st_no_flush_get_tex_transfer(st, dst, face, level, i,
 						  PIPE_TRANSFER_WRITE, 0, 0,
-						  dst->width[level],
-						  dst->height[level]);
+						  u_minify(dst->width0, level),
+                                                  u_minify(dst->height0, level));
 
       st_surface_data(pipe, dst_transfer,
 		      0, 0,                             /* dstx, dsty */
 		      srcUB,
 		      src_row_stride,
 		      0, 0,                             /* source x, y */
-		      dst->width[level], dst->height[level]);       /* width, height */
+		      u_minify(dst->width0, level),
+                      u_minify(dst->height0, level));      /* width, height */
 
       screen->tex_transfer_destroy(dst_transfer);
 
@@ -301,9 +303,9 @@ st_texture_image_copy(struct pipe_context *pipe,
                       GLuint face)
 {
    struct pipe_screen *screen = pipe->screen;
-   GLuint width = dst->width[dstLevel];
-   GLuint height = dst->height[dstLevel];
-   GLuint depth = dst->depth[dstLevel];
+   GLuint width = u_minify(dst->width0, dstLevel); 
+   GLuint height = u_minify(dst->height0, dstLevel); 
+   GLuint depth = u_minify(dst->depth0, dstLevel); 
    struct pipe_surface *src_surface;
    struct pipe_surface *dst_surface;
    GLuint i;
@@ -313,13 +315,13 @@ st_texture_image_copy(struct pipe_context *pipe,
 
       /* find src texture level of needed size */
       for (srcLevel = 0; srcLevel <= src->last_level; srcLevel++) {
-         if (src->width[srcLevel] == width &&
-             src->height[srcLevel] == height) {
+         if (u_minify(src->width0, srcLevel) == width &&
+             u_minify(src->height0, srcLevel) == height) {
             break;
          }
       }
-      assert(src->width[srcLevel] == width);
-      assert(src->height[srcLevel] == height);
+      assert(u_minify(src->width0, srcLevel) == width);
+      assert(u_minify(src->height0, srcLevel) == height);
 
 #if 0
       {
-- 
cgit v1.2.3


From 25cbf9b4da7be45218f645102d6be5144be4291f Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 14:47:24 -0800
Subject: i965g: fix initialization of texture width/height/depth arrays

Will remove these arrays in another branch - they're completely redundant.
---
 src/gallium/drivers/i965/brw_screen_tex_layout.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
index bcdf8d8074..f793fa8859 100644
--- a/src/gallium/drivers/i965/brw_screen_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -101,16 +101,17 @@ brw_tex_set_level_info(struct brw_texture *tex,
 		       GLuint x, GLuint y,
 		       GLuint w, GLuint h, GLuint d)
 {
-   assert(tex->base.width[level] == w);
-   assert(tex->base.height[level] == h);
-   assert(tex->base.depth[level] == d);
-   assert(tex->image_offset[level] == NULL);
-   assert(nr_images >= 1);
 
    if (BRW_DEBUG & DEBUG_TEXTURE)
       debug_printf("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
 		   level, w, h, d, x, y, tex->level_offset[level]);
 
+   assert(tex->image_offset[level] == NULL);
+   assert(nr_images >= 1);
+
+   tex->base.width[level] = w;
+   tex->base.height[level] = h;
+   tex->base.depth[level] = d;
 
    tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
    tex->nr_images[level] = nr_images;
-- 
cgit v1.2.3


From 9e4f3eaf6630e0d3a9b05da90e4879a94516b974 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 15:04:00 -0800
Subject: i965g: make the load-before-use vs immediate path work

---
 src/gallium/drivers/i965/brw_vs_emit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index bcaeaca62d..52d4731dfd 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -202,7 +202,8 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
       struct brw_reg r;
       int j;
 
-      r = brw_vec8_grf(reg, 0);
+      c->regs[TGSI_FILE_IMMEDIATE][i] = 
+         r = brw_vec8_grf(reg, 0);
 
       for (j = 0; j < 4; j++) {
 	 brw_MOV(&c->func, 
@@ -1628,7 +1629,7 @@ void brw_vs_emit(struct brw_vs_compile *c)
 
       case TGSI_TOKEN_TYPE_IMMEDIATE: {
 	 static const float id[4] = {0,0,0,1};
-	 const float *imm = &parse.FullToken.FullImmediate.u[i].Float;
+	 const float *imm = &parse.FullToken.FullImmediate.u[0].Float;
 	 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
 
 	 for (i = 0; i < size; i++)
-- 
cgit v1.2.3


From 1b9eda4c74c83cc0ffa98f2885660c80cdff2a65 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 16:09:39 -0800
Subject: i965g: get fragment constants sort-of working

---
 src/gallium/drivers/i965/brw_curbe.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 79ebac9d15..5fa1723311 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -51,10 +51,10 @@
 static int calculate_curbe_offsets( struct brw_context *brw )
 {
    /* CACHE_NEW_WM_PROG */
-   const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
+   const GLuint nr_fp_regs = brw->wm.prog_data->curb_read_length;
    
    /* BRW_NEW_VERTEX_PROGRAM */
-   const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
+   const GLuint nr_vp_regs = brw->vs.prog_data->curb_read_length;
    GLuint nr_clip_regs = 0;
    GLuint total_regs;
 
@@ -162,6 +162,7 @@ static GLfloat fixed_plane[6][4] = {
  */
 static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
 {
+   struct pipe_screen *screen = brw->base.screen;
    const GLuint sz = brw->curbe.total_size;
    const GLuint bufsz = sz * 16 * sizeof(GLfloat);
    enum pipe_error ret;
@@ -182,14 +183,15 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    /* fragment shader constants */
    if (brw->curbe.wm_size) {
       GLuint offset = brw->curbe.wm_start * 16;
+      unsigned nr = brw->wm.prog_data->nr_params;
 
-      /* map fs constant buffer */
+      const GLfloat *value = screen->buffer_map( screen,
+						 brw->curr.fragment_constants,
+						 PIPE_BUFFER_USAGE_CPU_READ);
 
-      /* copy float constants */
-      for (i = 0; i < brw->wm.prog_data->nr_params; i++) 
-	 buf[offset + i] = *brw->wm.prog_data->param[i];
+      memcpy(&buf[offset], value, nr * 4 * sizeof(float));
 
-      /* unmap fs constant buffer */
+      screen->buffer_unmap( screen, brw->curr.fragment_constants );
    }
 
 
@@ -225,7 +227,6 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
       GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      struct pipe_screen *screen = brw->base.screen;
 
       /* XXX: note that constant buffers are currently *already* in
        * buffer objects.  If we want to keep on putting them into the
-- 
cgit v1.2.3


From 8db59a1fa329b28ba375d54d6d6d5df06f411a6e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 16:14:44 -0800
Subject: i965g: setup nr_attrs should track nr fragment shader inputs

This would be equivalent to taking the number of outputs of the vp variant.
---
 src/gallium/drivers/i965/brw_sf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 52fb2cd42d..e75f447a03 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -124,8 +124,12 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
 
    /* Populate the key, noting state dependencies:
     */
-   /* CACHE_NEW_VS_PROG */
-   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+   /* XXX: Add one to turn the max value into a count, then add
+    * another one to account for the position input.
+    */
+   /* PIPE_NEW_FRAGMENT_SHADER */
+   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 2;
 
 
    /* XXX: this is probably where the mapping between vertex shader
-- 
cgit v1.2.3


From c58e20fbbb87b8dbd0c58294d4ad3d297c3aa747 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 18:47:07 -0800
Subject: i965g: fix typo in previous commit

---
 src/gallium/drivers/i965/brw_sf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index e75f447a03..6f4502da97 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -129,7 +129,7 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
     * another one to account for the position input.
     */
    /* PIPE_NEW_FRAGMENT_SHADER */
-   key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 2;
+   key.nr_attrs = brw->curr.fragment_shader->info.file_max[TGSI_FILE_INPUT] + 2;
 
 
    /* XXX: this is probably where the mapping between vertex shader
-- 
cgit v1.2.3


From 47cef2bb8f5979ae690e89943f83060999a29a55 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 18:55:18 -0800
Subject: i965g: add new state flag tracking fs signature changes

---
 src/gallium/drivers/i965/brw_context.h     |  1 +
 src/gallium/drivers/i965/brw_pipe_shader.c | 13 ++++++++++++-
 src/gallium/drivers/i965/brw_sf.c          |  9 ++++-----
 src/gallium/drivers/i965/brw_vs.c          |  7 +++----
 4 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 31e04b6e14..65859be0ec 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -233,6 +233,7 @@ struct brw_sampler {
 #define PIPE_NEW_SCISSOR                0x100000
 #define PIPE_NEW_BOUND_TEXTURES         0x200000
 #define PIPE_NEW_NR_CBUFS               0x400000
+#define PIPE_NEW_FRAGMENT_SIGNATURE     0x800000
 
 
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 02bc8fa130..c755fa6889 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -58,9 +58,20 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info)
 
 static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
 {
+   struct brw_fragment_shader *fs = (struct brw_fragment_shader *)prog;
    struct brw_context *brw = brw_context(pipe);
+   
+   if (brw->curr.fragment_shader == fs)
+      return;
+
+   if (brw->curr.fragment_shader == NULL ||
+       fs == NULL ||
+       memcmp(&brw->curr.fragment_shader->signature, &fs->signature,
+              brw_fs_signature_size(&fs->signature)) != 0) {
+      brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SIGNATURE;
+   }
 
-   brw->curr.fragment_shader = (struct brw_fragment_shader *)prog;
+   brw->curr.fragment_shader = fs;
    brw->state.dirty.mesa |= PIPE_NEW_FRAGMENT_SHADER;
 }
 
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 6f4502da97..aa2ab5098c 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -125,11 +125,10 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
    /* Populate the key, noting state dependencies:
     */
 
-   /* XXX: Add one to turn the max value into a count, then add
-    * another one to account for the position input.
+   /* XXX: Add one to account for the position input.
     */
-   /* PIPE_NEW_FRAGMENT_SHADER */
-   key.nr_attrs = brw->curr.fragment_shader->info.file_max[TGSI_FILE_INPUT] + 2;
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key.nr_attrs = brw->curr.fragment_shader->signature.nr_inputs + 1;
 
 
    /* XXX: this is probably where the mapping between vertex shader
@@ -194,7 +193,7 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
 
 const struct brw_tracked_state brw_sf_prog = {
    .dirty = {
-      .mesa  = (PIPE_NEW_RAST | PIPE_NEW_VERTEX_SHADER),
+      .mesa  = (PIPE_NEW_RAST | PIPE_NEW_FRAGMENT_SIGNATURE),
       .brw   = (BRW_NEW_REDUCED_PRIMITIVE),
       .cache = 0
    },
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 2668392919..25b51eb41e 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -101,7 +101,7 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
 {
    struct brw_vs_prog_key key;
    struct brw_vertex_shader *vp = brw->curr.vertex_shader;
-   struct brw_fragment_shader *fs = brw->curr.fragment_shader;
+   struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
    enum pipe_error ret;
 
    memset(&key, 0, sizeof(key));
@@ -111,8 +111,7 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
    key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
 			brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
 
-   memcpy(&key.fs_signature, &fs->signature,
-          brw_fs_signature_size(&fs->signature));
+   memcpy(&key.fs_signature, sig, brw_fs_signature_size(sig));
 
 
    /* Make an early check for the key.
@@ -138,7 +137,7 @@ const struct brw_tracked_state brw_vs_prog = {
    .dirty = {
       .mesa  = (PIPE_NEW_CLIP | 
                 PIPE_NEW_RAST |
-                PIPE_NEW_FRAGMENT_SHADER),
+                PIPE_NEW_FRAGMENT_SIGNATURE),
       .brw   = BRW_NEW_VERTEX_PROGRAM,
       .cache = 0
    },
-- 
cgit v1.2.3


From 34a01929d54266e8e5fec47e94859405bce588fa Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 19:15:21 -0800
Subject: i965g: special case setup when fs has no inputs

---
 src/gallium/drivers/i965/brw_sf.c      | 60 ++++++++++++++++++++--------------
 src/gallium/drivers/i965/brw_sf.h      |  1 +
 src/gallium/drivers/i965/brw_sf_emit.c | 19 +++++++++++
 3 files changed, 55 insertions(+), 25 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index aa2ab5098c..0b94dc40c3 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -64,32 +64,42 @@ static enum pipe_error compile_sf_prog( struct brw_context *brw,
    c.prog_data.urb_read_length = c.nr_attr_regs;
    c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
 
-   
-   /* Which primitive?  Or all three? 
+   /* Special case when there are no attributes to setup.
+    *
+    * XXX: should be able to set nr_setup_attrs to nr_attrs-1 -- but
+    * breaks vp-tris.c
     */
-   switch (key->primitive) {
-   case SF_TRIANGLES:
-      c.nr_verts = 3;
-      brw_emit_tri_setup( &c, GL_TRUE );
-      break;
-   case SF_LINES:
-      c.nr_verts = 2;
-      brw_emit_line_setup( &c, GL_TRUE );
-      break;
-   case SF_POINTS:
-      c.nr_verts = 1;
-      if (key->do_point_sprite)
-	  brw_emit_point_sprite_setup( &c, GL_TRUE );
-      else
-	  brw_emit_point_setup( &c, GL_TRUE );
-      break;
-   case SF_UNFILLED_TRIS:
-      c.nr_verts = 3;
-      brw_emit_anyprim_setup( &c );
-      break;
-   default:
-      assert(0);
-      return PIPE_ERROR_BAD_INPUT;
+   if (c.nr_attrs - 1 == 0) {
+      c.nr_verts = 0;
+      brw_emit_null_setup( &c );
+   }
+   else {
+      /* Which primitive?  Or all three? 
+       */
+      switch (key->primitive) {
+      case SF_TRIANGLES:
+         c.nr_verts = 3;
+         brw_emit_tri_setup( &c, GL_TRUE );
+         break;
+      case SF_LINES:
+         c.nr_verts = 2;
+         brw_emit_line_setup( &c, GL_TRUE );
+         break;
+      case SF_POINTS:
+         c.nr_verts = 1;
+         if (key->do_point_sprite)
+            brw_emit_point_sprite_setup( &c, GL_TRUE );
+         else
+            brw_emit_point_setup( &c, GL_TRUE );
+         break;
+      case SF_UNFILLED_TRIS:
+         c.nr_verts = 3;
+         brw_emit_anyprim_setup( &c );
+         break;
+      default:
+         assert(0);
+         return PIPE_ERROR_BAD_INPUT;
+      }
    }
 
    /* get the program
diff --git a/src/gallium/drivers/i965/brw_sf.h b/src/gallium/drivers/i965/brw_sf.h
index 0b7003dc5e..a895c7d2f6 100644
--- a/src/gallium/drivers/i965/brw_sf.h
+++ b/src/gallium/drivers/i965/brw_sf.h
@@ -112,6 +112,7 @@ struct brw_sf_compile {
 };
 
  
+void brw_emit_null_setup( struct brw_sf_compile *c );
 void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate );
 void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate );
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index db52c9553e..2983e8a9dd 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -352,6 +352,25 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
 }
 
 
+void brw_emit_null_setup( struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+
+   /* m0 is implicitly copied from r0 in the send instruction:
+    */	 
+   brw_urb_WRITE(p, 
+                 brw_null_reg(),
+                 0,
+                 brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+                 0, 	/* allocate */
+                 1,	/* used */
+                 1, 	/* msg len */
+                 0,	/* response len */
+                 1,	/* eot */
+                 1, 	/* writes complete */
+                 0,	/* offset */
+                 BRW_URB_SWIZZLE_TRANSPOSE); 
+}
 
 void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate)
 {
-- 
cgit v1.2.3


From 4fb77ef840a42c3c8e2a43aa772a73614528fc4d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 19:31:32 -0800
Subject: i965g: include interpolation info in fs signature

---
 src/gallium/drivers/i965/brw_context.h     | 5 +++--
 src/gallium/drivers/i965/brw_pipe_shader.c | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 65859be0ec..64279c4676 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -174,8 +174,9 @@ struct brw_vertex_shader {
 struct brw_fs_signature {
    GLuint nr_inputs;
    struct {
-      GLuint semantic:5;
-      GLuint semantic_index:27;
+      GLuint interp:3;          /* TGSI_INTERPOLATE_x */
+      GLuint semantic:5;        /* TGSI_SEMANTIC_x */
+      GLuint semantic_index:24;
    } input[PIPE_MAX_SHADER_INPUTS];
 };
 
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index c755fa6889..3222ee7777 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -109,6 +109,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
 
    fs->signature.nr_inputs = fs->info.num_inputs;
    for (i = 0; i < fs->info.num_inputs; i++) {
+      fs->signature.input[i].interp = fs->info.input_interpolate[i];
       fs->signature.input[i].semantic = fs->info.input_semantic_name[i];
       fs->signature.input[i].semantic_index = fs->info.input_semantic_index[i];
    }
-- 
cgit v1.2.3


From d2f4c80c8baf48bcfd3e33a275df2fa6fcb6d353 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 19:41:25 -0800
Subject: i965g: get linear vs perspective interpolation working again

---
 src/gallium/drivers/i965/brw_sf.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index 0b94dc40c3..e1986a9dbb 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -127,8 +127,10 @@ static enum pipe_error compile_sf_prog( struct brw_context *brw,
  */
 static enum pipe_error upload_sf_prog(struct brw_context *brw)
 {
-   enum pipe_error ret;
+   const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
    struct brw_sf_prog_key key;
+   enum pipe_error ret;
+   unsigned i;
 
    memset(&key, 0, sizeof(key));
 
@@ -138,24 +140,26 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
    /* XXX: Add one to account for the position input.
     */
    /* PIPE_NEW_FRAGMENT_SIGNATURE */
-   key.nr_attrs = brw->curr.fragment_shader->signature.nr_inputs + 1;
+   key.nr_attrs = sig->nr_inputs + 1;
 
 
-   /* XXX: this is probably where the mapping between vertex shader
-    * outputs and fragment shader inputs should be handled.  Assume
-    * for now 1:1 correspondance.
-    *
-    * XXX: scan frag shader inputs to work out linear vs. perspective
-    * interpolation below.
-    *
-    * XXX: as long as we're hard-wiring, is eg. position required to
-    * be linear?
+   /* XXX: why is position required to be linear?  why do we care
+    * about it at all?
     */
-   //key.linear_attrs = 0;
-   //key.persp_attrs = (1 << key.nr_attrs) - 1;
+   key.linear_attrs = 1;        /* position -- but why? */
 
-   key.linear_attrs = (1 << key.nr_attrs) - 1;
-   key.persp_attrs = 0;
+   for (i = 0; i < sig->nr_inputs; i++) {
+      switch (sig->input[i].interp) {
+      case TGSI_INTERPOLATE_CONSTANT:
+         break;
+      case TGSI_INTERPOLATE_LINEAR:
+         key.linear_attrs |= 1 << (i+1);
+         break;
+      case TGSI_INTERPOLATE_PERSPECTIVE:
+         key.persp_attrs |= 1 << (i+1);
+         break;
+      }
+   }
 
    /* BRW_NEW_REDUCED_PRIMITIVE */
    switch (brw->reduced_primitive) {
-- 
cgit v1.2.3


From 63b0af07755201e5ad630bf7f67a7997263734d6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 19:51:04 -0800
Subject: i965g: first pass at vs immediates in curbe

---
 src/gallium/drivers/i965/brw_context.h     |   6 ++
 src/gallium/drivers/i965/brw_curbe.c       |  40 ++++++----
 src/gallium/drivers/i965/brw_pipe_shader.c |  43 +++++++++++
 src/gallium/drivers/i965/brw_vs_emit.c     | 120 ++++++++++++-----------------
 4 files changed, 124 insertions(+), 85 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 64279c4676..096c8cf12b 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -149,12 +149,17 @@ struct brw_blend_state {
 
 struct brw_rasterizer_state;
 
+struct brw_immediate_data {
+   unsigned nr;
+   float (*data)[4];
+};
 
 struct brw_vertex_shader {
    const struct tgsi_token *tokens;
    struct brw_winsys_buffer *const_buffer;    /** Program constant buffer/surface */
 
    struct tgsi_shader_info info;
+   struct brw_immediate_data immediates;
 
    GLuint has_flow_control:1;
    GLuint use_const_buffer:1;
@@ -189,6 +194,7 @@ struct brw_fragment_shader {
    struct tgsi_shader_info info;
 
    struct brw_fs_signature signature;
+   struct brw_immediate_data immediates;
 
    unsigned iz_lookup;
    //unsigned wm_lookup;
diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 5fa1723311..3e821d5afe 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -226,21 +226,34 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
-      GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      GLuint nr_immediate, nr_const;
 
-      /* XXX: note that constant buffers are currently *already* in
-       * buffer objects.  If we want to keep on putting them into the
-       * curbe, makes sense to treat constbuf's specially with malloc.
-       */
-      const GLfloat *value = screen->buffer_map( screen,
-						 brw->curr.vertex_constants,
-						 PIPE_BUFFER_USAGE_CPU_READ);
+      nr_immediate = vs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset], 
+                vs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
 
-      /* XXX: what if user's constant buffer is too small?
-       */
-      memcpy(&buf[offset], value, nr * 4 * sizeof(float));
+         offset += nr_immediate * 4;
+      }
 
-      screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      if (nr_const) {
+         /* XXX: note that constant buffers are currently *already* in
+          * buffer objects.  If we want to keep on putting them into the
+          * curbe, makes sense to treat constbuf's specially with malloc.
+          */
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.vertex_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
+         
+         /* XXX: what if user's constant buffer is too small?
+          */
+         memcpy(&buf[offset], value, nr_const * 4 * sizeof(float));
+         
+         screen->buffer_unmap( screen, brw->curr.vertex_constants );
+      }
    }
 
    if (BRW_DEBUG & DEBUG_CURBE) {
@@ -263,8 +276,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    } 
    else {
       /* constants have changed */
-      if (brw->curbe.last_buf)
-	 FREE(brw->curbe.last_buf);
+      FREE(brw->curbe.last_buf);
 
       brw->curbe.last_buf = buf;
       brw->curbe.last_bufsz = bufsz;
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 3222ee7777..31a715ab65 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -55,6 +55,47 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info)
 }
 
 
+static void scan_immediates(const struct tgsi_token *tokens,
+                            const struct tgsi_shader_info *info,
+                            struct brw_immediate_data *imm)
+{
+   struct tgsi_parse_context parse;
+   boolean done = FALSE;
+
+   imm->nr = 0;
+   imm->data = MALLOC(info->immediate_count * 4 * sizeof(float));
+
+   tgsi_parse_init( &parse, tokens );
+   while (!tgsi_parse_end_of_tokens( &parse ) && !done) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+
+      case TGSI_TOKEN_TYPE_IMMEDIATE: {
+	 static const float id[4] = {0,0,0,1};
+	 const float *value = &parse.FullToken.FullImmediate.u[0].Float;
+	 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
+         unsigned i;
+
+	 for (i = 0; i < size; i++)
+	    imm->data[imm->nr][i] = value[i];
+
+	 for (; i < 4; i++)
+	    imm->data[imm->nr][i] = id[i];
+         
+         imm->nr++;
+	 break;
+      }
+
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+	 done = 1;
+	 break;
+      }
+   }
+}
+
 
 static void brw_bind_fs_state( struct pipe_context *pipe, void *prog )
 {
@@ -106,6 +147,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe,
       goto fail;
 
    tgsi_scan_shader(fs->tokens, &fs->info);
+   scan_immediates(fs->tokens, &fs->info, &fs->immediates);
 
    fs->signature.nr_inputs = fs->info.num_inputs;
    for (i = 0; i < fs->info.num_inputs; i++) {
@@ -150,6 +192,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
       goto fail;
 
    tgsi_scan_shader(vs->tokens, &vs->info);
+   scan_immediates(vs->tokens, &vs->info, &vs->immediates);
 
    vs->id = brw->program_id++;
    vs->has_flow_control = has_flow_control(&vs->info);
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 52d4731dfd..00f0af2d07 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -42,6 +42,15 @@
 #include "brw_vs.h"
 #include "brw_debug.h"
 
+/* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
+ */
+static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot )
+{
+   int nr = reg + slot/2;
+   int subnr = (slot%2) * 4;
+
+   return stride(brw_vec4_grf(nr, subnr), 0, 4, 1);
+}
 
 
 static struct brw_reg get_tmp( struct brw_vs_compile *c )
@@ -119,7 +128,7 @@ static boolean find_output_slot( struct brw_vs_compile *c,
  */
 static void brw_vs_alloc_regs( struct brw_vs_compile *c )
 {
-   GLuint i, reg = 0, mrf;
+   GLuint i, reg = 0, subreg = 0, mrf;
    int attributes_in_vue;
 
    /* Determine whether to use a real constant buffer or use a block
@@ -150,33 +159,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    /* User clip planes from curbe: 
     */
    if (c->key.nr_userclip) {
-      for (i = 0; i < c->key.nr_userclip; i++) {
-	 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
+      /* Skip over fixed planes:  Or never read them into vs unit?
+       */
+      subreg += 6;
+
+      for (i = 0; i < c->key.nr_userclip; i++, subreg++) {
+	 c->userplane[i] = 
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
       }     
 
       /* Deal with curbe alignment:
        */
-      reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;
+      subreg = align(subreg, 2);
+      /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/
    }
 
-   /* Vertex program parameters from curbe:
+
+   /* Immediates: always in the curbe.
+    *
+    * XXX: Can try to encode some immediates as brw immediates
+    * XXX: Make sure ureg sets minimal immediate size and respect it
+    * here.
     */
-   if (c->vp->use_const_buffer) {
-      /* get constants from a real constant buffer */
-      c->prog_data.curb_read_length = 0;
-      c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */
+   for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) {
+      c->regs[TGSI_FILE_IMMEDIATE][i] = 
+         stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
    }
-   else {
-      /* use a section of the GRF for constants */
+   c->prog_data.nr_params = c->vp->info.immediate_count * 4;
+
+
+   /* Vertex constant buffer.
+    *
+    * Constants from the buffer can be either cached in the curbe or
+    * loaded as needed from the actual constant buffer.
+    */
+   if (!c->vp->use_const_buffer) {
       GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1;
-      for (i = 0; i < nr_params; i++) {
-         c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
+
+      for (i = 0; i < nr_params; i++, subreg++) {
+         c->regs[TGSI_FILE_CONSTANT][i] =
+            stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1);
       }
-      reg += (nr_params + 1) / 2;
-      c->prog_data.curb_read_length = reg - 1;
-      c->prog_data.nr_params = nr_params * 4;
+
+      c->prog_data.nr_params += nr_params * 4;
    }
 
+   /* All regs allocated
+    */
+   reg += (subreg + 1) / 2;
+   c->prog_data.curb_read_length = reg - 1;
+
+
    /* Allocate input regs:  
     */
    c->nr_inputs = c->vp->info.num_inputs;
@@ -191,28 +224,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
    if (c->nr_inputs == 0)
       reg++;
 
-   /* Allocate a GRF and load immediate values by hand with 4 MOVs!!!
-    *
-    * XXX: Try to encode float immediates as brw immediates
-    * XXX: Put immediates into the CURBE.
-    * XXX: Make sure ureg sets minimal immediate size and respect it
-    * here.
-    */
-   for (i = 0; i < c->nr_immediates; i++) {
-      struct brw_reg r;
-      int j;
-
-      c->regs[TGSI_FILE_IMMEDIATE][i] = 
-         r = brw_vec8_grf(reg, 0);
-
-      for (j = 0; j < 4; j++) {
-	 brw_MOV(&c->func, 
-		 brw_writemask(r, (1<<j)), 
-		 brw_imm_f(c->immediate[i][j]));
-      }
-
-      reg++;
-   }
 
 
    /* Allocate outputs.  The non-position outputs go straight into message regs.
@@ -1605,8 +1616,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
    struct brw_instruction *end_inst, *last_inst;
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction *inst;
-   boolean done = FALSE;
-   int i;
 
    if (BRW_DEBUG & DEBUG_VS)
       tgsi_dump(c->vp->tokens, 0); 
@@ -1616,37 +1625,6 @@ void brw_vs_emit(struct brw_vs_compile *c)
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);
    
-   /* Inputs */
-   tgsi_parse_init( &parse, tokens );
-   while( !tgsi_parse_end_of_tokens( &parse ) ) {
-      tgsi_parse_token( &parse );
-
-      switch( parse.FullToken.Token.Type ) {
-      case TGSI_TOKEN_TYPE_DECLARATION:
-	 /* Nothing to do -- using info from tgsi_scan().
-	  */
-         break;
-
-      case TGSI_TOKEN_TYPE_IMMEDIATE: {
-	 static const float id[4] = {0,0,0,1};
-	 const float *imm = &parse.FullToken.FullImmediate.u[0].Float;
-	 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
-
-	 for (i = 0; i < size; i++)
-	    c->immediate[c->nr_immediates][i] = imm[i];
-
-	 for ( ; i < 4; i++)
-	    c->immediate[c->nr_immediates][i] = id[i];
-
-	 c->nr_immediates++;
-	 break;
-      }
-
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-	 done = 1;
-	 break;
-      }
-   }
 
    /* Static register allocation
     */
-- 
cgit v1.2.3


From 9507a6c206627b3ae76e2ae8398fff518e39941a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 20:02:42 -0800
Subject: i965g: fragment shader immediates working

---
 src/gallium/drivers/i965/brw_curbe.c    | 30 ++++++++++++++++-----
 src/gallium/drivers/i965/brw_wm.h       |  9 -------
 src/gallium/drivers/i965/brw_wm_pass0.c | 48 ++++++++-------------------------
 3 files changed, 34 insertions(+), 53 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c
index 3e821d5afe..3f031577d5 100644
--- a/src/gallium/drivers/i965/brw_curbe.c
+++ b/src/gallium/drivers/i965/brw_curbe.c
@@ -182,16 +182,32 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
 
    /* fragment shader constants */
    if (brw->curbe.wm_size) {
+      const struct brw_fragment_shader *fs = brw->curr.fragment_shader;
       GLuint offset = brw->curbe.wm_start * 16;
-      unsigned nr = brw->wm.prog_data->nr_params;
+      GLuint nr_immediate, nr_const;
+
+      nr_immediate = fs->immediates.nr;
+      if (nr_immediate) {
+         memcpy(&buf[offset], 
+                fs->immediates.data,
+                nr_immediate * 4 * sizeof(float));
 
-      const GLfloat *value = screen->buffer_map( screen,
-						 brw->curr.fragment_constants,
-						 PIPE_BUFFER_USAGE_CPU_READ);
+         offset += nr_immediate * 4;
+      }
 
-      memcpy(&buf[offset], value, nr * 4 * sizeof(float));
+      nr_const = fs->info.file_max[TGSI_FILE_CONSTANT] + 1;
+/*      nr_const = brw->wm.prog_data->nr_params; */
+      if (nr_const) {
+         const GLfloat *value = screen->buffer_map( screen,
+                                                    brw->curr.fragment_constants,
+                                                    PIPE_BUFFER_USAGE_CPU_READ);
 
-      screen->buffer_unmap( screen, brw->curr.fragment_constants );
+         memcpy(&buf[offset], value,
+                nr_const * 4 * sizeof(float));
+         
+         screen->buffer_unmap( screen, 
+                               brw->curr.fragment_constants );
+      }
    }
 
 
@@ -226,7 +242,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
    /* vertex shader constants */
    if (brw->curbe.vs_size) {
       GLuint offset = brw->curbe.vs_start * 16;
-      struct brw_vertex_shader *vs = brw->curr.vertex_shader;
+      const struct brw_vertex_shader *vs = brw->curr.vertex_shader;
       GLuint nr_immediate, nr_const;
 
       nr_immediate = vs->immediates.nr;
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index f85a8af878..b7d807dcb3 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -129,12 +129,6 @@ struct brw_wm_ref {
    GLuint insn:24;
 };
 
-struct brw_wm_imm_ref {
-   const struct brw_wm_ref *ref;
-   GLfloat imm1f;
-};
-
-
 struct brw_wm_instruction {
    struct brw_wm_value *dst[4];
    struct brw_wm_ref *src[3][4];
@@ -272,9 +266,6 @@ struct brw_wm_compile {
    struct brw_wm_instruction instruction[BRW_WM_MAX_INSN];
    GLuint nr_insns;
 
-   struct brw_wm_imm_ref imm_ref[BRW_WM_MAX_CONST];
-   GLuint nr_imm_refs;
-
    struct brw_wm_grf pass2_grf[BRW_WM_MAX_GRF/2];
 
    GLuint grf_limit;
diff --git a/src/gallium/drivers/i965/brw_wm_pass0.c b/src/gallium/drivers/i965/brw_wm_pass0.c
index 7bb341e2c2..0bacad2b0f 100644
--- a/src/gallium/drivers/i965/brw_wm_pass0.c
+++ b/src/gallium/drivers/i965/brw_wm_pass0.c
@@ -30,6 +30,7 @@
   */
 
 #include "util/u_memory.h"
+#include "util/u_math.h"
 
 #include "brw_debug.h"
 #include "brw_wm.h"
@@ -97,9 +98,10 @@ static void pass0_set_fpreg_ref( struct brw_wm_compile *c,
 }
 
 static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c, 
-					       const GLfloat *param_ptr )
+					       unsigned idx,
+                                               unsigned component)
 {
-   GLuint i = c->prog_data.nr_params++;
+   GLuint i = idx * 4 + component;
    
    if (i >= BRW_WM_MAX_PARAM) {
       debug_printf("%s: out of params\n", __FUNCTION__);
@@ -109,8 +111,7 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
    else {
       struct brw_wm_ref *ref = get_ref(c);
 
-      c->prog_data.param[i] = param_ptr;
-      c->nr_creg = (i+16)/16;
+      c->nr_creg = MAX2(c->nr_creg, (i+16)/16);
 
       /* Push the offsets into hw_reg.  These will be added to the
        * real register numbers once one is allocated in pass2.
@@ -125,37 +126,6 @@ static const struct brw_wm_ref *get_param_ref( struct brw_wm_compile *c,
 }
 
 
-/** Return a ref to an immediate value */
-static const struct brw_wm_ref *get_imm_ref( struct brw_wm_compile *c,
-					     const GLfloat *imm1f )
-{
-   GLuint i;
-
-   /* Search for an existing const value matching the request:
-    */
-   for (i = 0; i < c->nr_imm_refs; i++) {
-      if (c->imm_ref[i].imm1f == *imm1f) 
-	 return c->imm_ref[i].ref;
-   }
-
-   /* Else try to add a new one:
-    */
-   if (c->nr_imm_refs < Elements(c->imm_ref)) {
-      GLuint i = c->nr_imm_refs++;
-
-      /* An immediate is a special type of parameter:
-       */
-      c->imm_ref[i].imm1f = *imm1f;
-      c->imm_ref[i].ref = get_param_ref(c, imm1f);
-
-      return c->imm_ref[i].ref;
-   }
-   else {
-      debug_printf("%s: out of imm_refs\n", __FUNCTION__);
-      c->prog_data.error = 1;
-      return NULL;
-   }
-}
 
 
 /* Lookup our internal registers
@@ -177,11 +147,15 @@ static const struct brw_wm_ref *pass0_get_reg( struct brw_wm_compile *c,
 	 break;
 
       case TGSI_FILE_CONSTANT:
-	 ref = get_param_ref(c, &c->env_param[idx][component]);
+	 ref = get_param_ref(c, 
+                             c->fp->info.immediate_count + idx,
+                             component);
 	 break;
 
       case TGSI_FILE_IMMEDIATE:
-	 ref = get_imm_ref(c, &c->immediate[idx].v[component]);
+	 ref = get_param_ref(c, 
+                             idx,
+                             component);
 	 break;
 
       default:
-- 
cgit v1.2.3


From 95d7aca4b9963820e7ead81830340dbeb563897b Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 19 Nov 2009 20:40:41 -0800
Subject: i965g: fix typo converting wm src regs

---
 src/gallium/drivers/i965/brw_wm_fp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 0df84f8546..174486a101 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -1023,7 +1023,7 @@ static void emit_insn( struct brw_wm_compile *c,
 			inst->Instruction.Saturate );
 
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
-      src[i] = translate_src( c, &inst->FullSrcRegisters[0] );
+      src[i] = translate_src( c, &inst->FullSrcRegisters[i] );
    
    switch (opcode) {
    case TGSI_OPCODE_ABS:
-- 
cgit v1.2.3


From a24631bcd7ab2cbc6fff2a536502a07a13a9bc83 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Fri, 20 Nov 2009 18:08:29 +0000
Subject: Fix memory leak.

---
 src/gallium/drivers/softpipe/sp_state_fs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 256faa94b8..b41f7e8ab7 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
    struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs;
 
    draw_delete_vertex_shader(softpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
 
-- 
cgit v1.2.3


From 904469dcd2e50d950c5e061103907da659053ff2 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Fri, 20 Nov 2009 18:10:54 +0000
Subject: Fix indentation.

---
 src/gallium/drivers/softpipe/sp_context.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 5f60139968..bdbb7fa9b9 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe )
    if (softpipe->draw)
       draw_destroy( softpipe->draw );
 
-      softpipe->quad.shade->destroy( softpipe->quad.shade );
-      softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
-      softpipe->quad.blend->destroy( softpipe->quad.blend );
+   softpipe->quad.shade->destroy( softpipe->quad.shade );
+   softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+   softpipe->quad.blend->destroy( softpipe->quad.blend );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
       pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL);
    }
+
    sp_destroy_tile_cache(softpipe->zsbuf_cache);
    pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL);
 
-- 
cgit v1.2.3


From beea241374a91b8aab81db175b28e98c2b4835d9 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 19 Nov 2009 01:35:08 +0100
Subject: r300g: set better values in the R300_VAP_CNTL register

---
 src/gallium/drivers/r300/r300_emit.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index eeb97a2d37..2a8d32242b 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -690,12 +690,35 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     END_CS;
 }
 
+/* XXX This should probably go to util ... */
+/* Return the number of bits set in the given number. */
+static unsigned bitcount(unsigned n)
+{
+    unsigned bits;
+    for (bits = 0; n > 0; n = n >> 1) {
+        bits += n & 1;
+    }
+    return bits;
+}
+
+/* XXX ... and this one too. */
+#define MIN3(x, y, z) MIN2(MIN2(x, y), z)
+
 void r300_emit_vertex_program_code(struct r300_context* r300,
                                    struct r300_vertex_program_code* code)
 {
     int i;
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     unsigned instruction_count = code->length / 4;
+
+    int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72;
+    int input_count = MAX2(bitcount(code->InputsRead), 1);
+    int output_count = MAX2(bitcount(code->OutputsWritten), 1);
+    int temp_count = MAX2(code->num_temporaries, 1);
+    int pvs_num_slots = MIN3(vtx_mem_size / input_count,
+                             vtx_mem_size / output_count, 10);
+    int pvs_num_controllers = MIN2(6, vtx_mem_size / temp_count);
+
     CS_LOCALS(r300);
 
     if (!r300screen->caps->has_tcl) {
@@ -708,8 +731,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     /* R300_VAP_PVS_CODE_CNTL_0
      * R300_VAP_PVS_CONST_CNTL
      * R300_VAP_PVS_CODE_CNTL_1
-     * See the r5xx docs for instructions on how to use these.
-     * XXX these could be optimized to select better values... */
+     * See the r5xx docs for instructions on how to use these. */
     OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
     OUT_CS(R300_PVS_FIRST_INST(0) |
             R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
@@ -722,10 +744,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     for (i = 0; i < code->length; i++)
         OUT_CS(code->body.d[i]);
 
-    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
-            R300_PVS_NUM_CNTLRS(5) |
+    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
+            R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
             R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) |
-            R300_PVS_VF_MAX_VTX_NUM(12));
+            R300_PVS_VF_MAX_VTX_NUM(12) |
+            (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
     END_CS;
 }
 
-- 
cgit v1.2.3


From 6a3eb1f91b4ccd4ee7ac6b91505e0dfa476922d4 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 20 Nov 2009 14:10:45 -0800
Subject: r300g: Use MAX3 and MIN3.

---
 src/gallium/drivers/r300/r300_emit.c          | 3 ---
 src/gallium/drivers/r300/r300_state_derived.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 2a8d32242b..4cd5074379 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -701,9 +701,6 @@ static unsigned bitcount(unsigned n)
     return bits;
 }
 
-/* XXX ... and this one too. */
-#define MIN3(x, y, z) MIN2(MIN2(x, y), z)
-
 void r300_emit_vertex_program_code(struct r300_context* r300,
                                    struct r300_vertex_program_code* code)
 {
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 7166694edf..b4d0eeaf8c 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -458,7 +458,7 @@ static void r300_update_rs_block(struct r300_context* r300,
     rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
         R300_HIRES_EN;
 
-    rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
+    rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0);
 }
 
 /* Update the vertex format. */
-- 
cgit v1.2.3


From 06ec216d191e160494dd0a922ab0395418a78402 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 20 Nov 2009 14:10:59 -0800
Subject: r300g: Clean up bitcounting function.

I didn't see this in u_math; surely somebody else has this wheel
reinvented elsewhere.
---
 src/gallium/drivers/r300/r300_emit.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 4cd5074379..c50c989f01 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -690,14 +690,19 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     END_CS;
 }
 
-/* XXX This should probably go to util ... */
+/* XXX This should go to util ... */
 /* Return the number of bits set in the given number. */
 static unsigned bitcount(unsigned n)
 {
-    unsigned bits;
-    for (bits = 0; n > 0; n = n >> 1) {
-        bits += n & 1;
+    unsigned bits = 0;
+
+    while (n) {
+        if (n & 1) {
+            bits++;
+        }
+        n >>= 1;
     }
+
     return bits;
 }
 
@@ -714,7 +719,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     int temp_count = MAX2(code->num_temporaries, 1);
     int pvs_num_slots = MIN3(vtx_mem_size / input_count,
                              vtx_mem_size / output_count, 10);
-    int pvs_num_controllers = MIN2(6, vtx_mem_size / temp_count);
+    int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
 
     CS_LOCALS(r300);
 
-- 
cgit v1.2.3


From f6541773c4661247879995637207dcc5803bbf00 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 20 Nov 2009 14:31:42 -0800
Subject: i915g: Add missing break statement in i915_debug.c.

---
 src/gallium/drivers/i915/i915_debug.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index e6640e587b..c6e6d6fd31 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream )
       default:
 	 return debug(stream, "", 0);
       }
+      break;
    default:
       assert(0);
       return 0;
-- 
cgit v1.2.3


From f4041b37e2d305cff0a97eb836250e9f8b1840a8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 14 Nov 2009 22:14:42 +0100
Subject: r300g: fix rectangle textures on r3xx

Adapted from Maciej Cencora's patch.
---
 src/gallium/drivers/r300/r300_emit.c  | 22 ++++++++++++++++++++--
 src/gallium/drivers/r300/r300_state.c |  8 ++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index c50c989f01..0bdf58202f 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -129,7 +129,9 @@ static const float * get_shader_constant(
     struct rc_constant * constant,
     struct r300_constant_buffer * externals)
 {
-    static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 };
+    static float vec[4] = { 0.0, 0.0, 0.0, 0.0 };
+    struct pipe_texture *tex;
+
     switch(constant->Type) {
         case RC_CONSTANT_EXTERNAL:
             return externals->constants[constant->u.External];
@@ -137,10 +139,26 @@ static const float * get_shader_constant(
         case RC_CONSTANT_IMMEDIATE:
             return constant->u.Immediate;
 
+        case RC_CONSTANT_STATE:
+            switch (constant->u.State[0])
+            {
+                /* R3xx-specific */
+                case RC_STATE_R300_TEXRECT_FACTOR:
+                    tex = &r300->textures[constant->u.State[1]]->tex;
+                    vec[0] = 1.0 / tex->width[0];
+                    vec[1] = 1.0 / tex->height[0];
+                    vec[2] = vec[3] = 1;
+                    break;
+
+                default:
+                    assert(0);
+            }
+            return vec;
+
         default:
             debug_printf("r300: Implementation error: Unhandled constant type %i\n",
                 constant->Type);
-            return zero;
+            return vec;
     }
 }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index d1eced61db..00f10ffd73 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -571,6 +571,7 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
                                       struct pipe_texture** texture)
 {
     struct r300_context* r300 = r300_context(pipe);
+    boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
     int i;
 
     /* XXX magic num */
@@ -585,6 +586,13 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
             pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
                 texture[i]);
             r300->dirty_state |= (R300_NEW_TEXTURE << i);
+
+            /* R300-specific - set the texrect factor in a fragment shader */
+            if (!is_r500 && r300->textures[i]->is_npot) {
+                /* XXX It would be nice to re-emit just 1 constant,
+                 * XXX not all of them */
+                r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+            }
         }
     }
 
-- 
cgit v1.2.3


From 6a95996abb33a040f957ffedf3824afcc98a9e71 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 20 Nov 2009 14:55:22 -0800
Subject: r300g: Texrect factor cleanup.

(0, 0, 0, 1) is a much saner default value, and texrect factors only need
to be (1/s, 1/t, 0, 1).
---
 src/gallium/drivers/r300/r300_emit.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 0bdf58202f..37e75ba061 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -129,7 +129,7 @@ static const float * get_shader_constant(
     struct rc_constant * constant,
     struct r300_constant_buffer * externals)
 {
-    static float vec[4] = { 0.0, 0.0, 0.0, 0.0 };
+    static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
     struct pipe_texture *tex;
 
     switch(constant->Type) {
@@ -140,26 +140,30 @@ static const float * get_shader_constant(
             return constant->u.Immediate;
 
         case RC_CONSTANT_STATE:
-            switch (constant->u.State[0])
-            {
-                /* R3xx-specific */
+            switch (constant->u.State[0]) {
+                /* Factor for converting rectangle coords to
+                 * normalized coords. Should only show up on non-r500. */
                 case RC_STATE_R300_TEXRECT_FACTOR:
                     tex = &r300->textures[constant->u.State[1]]->tex;
                     vec[0] = 1.0 / tex->width[0];
                     vec[1] = 1.0 / tex->height[0];
-                    vec[2] = vec[3] = 1;
                     break;
 
                 default:
-                    assert(0);
+                    debug_printf("r300: Implementation error: "
+                        "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
             }
-            return vec;
+            break;
 
         default:
-            debug_printf("r300: Implementation error: Unhandled constant type %i\n",
-                constant->Type);
-            return vec;
+            debug_printf("r300: Implementation error: "
+                "Unhandled constant type %d\n", constant->Type);
     }
+
+    /* This should either be (0, 0, 0, 1), which should be a relatively safe
+     * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE
+     * state factors. */
+    return vec;
 }
 
 /* Convert a normal single-precision float into the 7.16 format
-- 
cgit v1.2.3


From ae70cd1f027bdfc7f500d78b6c5333e6b35d3ee8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 19 Nov 2009 21:07:20 +0100
Subject: r300g: remove variant states from emit_state_invariant

---
 src/gallium/drivers/r300/r300_state_invariant.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index c07e6ae676..46d1cb39b5 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+    BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
     /* Flush PVS. */
     OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
 
@@ -135,8 +135,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
-    OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
-    OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
     OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
 
     /* XXX */
-- 
cgit v1.2.3


From 015e7e7724a64d3d9e02e57f6a8eb88a6441f596 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 20 Nov 2009 05:17:00 +0100
Subject: r300g: emit R300_TX_ENABLE to indicate there are no textures bound

Previously, this reg wasn't emitted at all if texture_count == 0.
---
 src/gallium/drivers/r300/r300_emit.c | 15 +++++++++++++--
 src/gallium/drivers/r300/r300_emit.h |  2 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 37e75ba061..6d702c0027 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -837,13 +837,22 @@ void r300_emit_viewport_state(struct r300_context* r300,
     END_CS;
 }
 
+void r300_emit_texture_count(struct r300_context* r300)
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(2);
+    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+    END_CS;
+
+}
+
 void r300_flush_textures(struct r300_context* r300)
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(4);
+    BEGIN_CS(2);
     OUT_CS_REG(R300_TX_INVALTAGS, 0);
-    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
     END_CS;
 }
 
@@ -997,6 +1006,8 @@ validate:
     /* Samplers and textures are tracked separately but emitted together. */
     if (r300->dirty_state &
             (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) {
+        r300_emit_texture_count(r300);
+
         for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
   	    if (r300->dirty_state &
 		((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 7c83c5166d..3797d3d332 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -92,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300,
 void r300_emit_viewport_state(struct r300_context* r300,
                               struct r300_viewport_state* viewport);
 
+void r300_emit_texture_count(struct r300_context* r300);
+
 void r300_flush_textures(struct r300_context* r300);
 
 /* Emit all dirty state. */
-- 
cgit v1.2.3


From 1c181a7eff96816b5d72ea5daab5818eef0ebc60 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 15 Nov 2009 05:25:15 +0100
Subject: r300g: Begin separating HW TCL and SW TCL state and setup.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch removes draw_context entirely from the HW TCL path and cleans up
a few other things along the way. Hopefully, nothing got broken.

Thanks to Marek Olšák for testing, review, and pointing out my bugs. :3
---
 src/gallium/drivers/r300/r300_context.c       |  31 ++---
 src/gallium/drivers/r300/r300_render.c        |   4 -
 src/gallium/drivers/r300/r300_state.c         |   8 +-
 src/gallium/drivers/r300/r300_state_derived.c | 192 +++++++++++++-------------
 src/gallium/drivers/r300/r300_vbo.c           |  46 ------
 src/gallium/drivers/r300/r300_vs.h            |   3 -
 6 files changed, 110 insertions(+), 174 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index ae23329b83..26db536248 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -123,15 +123,24 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     r300->context.clear = r300_clear;
 
-    if (r300screen->caps->has_tcl)
-    {
+    if (r300screen->caps->has_tcl) {
         r300->context.draw_arrays = r300_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
         r300->context.draw_range_elements = r300_draw_range_elements;
-    }
-    else
-    {
-        assert(0);
+    } else {
+        r300->context.draw_arrays = r300_swtcl_draw_arrays;
+        r300->context.draw_elements = r300_draw_elements;
+        r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+
+        /* Create a Draw. This is used for SW TCL. */
+        r300->draw = draw_create();
+        /* Enable our renderer. */
+        draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
+        /* Enable Draw's clipping. */
+        draw_set_driver_clipping(r300->draw, FALSE);
+        /* Force Draw to never do viewport transform, since we can do
+         * transform in hardware, always. */
+        draw_set_viewport_state(r300->draw, &r300_viewport_identity);
     }
 
     r300->context.is_texture_referenced = r300_is_texture_referenced;
@@ -145,16 +154,6 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
     r300->viewport_state = CALLOC_STRUCT(r300_viewport_state);
 
-    /* Create a Draw. This is used for vert collation and SW TCL. */
-    r300->draw = draw_create();
-    /* Enable our renderer. */
-    draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
-    /* Disable Draw's clipping if TCL is present. */
-    draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl);
-    /* Force Draw to never do viewport transform, since (again) we can do
-     * transform in hardware, always. */
-    draw_set_viewport_state(r300->draw, &r300_viewport_identity);
-
     /* Open up the OQ BO. */
     r300->oqbo = screen->buffer_create(screen, 4096,
             PIPE_BUFFER_USAGE_VERTEX, 4096);
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 62e1456ed3..4c5fb405c6 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -183,8 +183,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
-    setup_vertex_attributes(r300);
-
     setup_index_buffer(r300, indexBuffer, indexSize);
 
     r300_emit_dirty_state(r300);
@@ -226,8 +224,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
         return FALSE;
     }
 
-    setup_vertex_attributes(r300);
-
     r300_emit_dirty_state(r300);
 
     r300_emit_aos(r300, start);
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 00f10ffd73..5422a2cc9c 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -714,9 +714,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
 
         tgsi_scan_shader(shader->tokens, &vs->info);
 
-        /* Appease Draw. */
-        vs->draw = draw_create_vertex_shader(r300->draw, shader);
-
         return (void*)vs;
     } else {
         return draw_create_vertex_shader(r300->draw, shader);
@@ -727,8 +724,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    draw_flush(r300->draw);
-
     if (r300_screen(pipe->screen)->caps->has_tcl) {
         struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
@@ -739,10 +734,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
             r300_translate_vertex_shader(r300, vs);
         }
 
-        draw_bind_vertex_shader(r300->draw, vs->draw);
         r300->vs = vs;
         r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
     } else {
+        draw_flush(r300->draw);
         draw_bind_vertex_shader(r300->draw,
                 (struct draw_vertex_shader*)shader);
     }
@@ -756,7 +751,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
         struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
         rc_constants_destroy(&vs->code.constants);
-        draw_delete_vertex_shader(r300->draw, vs->draw);
         FREE((void*)vs->state.tokens);
         FREE(shader);
     } else {
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index b4d0eeaf8c..5aa4166d93 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -65,84 +65,43 @@ int r300_shader_key_compare(void* key1, void* key2) {
 static void r300_vs_tab_routes(struct r300_context* r300,
                                struct r300_vertex_info* vformat)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct vertex_info* vinfo = &vformat->vinfo;
     int* tab = vformat->vs_tab;
     boolean pos = FALSE, psize = FALSE, fog = FALSE;
     int i, texs = 0, cols = 0;
-    struct tgsi_shader_info* info;
-
-    if (r300screen->caps->has_tcl) {
-        /* Use vertex shader to determine required routes. */
-        info = &r300->vs->info;
-    } else {
-        /* Use fragment shader to determine required routes. */
-        info = &r300->fs->info;
-    }
+    struct tgsi_shader_info* info = &r300->fs->info;
 
-    assert(info->num_inputs <= 16);
+    /* XXX One day we should figure out how to handle a different number of
+     * VS outputs and FS inputs, as well as a different number of vertex streams
+     * and VS inputs. It's definitely one of the sources of hardlocks. */
 
-    if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte)
-    {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (r300->vs->code.inputs[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    tab[i] = 0;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    tab[i] = 2 + cols;
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    assert(psize == FALSE);
-                    psize = TRUE;
-                    tab[i] = 15;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    assert(fog == FALSE);
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    tab[i] = 6 + texs;
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex input %d\n",
-                        info->input_semantic_name[i]);
-                    break;
-            }
-        }
-    }
-    else
-    {
-        /* Just copy vert attribs over as-is. */
-        for (i = 0; i < info->num_inputs; i++) {
-            tab[i] = i;
-        }
-
-        for (i = 0; i < info->num_outputs; i++) {
-            switch (info->output_semantic_name[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    psize = TRUE;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex output %d\n",
-                        info->output_semantic_name[i]);
-                    break;
-            }
+    for (i = 0; i < info->num_inputs; i++) {
+        switch (info->input_semantic_name[i]) {
+            case TGSI_SEMANTIC_POSITION:
+                pos = TRUE;
+                tab[i] = 0;
+                break;
+            case TGSI_SEMANTIC_COLOR:
+                tab[i] = 2 + cols;
+                cols++;
+                break;
+            case TGSI_SEMANTIC_PSIZE:
+                assert(psize == FALSE);
+                psize = TRUE;
+                tab[i] = 15;
+                break;
+            case TGSI_SEMANTIC_FOG:
+                assert(fog == FALSE);
+                fog = TRUE;
+                /* Fall through */
+            case TGSI_SEMANTIC_GENERIC:
+                tab[i] = 6 + texs;
+                texs++;
+                break;
+            default:
+                debug_printf("r300: Unknown vertex input %d\n",
+                    info->input_semantic_name[i]);
+                break;
         }
     }
 
@@ -161,8 +120,7 @@ static void r300_vs_tab_routes(struct r300_context* r300,
 
     /* We need to add vertex position attribute only for SW TCL case,
      * for HW TCL case it could be generated by vertex shader */
-    if (!pos && !r300screen->caps->has_tcl) {
-        debug_printf("r300: Forcing vertex position attribute emit...\n");
+    if (!pos) {
         /* Make room for the position attribute
          * at the beginning of the tab. */
         for (i = 15; i > 0; i--) {
@@ -230,31 +188,66 @@ static void r300_vs_tab_routes(struct r300_context* r300,
 static void r300_vertex_psc(struct r300_context* r300,
                             struct r300_vertex_info* vformat)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
     uint16_t type, swizzle;
     enum pipe_format format;
-    unsigned i, attrib_count;
+    unsigned i;
 
     /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on their info,
+     * so PSC should just route stuff based on the vertex elements,
      * and not on attrib information. */
-    if (r300screen->caps->has_tcl) {
-        attrib_count = r300->vs->info.num_inputs;
-        DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n",
-                attrib_count);
-    } else {
-        attrib_count = vinfo->num_attribs;
-        DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
-        for (i = 0; i < attrib_count; i++) {
-            DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-                   " tab %d\n", vinfo->attrib[i].src_index,
-                   vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-                   tab[i]);
+    DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
+            " in psc\n",
+            r300->vs->info.num_inputs,
+            r300->vertex_element_count);
+
+    for (i = 0; i < r300->vertex_element_count; i++) {
+        format = r300->vertex_element[i].src_format;
+
+        type = r300_translate_vertex_data_type(format) |
+            (i << R300_DST_VEC_LOC_SHIFT);
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i % 2) {
+            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vformat->vap_prog_stream_cntl[i >> 1] |= type;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
         }
     }
 
+
+    assert(i <= 15);
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vformat->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context* r300,
+                                  struct r300_vertex_info* vformat)
+{
+    struct vertex_info* vinfo = &vformat->vinfo;
+    int* tab = vformat->vs_tab;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i, attrib_count;
+
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+    for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               tab[i]);
+    }
+
     for (i = 0; i < attrib_count; i++) {
         /* Make sure we have a proper destination for our attribute. */
         assert(tab[i] != -1);
@@ -272,12 +265,10 @@ static void r300_vertex_psc(struct r300_context* r300,
         /* Add the attribute to the PSC table. */
         if (i & 1) {
             vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-
             vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
         } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type <<  0;
-
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0;
+            vformat->vap_prog_stream_cntl[i >> 1] |= type;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
         }
     }
 
@@ -505,7 +496,13 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     }
 
     r300_vs_tab_routes(r300, vformat);
-    r300_vertex_psc(r300, vformat);
+
+    if (r300screen->caps->has_tcl) {
+        r300_vertex_psc(r300, vformat);
+    } else {
+        r300_swtcl_vertex_psc(r300, vformat);
+    }
+
     r300_update_fs_tab(r300, vformat);
 
     r300_update_rs_block(r300, rs_block);
@@ -553,8 +550,7 @@ static void r300_update_ztop(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    /* XXX */
-    if (TRUE || r300->dirty_state &
+    if (r300->dirty_state &
         (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
         r300_update_derived_shader_state(r300);
     }
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index a6a159667a..6ebaf715dc 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -34,52 +34,6 @@
 #include "r300_reg.h"
 #include "r300_winsys.h"
 
-static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo,
-                                          struct pipe_vertex_element *vert_elem,
-                                          unsigned attr_num)
-{
-    uint16_t hw_fmt1, hw_fmt2;
-
-    hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) |
-        (attr_num << R300_DST_VEC_LOC_SHIFT);
-    hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format);
-
-    if (attr_num % 2 == 0)
-    {
-        vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1;
-        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2;
-    }
-    else
-    {
-        vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16;
-        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16;
-    }
-}
-
-static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo,
-                                        unsigned attribs_num)
-{
-    uint32_t last_vec_bit = (attribs_num % 2 == 0) ?
-        (R300_LAST_VEC << 16) : R300_LAST_VEC;
-
-    assert(attribs_num > 0 && attribs_num <= 16);
-    vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit;
-}
-
-void setup_vertex_attributes(struct r300_context *r300)
-{
-    struct pipe_vertex_element *vert_elem;
-    int i;
-
-    for (i = 0; i < r300->vertex_element_count; i++) {
-        vert_elem = &r300->vertex_element[i];
-        setup_vertex_attribute(r300->vertex_info, vert_elem, i);
-    }
-
-    finish_vertex_attribs_setup(r300->vertex_info,
-        r300->vertex_element_count);
-}
-
 static INLINE int get_buffer_offset(struct r300_context *r300,
                                     unsigned int buf_nr,
                                     unsigned int elem_offset)
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 2a4ce315e3..00b02bf510 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -35,9 +35,6 @@ struct r300_vertex_shader {
     struct pipe_shader_state state;
     struct tgsi_shader_info info;
 
-    /* Fallback shader, because Draw has issues */
-    struct draw_vertex_shader* draw;
-
     /* Has this shader been translated yet? */
     boolean translated;
 
-- 
cgit v1.2.3


From 8bf75f28de161173d1cdaad8c74bcac074e1211e Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Sat, 21 Nov 2009 01:52:22 +0000
Subject: i965g: get basic texturing working again

Revert to fixed-layout surface binding table -- it's probably the best
way to do this.  Pass sampler and texture numbers separately even
though we're always keeping them the same at present.
---
 src/gallium/drivers/i965/brw_context.h          | 13 +++--
 src/gallium/drivers/i965/brw_pipe_fb.c          |  4 +-
 src/gallium/drivers/i965/brw_pipe_sampler.c     |  3 +-
 src/gallium/drivers/i965/brw_sf.c               |  3 +-
 src/gallium/drivers/i965/brw_wm.c               |  9 +++
 src/gallium/drivers/i965/brw_wm.h               |  4 +-
 src/gallium/drivers/i965/brw_wm_emit.c          | 34 ++++++------
 src/gallium/drivers/i965/brw_wm_fp.c            | 39 ++++++++-----
 src/gallium/drivers/i965/brw_wm_surface_state.c | 74 ++++++++++++++++---------
 9 files changed, 117 insertions(+), 66 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 096c8cf12b..598e747fe0 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -209,9 +209,9 @@ struct brw_fragment_shader {
 
 
 struct brw_sampler {
-   float border_color[4];
    struct brw_ss0 ss0;
    struct brw_ss1 ss1;
+   float border_color[4];
    struct brw_ss3 ss3;
 };
 
@@ -355,20 +355,23 @@ struct brw_vs_ouput_sizes {
 /** Number of texture sampler units */
 #define BRW_MAX_TEX_UNIT 16
 
+/** Max number of render targets in a shader */
+#define BRW_MAX_DRAW_BUFFERS 4
+
 /**
  * Size of our surface binding table for the WM.
  * This contains pointers to the drawing surfaces and current texture
  * objects and shader constant buffers (+2).
  */
-#define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1)
+#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
 
 /**
  * Helpers to convert drawing buffers, textures and constant buffers
  * to surface binding table indexes, for WM.
  */
-#define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS) 
-#define SURF_INDEX_TEXTURE(t)        (PIPE_MAX_COLOR_BUFS + 1 + (t))
+#define BTI_COLOR_BUF(d)          (d)
+#define BTI_FRAGMENT_CONSTANTS    (BRW_MAX_DRAW_BUFFERS) 
+#define BTI_TEXTURE(t)            (BRW_MAX_DRAW_BUFFERS + 1 + (t))
 
 /**
  * Size of surface binding table for the VS.
diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index 1511220447..6b03094f50 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -31,7 +31,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe,
 
    /* Color buffers:
     */
-   for (i = 0; i < MAX2(fb->nr_cbufs, brw->curr.fb.nr_cbufs); i++) {
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       if (brw->curr.fb.cbufs[i] != fb->cbufs[i]) {
 	 brw->state.dirty.mesa |= PIPE_NEW_COLOR_BUFFERS;
 	 pipe_surface_reference(&brw->curr.fb.cbufs[i], fb->cbufs[i]);
@@ -39,7 +39,7 @@ static void brw_set_framebuffer_state( struct pipe_context *pipe,
    }
    
    if (brw->curr.fb.nr_cbufs != fb->nr_cbufs) {
-      brw->curr.fb.nr_cbufs = fb->nr_cbufs;
+      brw->curr.fb.nr_cbufs = MIN2(BRW_MAX_DRAW_BUFFERS, fb->nr_cbufs);
       brw->state.dirty.mesa |= PIPE_NEW_NR_CBUFS;
    }
 }
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index f0a765ecf5..5cd38a43a6 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -107,7 +107,7 @@ static void *
 brw_create_sampler_state( struct pipe_context *pipe,
                           const struct pipe_sampler_state *template )
 {
-   struct brw_sampler_state *sampler = CALLOC_STRUCT(brw_sampler_state);
+   struct brw_sampler *sampler = CALLOC_STRUCT(brw_sampler);
 
    sampler->ss0.min_filter = translate_img_filter( template->min_img_filter );
    sampler->ss0.mag_filter = translate_img_filter( template->mag_img_filter );
@@ -214,7 +214,6 @@ void brw_pipe_sampler_init( struct brw_context *brw )
 
    brw->base.set_sampler_textures = brw_set_sampler_textures;
 }
-
 void brw_pipe_sampler_cleanup( struct brw_context *brw )
 {
 }
diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index e1986a9dbb..a28fb71589 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -153,9 +153,10 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
       case TGSI_INTERPOLATE_CONSTANT:
          break;
       case TGSI_INTERPOLATE_LINEAR:
+      case TGSI_INTERPOLATE_PERSPECTIVE:
          key.linear_attrs |= 1 << (i+1);
          break;
-      case TGSI_INTERPOLATE_PERSPECTIVE:
+//      case TGSI_INTERPOLATE_PERSPECTIVE:
          key.persp_attrs |= 1 << (i+1);
          break;
       }
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 3c5a2dab7a..2c9d3e5e87 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -56,6 +56,15 @@ GLuint brw_wm_nr_args( GLuint opcode )
    case WM_FB_WRITE:
    case WM_PINTERP:
       return 3;
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXD:
+      /* sampler arg is held as a field in the instruction, not in an
+       * actual register:
+       */
+      return tgsi_get_opcode_info(opcode)->num_src - 1;
+
    default:
       assert(opcode < MAX_OPCODE);
       return tgsi_get_opcode_info(opcode)->num_src;
diff --git a/src/gallium/drivers/i965/brw_wm.h b/src/gallium/drivers/i965/brw_wm.h
index b7d807dcb3..f1ca9f6369 100644
--- a/src/gallium/drivers/i965/brw_wm.h
+++ b/src/gallium/drivers/i965/brw_wm.h
@@ -135,6 +135,7 @@ struct brw_wm_instruction {
    GLuint opcode:8;
    GLuint saturate:1;
    GLuint writemask:4;
+   GLuint sampler:4;
    GLuint tex_unit:4;   /* texture/sampler unit for texture instructions */
    GLuint target:4;     /* TGSI_TEXTURE_x for texture instructions,
                          * target binding table index for FB_WRITE
@@ -201,7 +202,8 @@ struct brw_fp_instruction {
    unsigned opcode:8;
    unsigned target:8; /* XXX: special usage for FB_WRITE */
    unsigned tex_unit:4;
-   unsigned pad:12;
+   unsigned sampler:4;
+   unsigned pad:8;
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index a14e12f35b..3250db1848 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -792,7 +792,8 @@ static void emit_tex( struct brw_wm_compile *c,
 		      const struct brw_wm_instruction *inst,
 		      struct brw_reg *dst,
 		      GLuint dst_flags,
-		      struct brw_reg *arg )
+		      struct brw_reg *coord,
+		      GLuint sampler)
 {
    struct brw_compile *p = &c->func;
    GLuint msgLength, responseLength;
@@ -838,7 +839,7 @@ static void emit_tex( struct brw_wm_compile *c,
    for (i = 0; i < nr; i++) {
       static const GLuint swz[4] = {0,1,2,2};
       if (emit & (1<<i)) 
-	 brw_MOV(p, brw_message_reg(msgLength+1), arg[swz[i]]);
+	 brw_MOV(p, brw_message_reg(msgLength+1), coord[swz[i]]);
       else
 	 brw_MOV(p, brw_message_reg(msgLength+1), brw_imm_f(0));
       msgLength += 2;
@@ -862,8 +863,8 @@ static void emit_tex( struct brw_wm_compile *c,
 	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 	      1,
 	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
-              SURF_INDEX_TEXTURE(inst->tex_unit),
-	      inst->tex_unit,	  /* sampler */
+              BTI_TEXTURE(inst->tex_unit),
+	      sampler,          /* sampler index */
 	      inst->writemask,
 	      msg_type, 
 	      responseLength,
@@ -878,7 +879,8 @@ static void emit_txb( struct brw_wm_compile *c,
 		      const struct brw_wm_instruction *inst,
 		      struct brw_reg *dst,
 		      GLuint dst_flags,
-		      struct brw_reg *arg )
+		      struct brw_reg *coord,
+		      GLuint sampler )
 {
    struct brw_compile *p = &c->func;
    GLuint msgLength;
@@ -888,7 +890,7 @@ static void emit_txb( struct brw_wm_compile *c,
    switch (inst->target) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
-      brw_MOV(p, brw_message_reg(2), arg[0]);
+      brw_MOV(p, brw_message_reg(2), coord[0]);
       brw_MOV(p, brw_message_reg(4), brw_imm_f(0));
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
@@ -896,22 +898,22 @@ static void emit_txb( struct brw_wm_compile *c,
    case TGSI_TEXTURE_RECT:
    case TGSI_TEXTURE_SHADOW2D:
    case TGSI_TEXTURE_SHADOWRECT:
-      brw_MOV(p, brw_message_reg(2), arg[0]);
-      brw_MOV(p, brw_message_reg(4), arg[1]);
+      brw_MOV(p, brw_message_reg(2), coord[0]);
+      brw_MOV(p, brw_message_reg(4), coord[1]);
       brw_MOV(p, brw_message_reg(6), brw_imm_f(0));
       break;
    case TGSI_TEXTURE_3D:
    case TGSI_TEXTURE_CUBE:
-      brw_MOV(p, brw_message_reg(2), arg[0]);
-      brw_MOV(p, brw_message_reg(4), arg[1]);
-      brw_MOV(p, brw_message_reg(6), arg[2]);
+      brw_MOV(p, brw_message_reg(2), coord[0]);
+      brw_MOV(p, brw_message_reg(4), coord[1]);
+      brw_MOV(p, brw_message_reg(6), coord[2]);
       break;
    default:
       /* unexpected target */
       abort();
    }
 
-   brw_MOV(p, brw_message_reg(8), arg[3]);
+   brw_MOV(p, brw_message_reg(8), coord[3]);
    msgLength = 9;
 
    if (BRW_IS_IGDNG(p->brw))
@@ -923,8 +925,8 @@ static void emit_txb( struct brw_wm_compile *c,
 	      retype(vec16(dst[0]), BRW_REGISTER_TYPE_UW),
 	      1,
 	      retype(c->payload.depth[0].hw_reg, BRW_REGISTER_TYPE_UW),
-              SURF_INDEX_TEXTURE(inst->tex_unit),
-	      inst->tex_unit,	  /* sampler */
+              BTI_TEXTURE(inst->tex_unit),
+	      sampler,          /* sampler index */
 	      inst->writemask,
 	      msg_type,
 	      8,		/* responseLength */
@@ -1483,11 +1485,11 @@ void brw_wm_emit( struct brw_wm_compile *c )
 	 /* Texturing operations:
 	  */
       case TGSI_OPCODE_TEX:
-	 emit_tex(c, inst, dst, dst_flags, args[0]);
+	 emit_tex(c, inst, dst, dst_flags, args[0], inst->sampler);
 	 break;
 
       case TGSI_OPCODE_TXB:
-	 emit_txb(c, inst, dst, dst_flags, args[0]);
+	 emit_txb(c, inst, dst, dst_flags, args[0], inst->sampler);
 	 break;
 
       case TGSI_OPCODE_KIL:
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index 174486a101..a8b5e15f36 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -282,6 +282,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
 					     struct brw_fp_dst dest,
 					     GLuint tex_unit,
 					     GLuint target,
+					     GLuint sampler,
 					     struct brw_fp_src src0,
 					     struct brw_fp_src src1,
 					     struct brw_fp_src src2 )
@@ -298,6 +299,7 @@ static struct brw_fp_instruction * emit_tex_op(struct brw_wm_compile *c,
    inst->dst = dest;
    inst->tex_unit = tex_unit;
    inst->target = target;
+   inst->sampler = sampler;
    inst->src[0] = src0;
    inst->src[1] = src1;
    inst->src[2] = src2;
@@ -313,7 +315,7 @@ static INLINE void emit_op3(struct brw_wm_compile *c,
 			    struct brw_fp_src src1,
 			    struct brw_fp_src src2 )
 {
-   emit_tex_op(c, op, dest, 0, 0, src0, src1, src2);
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src2);
 }
 
 
@@ -323,7 +325,7 @@ static INLINE void emit_op2(struct brw_wm_compile *c,
 			    struct brw_fp_src src0,
 			    struct brw_fp_src src1)
 {
-   emit_tex_op(c, op, dest, 0, 0, src0, src1, src_undef());
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src1, src_undef());
 }
 
 static INLINE void emit_op1(struct brw_wm_compile *c,
@@ -331,14 +333,14 @@ static INLINE void emit_op1(struct brw_wm_compile *c,
 			    struct brw_fp_dst dest,
 			    struct brw_fp_src src0)
 {
-   emit_tex_op(c, op, dest, 0, 0, src0, src_undef(), src_undef());
+   emit_tex_op(c, op, dest, 0, 0, 0, src0, src_undef(), src_undef());
 }
 
 static INLINE void emit_op0(struct brw_wm_compile *c,
 			   GLuint op,
 			   struct brw_fp_dst dest)
 {
-   emit_tex_op(c, op, dest, 0, 0, src_undef(), src_undef(), src_undef());
+   emit_tex_op(c, op, dest, 0, 0, 0, src_undef(), src_undef(), src_undef());
 }
 
 
@@ -674,7 +676,8 @@ static void precalc_tex( struct brw_wm_compile *c,
 			 struct brw_fp_dst dst,
 			 unsigned target,
 			 unsigned unit,
-			 struct brw_fp_src src0 )
+			 struct brw_fp_src src0,
+			 struct brw_fp_src sampler )
 {
    struct brw_fp_src coord = src_undef();
    struct brw_fp_dst tmp = dst_undef();
@@ -751,6 +754,7 @@ static void precalc_tex( struct brw_wm_compile *c,
                   dst_saturate(tmp, dst.saturate),
                   unit,
                   target,
+                  sampler.index,
                   coord,
                   src_undef(),
                   src_undef());
@@ -802,6 +806,7 @@ static void precalc_tex( struct brw_wm_compile *c,
                   dst,
                   unit,
                   target,
+                  sampler.index,
                   coord,
                   src_undef(),
                   src_undef());
@@ -851,7 +856,8 @@ static void precalc_txp( struct brw_wm_compile *c,
 			 struct brw_fp_dst dst,
 			 unsigned target,
 			 unsigned unit,
-			 struct brw_fp_src src0 )
+			 struct brw_fp_src src0,
+                         struct brw_fp_src sampler )
 {
    if (projtex(c, target, src0)) {
       struct brw_fp_dst tmp = get_temp(c);
@@ -877,7 +883,8 @@ static void precalc_txp( struct brw_wm_compile *c,
 		  dst,
 		  target,
 		  unit,
-		  src_reg_from_dst(tmp));
+		  src_reg_from_dst(tmp),
+                  sampler );
 
       release_temp(c, tmp);
    }
@@ -885,7 +892,7 @@ static void precalc_txp( struct brw_wm_compile *c,
    {
       /* dst = TEX src0
        */
-      precalc_tex(c, dst, target, unit, src0);
+      precalc_tex(c, dst, target, unit, src0, sampler);
    }
 }
 
@@ -936,6 +943,7 @@ static void emit_fb_write( struct brw_wm_compile *c )
 		  dst_undef(),
 		  (i == c->key.nr_cbufs - 1), /* EOT */
 		  i,
+                  0,            /* no sampler */
 		  outcolor,
 		  payload_r0_depth,
 		  outdepth);
@@ -1056,15 +1064,17 @@ static void emit_insn( struct brw_wm_compile *c,
    case TGSI_OPCODE_TEX:
       precalc_tex(c, dst,
 		  inst->InstructionExtTexture.Texture,
-		  src[0].file,	/* sampler unit */
-		  src[1] );
+		  src[1].index,	/* use sampler unit for tex idx */
+		  src[0],       /* coord */
+                  src[1]);      /* sampler */
       break;
 
    case TGSI_OPCODE_TXP:
       precalc_txp(c, dst,
 		  inst->InstructionExtTexture.Texture,
-		  src[0].file,	/* sampler unit */
-		  src[1] );
+		  src[1].index,	/* use sampler unit for tex idx */
+		  src[0],       /* coord */
+                  src[1]);      /* sampler */
       break;
 
    case TGSI_OPCODE_TXB:
@@ -1072,8 +1082,9 @@ static void emit_insn( struct brw_wm_compile *c,
        */
       precalc_tex(c, dst,
 		  inst->InstructionExtTexture.Texture,
-		  src[0].file,	/* sampler unit */
-		  src[1] );
+		  src[1].index,	/* use sampler unit for tex idx*/
+		  src[0],
+                  src[1]);
       break;
 
    case TGSI_OPCODE_XPD: 
diff --git a/src/gallium/drivers/i965/brw_wm_surface_state.c b/src/gallium/drivers/i965/brw_wm_surface_state.c
index f882331433..f92b8198ed 100644
--- a/src/gallium/drivers/i965/brw_wm_surface_state.c
+++ b/src/gallium/drivers/i965/brw_wm_surface_state.c
@@ -149,19 +149,23 @@ brw_wm_get_binding_table(struct brw_context *brw,
    enum pipe_error ret;
    struct brw_winsys_reloc reloc[BRW_WM_MAX_SURF];
    uint32_t data[BRW_WM_MAX_SURF];
+   GLuint nr_relocs = 0;
    GLuint data_size = brw->wm.nr_surfaces * sizeof data[0];
    int i;
 
    assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF);
    assert(brw->wm.nr_surfaces > 0);
 
-   /* Emit binding table relocations to surface state */
+   /* Emit binding table relocations to surface state 
+    */
    for (i = 0; i < brw->wm.nr_surfaces; i++) {
-      make_reloc(&reloc[i],
-                 BRW_USAGE_STATE,
-                 0,
-                 i * sizeof(GLuint),
-                 brw->wm.surf_bo[i]);
+      if (brw->wm.surf_bo[i]) {
+         make_reloc(&reloc[nr_relocs++],
+                    BRW_USAGE_STATE,
+                    0,
+                    i * sizeof(GLuint),
+                    brw->wm.surf_bo[i]);
+      }
    }
 
    /* Note there is no key for this search beyond the values in the
@@ -169,7 +173,7 @@ brw_wm_get_binding_table(struct brw_context *brw,
     */
    if (brw_search_cache(&brw->surface_cache, BRW_SS_SURF_BIND,
                         NULL, 0,
-                        reloc, brw->wm.nr_surfaces,
+                        reloc, nr_relocs,
                         NULL,
                         bo_out))
       return PIPE_OK;
@@ -182,7 +186,7 @@ brw_wm_get_binding_table(struct brw_context *brw,
 
    ret = brw_upload_cache( &brw->surface_cache, BRW_SS_SURF_BIND,
                            NULL, 0,
-                           reloc, brw->wm.nr_surfaces,
+                           reloc, nr_relocs,
                            data, data_size,
                            NULL, NULL,
                            bo_out);
@@ -208,40 +212,60 @@ static enum pipe_error prepare_wm_surfaces(struct brw_context *brw )
    for (i = 0; i < brw->curr.fb.nr_cbufs; i++) {
       ret = brw_update_render_surface(brw, 
                                       brw_surface(brw->curr.fb.cbufs[i]), 
-                                      &brw->wm.surf_bo[nr_surfaces++]);
+                                      &brw->wm.surf_bo[BTI_COLOR_BUF(i)]);
       if (ret)
          return ret;
+      
+      nr_surfaces = BTI_COLOR_BUF(i) + 1;
+   }
+
+
+
+   /* PIPE_NEW_FRAGMENT_CONSTANTS
+    */
+#if 0
+   if (brw->curr.fragment_constants) {
+      ret = brw_update_fragment_constant_surface(
+         brw, 
+         brw->curr.fragment_constants, 
+         &brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS]);
+
+      if (ret)
+         return ret;
+
+      nr_surfaces = BTI_FRAGMENT_CONSTANTS + 1;
    }
+   else {
+      bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL);      
+   }
+#endif
+
 
    /* PIPE_NEW_TEXTURE 
     */
    for (i = 0; i < brw->curr.num_textures; i++) {
       ret = brw_update_texture_surface(brw, 
                                        brw_texture(brw->curr.texture[i]),
-                                       &brw->wm.surf_bo[nr_surfaces++]);
+                                       &brw->wm.surf_bo[BTI_TEXTURE(i)]);
       if (ret)
          return ret;
+
+      nr_surfaces = BTI_TEXTURE(i) + 1;
    }
 
-   /* PIPE_NEW_FRAGMENT_CONSTANTS
+   /* Clear any inactive entries:
     */
-#if 0
-   if (brw->curr.fragment_constants) {
-      ret = brw_update_fragment_constant_surface(brw, 
-                                                 brw->curr.fragment_constants, 
-                                                 &brw->wm.surf_bo[nr_surfaces++]);
-      if (ret)
-         return ret;
-   }
-#endif
+   for (i = brw->curr.fb.nr_cbufs; i < BRW_MAX_DRAW_BUFFERS; i++) 
+      bo_reference(&brw->wm.surf_bo[BTI_COLOR_BUF(i)], NULL);
 
-   if (brw->wm.nr_surfaces != nr_surfaces) {
+   if (!brw->curr.fragment_constants)
+      bo_reference(&brw->wm.surf_bo[BTI_FRAGMENT_CONSTANTS], NULL);      
 
-      /* Unreference any left-over old buffers
-       */
-      for (i = nr_surfaces; i < brw->wm.nr_surfaces; i++)
-         bo_reference(&brw->wm.surf_bo[i], NULL);
+   /* XXX: no pipe_max_textures define?? */
+   for (i = brw->curr.num_textures; i < PIPE_MAX_SAMPLERS; i++)
+      bo_reference(&brw->wm.surf_bo[BTI_TEXTURE(i)], NULL);
 
+   if (brw->wm.nr_surfaces != nr_surfaces) {
       brw->wm.nr_surfaces = nr_surfaces;
       brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
    }
-- 
cgit v1.2.3


From b7078a88119e248b0196f7446abe029c22f1ee28 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 14 Nov 2009 23:27:20 +0100
Subject: r300g: add texture lod clamping

These now work:
piglit/lodclamp
piglit/levelclamp
---
 src/gallium/drivers/r300/r300_context.h |  4 ++++
 src/gallium/drivers/r300/r300_emit.c    | 11 ++++++++++-
 src/gallium/drivers/r300/r300_reg.h     |  5 +++--
 src/gallium/drivers/r300/r300_state.c   |  5 +++++
 src/gallium/drivers/r300/r300_texture.c |  3 +--
 5 files changed, 23 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index f954ba7f9a..60ef415caa 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -92,6 +92,10 @@ struct r300_sampler_state {
     uint32_t filter0;      /* R300_TX_FILTER0: 0x4400 */
     uint32_t filter1;      /* R300_TX_FILTER1: 0x4440 */
     uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+
+    /* Min/max LOD must be clamped to [0, last_level], thus
+     * it's dependent on a currently bound texture */
+    unsigned min_lod, max_lod;
 };
 
 struct r300_scissor_state {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 6d702c0027..ad7dff36be 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -583,6 +583,8 @@ void r300_emit_texture(struct r300_context* r300,
                        unsigned offset)
 {
     uint32_t filter0 = sampler->filter0;
+    uint32_t format0 = tex->state.format0;
+    unsigned min_level, max_level;
     CS_LOCALS(r300);
 
     /* to emulate 1D textures through 2D ones correctly */
@@ -591,13 +593,20 @@ void r300_emit_texture(struct r300_context* r300,
         filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
     }
 
+    /* determine min/max levels */
+    /* the MAX_MIP level is the largest (finest) one */
+    max_level = MIN2(sampler->max_lod, tex->tex.last_level);
+    min_level = MIN2(sampler->min_lod, max_level);
+    format0 |= R300_TX_NUM_LEVELS(max_level);
+    filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+
     BEGIN_CS(16);
     OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 |
         (offset << 28));
     OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1);
     OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color);
 
-    OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0);
+    OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0);
     OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1);
     OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2);
     OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1);
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 8ca785cb58..66fdada221 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_TX_MIN_FILTER_MIP_NEAREST    (1 << 13)
 #	define R300_TX_MIN_FILTER_MIP_LINEAR     (2 << 13)
 #	define R300_TX_MIN_FILTER_MIP_MASK       (3 << 13)
+#       define R300_TX_MAX_MIP_LEVEL_SHIFT       17
+#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 17)
 #	define R300_TX_MAX_ANISO_1_TO_1          (0 << 21)
 #	define R300_TX_MAX_ANISO_2_TO_1          (1 << 21)
 #	define R300_TX_MAX_ANISO_4_TO_1          (2 << 21)
@@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_TX_MAX_ANISO_MASK            (7 << 21)
 #       define R300_TX_WRAP_S(x)                 ((x) << 0)
 #       define R300_TX_WRAP_T(x)                 ((x) << 3)
+#       define R300_TX_MAX_MIP_LEVEL(x)          ((x) << 17)
 
 #define R300_TX_FILTER1_0                      0x4440
 #	define R300_CHROMA_KEY_MODE_DISABLE    0
@@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_TX_HEIGHTMASK_MASK           (2047 << 11)
 #	define R300_TX_DEPTHMASK_SHIFT           22
 #	define R300_TX_DEPTHMASK_MASK            (0xf << 22)
-#       define R300_TX_MAX_MIP_LEVEL_SHIFT       26
-#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 26)
 #       define R300_TX_SIZE_PROJECTED            (1 << 30)
 #       define R300_TX_PITCH_EN                  (1 << 31)
 #       define R300_TX_WIDTH(x)                  ((x) << 0)
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5422a2cc9c..f2867675f0 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -523,6 +523,11 @@ static void*
                                                    state->mag_img_filter,
                                                    state->min_mip_filter);
 
+    /* r300-r500 lack float mipmap lods; emit clamps these to last_level.
+     * Clamp to 0 before the cast: negative float -> unsigned is undefined. */
+    sampler->min_lod = (unsigned)MAX2(state->min_lod, 0.0f);
+    sampler->max_lod = (unsigned)MAX2(ceilf(state->max_lod), 0.0f);
+
     lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1);
 
     sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index aea25cf71d..d13aa8f036 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -43,8 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
         state->format2 = (tex->pitch[0] - 1) & 0x1fff;
     } else {
         /* power of two textures (3D, mipmaps, and no pitch) */
-        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
-                          R300_TX_NUM_LEVELS(pt->last_level & 0xf);
+        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf);
     }
 
     state->format1 = r300_translate_texformat(pt->format);
-- 
cgit v1.2.3


From 4e1236e60267d036a1a604412bd7efd7a249a588 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 15 Nov 2009 16:41:25 +0100
Subject: r300g: fix updating a vertex format

We must update PSC when we change the vertex format, e.g. vertex colors
from RGBA to BGRA.
---
 src/gallium/drivers/r300/r300_state.c         | 2 ++
 src/gallium/drivers/r300/r300_state_derived.c | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index f2867675f0..997d59ff6f 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -687,6 +687,8 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         draw_flush(r300->draw);
         draw_set_vertex_buffers(r300->draw, count, buffers);
     }
+
+    r300->dirty_state |= R300_NEW_VERTEX_FORMAT; /* forces a PSC rebuild */
 }
 
 static void r300_set_vertex_elements(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 5aa4166d93..82f2be3101 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -512,7 +512,7 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
 
     r300->vertex_info = vformat;
     r300->rs_block = rs_block;
-    r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK);
+    r300->dirty_state |= R300_NEW_RS_BLOCK;
 }
 
 static void r300_update_ztop(struct r300_context* r300)
@@ -551,7 +551,8 @@ static void r300_update_ztop(struct r300_context* r300)
 void r300_update_derived_state(struct r300_context* r300)
 {
     if (r300->dirty_state &
-        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
+        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER |
+         R300_NEW_VERTEX_FORMAT)) {
         r300_update_derived_shader_state(r300);
     }
 
-- 
cgit v1.2.3


From 624a0cd9c1bcc8d0952bb30e3336237fb99041b2 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 19 Nov 2009 20:41:19 +0100
Subject: r300g: fix typo in r300_reg.h to prevent the RS unit from doing
 random things

And reorder fragment shader inputs so that the colors are before texcoords,
as is allocated by the shader compiler. This commit makes VS->FS attribute
routing work on R500.
---
 src/gallium/drivers/r300/r300_reg.h           |  2 +-
 src/gallium/drivers/r300/r300_state_derived.c | 26 ++++++++++++--------------
 2 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 66fdada221..3a419b24b0 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #        define R500_RS_INST_TEX_ID(x)                  ((x) << 0)
 #define R500_RS_INST_TEX_CN_WRITE			(1 << 4)
 #define R500_RS_INST_TEX_ADDR_SHIFT			5
-#        define R500_RS_INST_TEX_ADDR(x)                ((x) << 0)
+#        define R500_RS_INST_TEX_ADDR(x)                ((x) << 5)
 #define R500_RS_INST_COL_ID_SHIFT			12
 #        define R500_RS_INST_COL_ID(x)                  ((x) << 12)
 #define R500_RS_INST_COL_CN_NO_WRITE			(0 << 16)
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 82f2be3101..8faf78932d 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -381,17 +381,18 @@ static void r300_update_rs_block(struct r300_context* r300,
             col_count++;
         }
 
+        for (i = 0; i < col_count; i++) {
+            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
+                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
+            fp_offset++;
+        }
+
         for (i = 0; i < tex_count; i++) {
             rs->inst[i] |= R500_RS_INST_TEX_ID(i) |
                 R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset);
             fp_offset++;
         }
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
-                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
     } else {
         for (i = 0; i < info->num_inputs; i++) {
             switch (info->input_semantic_name[i]) {
@@ -416,8 +417,10 @@ static void r300_update_rs_block(struct r300_context* r300,
             }
         }
 
+        /* Rasterize at least one color, or bad things happen. */
         if (col_count == 0) {
             rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+            col_count++;
         }
 
         if (tex_count == 0) {
@@ -428,9 +431,10 @@ static void r300_update_rs_block(struct r300_context* r300,
                 R300_RS_SEL_Q(R300_RS_SEL_K1);
         }
 
-        /* Rasterize at least one color, or bad things happen. */
-        if ((col_count == 0) && (tex_count == 0)) {
-            col_count++;
+        for (i = 0; i < col_count; i++) {
+            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
+                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
+            fp_offset++;
         }
 
         for (i = 0; i < tex_count; i++) {
@@ -438,12 +442,6 @@ static void r300_update_rs_block(struct r300_context* r300,
                 R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset);
             fp_offset++;
         }
-
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
-                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
     }
 
     rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
-- 
cgit v1.2.3


From 435c495549d707432f9fb9868e665a42a6923058 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 19 Nov 2009 22:40:11 +0100
Subject: r300g: silence warnings

---
 src/gallium/drivers/r300/r300_screen.h        | 2 ++
 src/gallium/drivers/r300/r300_state_derived.c | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 41df31f670..1ce5ff3904 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -27,6 +27,8 @@
 
 #include "r300_chipset.h"
 
+struct r300_winsys;
+
 struct r300_screen {
     /* Parent class */
     struct pipe_screen screen;
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 8faf78932d..962754f3b1 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -47,8 +47,8 @@ struct r300_shader_derived_value {
 
 unsigned r300_shader_key_hash(void* key) {
     struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
-    unsigned vs = (unsigned)shader_key->vs;
-    unsigned fs = (unsigned)shader_key->fs;
+    unsigned vs = (intptr_t)shader_key->vs;
+    unsigned fs = (intptr_t)shader_key->fs;
 
     return (vs << 16) | (fs & 0xffff);
 }
-- 
cgit v1.2.3


From 3a2cd66af8774af15eabef655ded9b48e67242d5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 21 Nov 2009 05:51:13 +0100
Subject: r300g: clean up vs/fs tabs

Instead of vs_tab, we use vs_output_tab and it's local now. fs_tab hasn't
been used anywhere, so I removed it and r300_update_fs_tab too.
---
 src/gallium/drivers/r300/r300_context.h       |   5 --
 src/gallium/drivers/r300/r300_state_derived.c | 114 ++++++--------------------
 2 files changed, 25 insertions(+), 94 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 60ef415caa..39c0914cff 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -223,11 +223,6 @@ struct r300_texture {
 struct r300_vertex_info {
     /* Parent class */
     struct vertex_info vinfo;
-    /* Map of vertex attributes into PVS memory for HW TCL,
-     * or GA memory for SW TCL. */
-    int vs_tab[16];
-    /* Map of rasterizer attributes from GB through RS to US. */
-    int fs_tab[16];
 
     /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
     uint32_t vap_prog_stream_cntl[8];
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 962754f3b1..45aeefb483 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -61,12 +61,12 @@ int r300_shader_key_compare(void* key1, void* key2) {
         (shader_key1->fs == shader_key2->fs);
 }
 
-/* Set up the vs_tab and routes. */
-static void r300_vs_tab_routes(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
+/* Set up the vs_output_tab and routes. */
+static void r300_vs_output_tab_routes(struct r300_context* r300,
+                                      struct r300_vertex_info* vformat,
+                                      int* vs_output_tab)
 {
     struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
     boolean pos = FALSE, psize = FALSE, fog = FALSE;
     int i, texs = 0, cols = 0;
     struct tgsi_shader_info* info = &r300->fs->info;
@@ -79,23 +79,23 @@ static void r300_vs_tab_routes(struct r300_context* r300,
         switch (info->input_semantic_name[i]) {
             case TGSI_SEMANTIC_POSITION:
                 pos = TRUE;
-                tab[i] = 0;
+                vs_output_tab[i] = 0;
                 break;
             case TGSI_SEMANTIC_COLOR:
-                tab[i] = 2 + cols;
+                vs_output_tab[i] = 2 + cols;
                 cols++;
                 break;
             case TGSI_SEMANTIC_PSIZE:
                 assert(psize == FALSE);
                 psize = TRUE;
-                tab[i] = 15;
+                vs_output_tab[i] = 15;
                 break;
             case TGSI_SEMANTIC_FOG:
                 assert(fog == FALSE);
                 fog = TRUE;
                 /* Fall through */
             case TGSI_SEMANTIC_GENERIC:
-                tab[i] = 6 + texs;
+                vs_output_tab[i] = 6 + texs;
                 texs++;
                 break;
             default:
@@ -122,11 +122,11 @@ static void r300_vs_tab_routes(struct r300_context* r300,
      * for HW TCL case it could be generated by vertex shader */
     if (!pos) {
         /* Make room for the position attribute
-         * at the beginning of the tab. */
+         * at the beginning of the vs_output_tab. */
         for (i = 15; i > 0; i--) {
-            tab[i] = tab[i-1];
+            vs_output_tab[i] = vs_output_tab[i-1];
         }
-        tab[0] = 0;
+        vs_output_tab[0] = 0;
     }
 
     /* Position. */
@@ -229,34 +229,34 @@ static void r300_vertex_psc(struct r300_context* r300,
 
 /* Update the PSC tables for SW TCL, using Draw. */
 static void r300_swtcl_vertex_psc(struct r300_context* r300,
-                                  struct r300_vertex_info* vformat)
+                                  struct r300_vertex_info* vformat,
+                                  int* vs_output_tab)
 {
     struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i, attrib_count;
 
     /* For each Draw attribute, route it to the fragment shader according
-     * to the tab. */
+     * to the vs_output_tab. */
     attrib_count = vinfo->num_attribs;
     DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
     for (i = 0; i < attrib_count; i++) {
         DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-               " tab %d\n", vinfo->attrib[i].src_index,
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
                vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-               tab[i]);
+               vs_output_tab[i]);
     }
 
     for (i = 0; i < attrib_count; i++) {
         /* Make sure we have a proper destination for our attribute. */
-        assert(tab[i] != -1);
+        assert(vs_output_tab[i] != -1);
 
         format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
 
         /* Obtain the type of data in this attribute. */
         type = r300_translate_vertex_data_type(format) |
-            tab[i] << R300_DST_VEC_LOC_SHIFT;
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
 
         /* Obtain the swizzle for this attribute. Note that the default
          * swizzle in the hardware is not XYZW! */
@@ -280,68 +280,6 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300,
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
 }
 
-/* Set up the mappings from GB to US, for RS block. */
-static void r300_update_fs_tab(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
-{
-    struct tgsi_shader_info* info = &r300->fs->info;
-    int i, cols = 0, texs = 0, cols_emitted = 0;
-    int* tab = vformat->fs_tab;
-
-    for (i = 0; i < 16; i++) {
-        tab[i] = -1;
-    }
-
-    assert(info->num_inputs <= 16);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                tab[i] = INTERP_LINEAR;
-                cols++;
-                break;
-            case TGSI_SEMANTIC_POSITION:
-            case TGSI_SEMANTIC_PSIZE:
-                debug_printf("r300: Implementation error: Can't use "
-                        "pos attribs in fragshader yet!\n");
-                /* Pass through for now */
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                tab[i] = INTERP_PERSPECTIVE;
-                break;
-            default:
-                debug_printf("r300: Unknown vertex input %d\n",
-                    info->input_semantic_name[i]);
-                break;
-        }
-    }
-
-    /* Now that we know where everything is... */
-    DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (tab[i]) {
-            case INTERP_LINEAR:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, color,    tab %d\n",
-                        i, cols_emitted);
-                tab[i] = cols_emitted;
-                cols_emitted++;
-                break;
-            case INTERP_PERSPECTIVE:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, texcoord, tab %d\n",
-                        i, cols + texs);
-                tab[i] = cols + texs;
-                texs++;
-                break;
-            case -1:
-                debug_printf("r300: Implementation error: Bad fp interp!\n");
-            default:
-                break;
-        }
-    }
-
-}
-
 /* Set up the RS block. This is the part of the chipset that actually does
  * the rasterization of vertices into fragments. This is also the part of the
  * chipset that locks up if any part of it is even slightly wrong. */
@@ -456,8 +394,13 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     struct r300_vertex_info* vformat;
     struct r300_rs_block* rs_block;
+    int vs_output_tab[16];
     int i;
 
+    for (i = 0; i < 16; i++) {
+        vs_output_tab[i] = -1;
+    }
+
     /*
     struct r300_shader_key* key;
     struct r300_shader_derived_value* value;
@@ -488,21 +431,14 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     vformat = CALLOC_STRUCT(r300_vertex_info);
     rs_block = CALLOC_STRUCT(r300_rs_block);
 
-    for (i = 0; i < 16; i++) {
-        vformat->vs_tab[i] = -1;
-        vformat->fs_tab[i] = -1;
-    }
-
-    r300_vs_tab_routes(r300, vformat);
+    r300_vs_output_tab_routes(r300, vformat, vs_output_tab);
 
     if (r300screen->caps->has_tcl) {
         r300_vertex_psc(r300, vformat);
     } else {
-        r300_swtcl_vertex_psc(r300, vformat);
+        r300_swtcl_vertex_psc(r300, vformat, vs_output_tab);
     }
 
-    r300_update_fs_tab(r300, vformat);
-
     r300_update_rs_block(r300, rs_block);
 
     FREE(r300->vertex_info);
-- 
cgit v1.2.3


From 44c0aaf990f46c6dcb46d58dda0c182f5d40cb42 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 20 Nov 2009 04:52:49 +0100
Subject: r300g: do not reallocate r300_vertex_info and r300_rs_block all the
 time

---
 src/gallium/drivers/r300/r300_context.c       |  2 ++
 src/gallium/drivers/r300/r300_state_derived.c | 42 ++++++++++++---------------
 2 files changed, 20 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 26db536248..769733b6dd 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -69,6 +69,7 @@ static void r300_destroy_context(struct pipe_context* context)
     FREE(r300->blend_color_state);
     FREE(r300->rs_block);
     FREE(r300->scissor_state);
+    FREE(r300->vertex_info);
     FREE(r300->viewport_state);
     FREE(r300);
 }
@@ -152,6 +153,7 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
     r300->rs_block = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
+    r300->vertex_info = CALLOC_STRUCT(r300_vertex_info);
     r300->viewport_state = CALLOC_STRUCT(r300_viewport_state);
 
     /* Open up the OQ BO. */
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 45aeefb483..6fb780cb29 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -63,10 +63,9 @@ int r300_shader_key_compare(void* key1, void* key2) {
 
 /* Set up the vs_output_tab and routes. */
 static void r300_vs_output_tab_routes(struct r300_context* r300,
-                                      struct r300_vertex_info* vformat,
                                       int* vs_output_tab)
 {
-    struct vertex_info* vinfo = &vformat->vinfo;
+    struct vertex_info* vinfo = &r300->vertex_info->vinfo;
     boolean pos = FALSE, psize = FALSE, fog = FALSE;
     int i, texs = 0, cols = 0;
     struct tgsi_shader_info* info = &r300->fs->info;
@@ -185,9 +184,9 @@ static void r300_vs_output_tab_routes(struct r300_context* r300,
 }
 
 /* Update the PSC tables. */
-static void r300_vertex_psc(struct r300_context* r300,
-                            struct r300_vertex_info* vformat)
+static void r300_vertex_psc(struct r300_context* r300)
 {
+    struct r300_vertex_info *vformat = r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
@@ -229,9 +228,9 @@ static void r300_vertex_psc(struct r300_context* r300,
 
 /* Update the PSC tables for SW TCL, using Draw. */
 static void r300_swtcl_vertex_psc(struct r300_context* r300,
-                                  struct r300_vertex_info* vformat,
                                   int* vs_output_tab)
 {
+    struct r300_vertex_info *vformat = r300->vertex_info;
     struct vertex_info* vinfo = &vformat->vinfo;
     uint16_t type, swizzle;
     enum pipe_format format;
@@ -283,9 +282,9 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300,
 /* Set up the RS block. This is the part of the chipset that actually does
  * the rasterization of vertices into fragments. This is also the part of the
  * chipset that locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300,
-                                 struct r300_rs_block* rs)
+static void r300_update_rs_block(struct r300_context* r300)
 {
+    struct r300_rs_block* rs = r300->rs_block;
     struct tgsi_shader_info* info = &r300->fs->info;
     int col_count = 0, fp_offset = 0, i, tex_count = 0;
     int rs_tex_comp = 0;
@@ -392,14 +391,9 @@ static void r300_update_rs_block(struct r300_context* r300,
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct r300_vertex_info* vformat;
-    struct r300_rs_block* rs_block;
     int vs_output_tab[16];
     int i;
 
-    for (i = 0; i < 16; i++) {
-        vs_output_tab[i] = -1;
-    }
 
     /*
     struct r300_shader_key* key;
@@ -427,25 +421,25 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
             (void*)key, (void*)value);
     } */
 
-    /* XXX This will be refactored ASAP. */
-    vformat = CALLOC_STRUCT(r300_vertex_info);
-    rs_block = CALLOC_STRUCT(r300_rs_block);
+    /* Reset structures */
+    memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
+    memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
 
-    r300_vs_output_tab_routes(r300, vformat, vs_output_tab);
+    for (i = 0; i < 16; i++) {
+        vs_output_tab[i] = -1;
+    }
+
+    /* Update states */
+    r300_vs_output_tab_routes(r300, vs_output_tab);
 
     if (r300screen->caps->has_tcl) {
-        r300_vertex_psc(r300, vformat);
+        r300_vertex_psc(r300);
     } else {
-        r300_swtcl_vertex_psc(r300, vformat, vs_output_tab);
+        r300_swtcl_vertex_psc(r300, vs_output_tab);
     }
 
-    r300_update_rs_block(r300, rs_block);
-
-    FREE(r300->vertex_info);
-    FREE(r300->rs_block);
+    r300_update_rs_block(r300);
 
-    r300->vertex_info = vformat;
-    r300->rs_block = rs_block;
     r300->dirty_state |= R300_NEW_RS_BLOCK;
 }
 
-- 
cgit v1.2.3


From 2b07b640619ac68344276ba0557ea46b2cbc3f26 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sat, 21 Nov 2009 19:13:26 -0800
Subject: r300g: Build fix.

Oops.
---
 src/gallium/drivers/r300/r300_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 997d59ff6f..a88d66db24 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -688,7 +688,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         draw_set_vertex_buffers(r300->draw, count, buffers);
     }
 
-    r300->state |= R300_NEW_VERTEX_FORMAT;
+    r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
 }
 
 static void r300_set_vertex_elements(struct pipe_context* pipe,
-- 
cgit v1.2.3


From 21172d4358be5cce3bda244b33e3728a5fde3751 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 23 Nov 2009 00:58:13 +0000
Subject: i965g: apply linear math to both linear and perspective attrs

---
 src/gallium/drivers/i965/brw_sf.c      | 3 +--
 src/gallium/drivers/i965/brw_sf_emit.c | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_sf.c b/src/gallium/drivers/i965/brw_sf.c
index a28fb71589..e1986a9dbb 100644
--- a/src/gallium/drivers/i965/brw_sf.c
+++ b/src/gallium/drivers/i965/brw_sf.c
@@ -153,10 +153,9 @@ static enum pipe_error upload_sf_prog(struct brw_context *brw)
       case TGSI_INTERPOLATE_CONSTANT:
          break;
       case TGSI_INTERPOLATE_LINEAR:
-      case TGSI_INTERPOLATE_PERSPECTIVE:
          key.linear_attrs |= 1 << (i+1);
          break;
-//      case TGSI_INTERPOLATE_PERSPECTIVE:
+      case TGSI_INTERPOLATE_PERSPECTIVE:
          key.persp_attrs |= 1 << (i+1);
          break;
       }
diff --git a/src/gallium/drivers/i965/brw_sf_emit.c b/src/gallium/drivers/i965/brw_sf_emit.c
index 2983e8a9dd..3b85725e36 100644
--- a/src/gallium/drivers/i965/brw_sf_emit.c
+++ b/src/gallium/drivers/i965/brw_sf_emit.c
@@ -324,7 +324,7 @@ static GLboolean calculate_masks( struct brw_sf_compile *c,
 {
    GLboolean is_last_attr = (reg == c->nr_setup_regs - 1);
    GLuint persp_mask = c->key.persp_attrs;
-   GLuint linear_mask = c->key.linear_attrs;
+   GLuint linear_mask = (c->key.persp_attrs | c->key.linear_attrs);
 
    *pc_persp = 0;
    *pc_linear = 0;
-- 
cgit v1.2.3


From 968a7dfb292f1eefa9ada8096bb023c051518c32 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 23 Nov 2009 01:47:57 +0000
Subject: i965g: use correct key size for vs upload

---
 src/gallium/drivers/i965/brw_vs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 25b51eb41e..14a1c3bcf1 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -84,7 +84,7 @@ static enum pipe_error do_vs_prog( struct brw_context *brw,
       return ret;
 
    ret = brw_upload_cache( &brw->cache, BRW_VS_PROG,
-                           &c.key, sizeof(c.key),
+                           &c.key, brw_vs_prog_key_size(&c.key),
                            NULL, 0,
                            program, program_size,
                            &c.prog_data,
-- 
cgit v1.2.3


From cd0e6619e6d9f0f60606d1e079b1a04af1717309 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 23 Nov 2009 03:00:47 +0000
Subject: i965g: correct test for unfilled modes

---
 src/gallium/drivers/i965/brw_pipe_rast.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_rast.c b/src/gallium/drivers/i965/brw_pipe_rast.c
index 27c568de0a..2117e91a9e 100644
--- a/src/gallium/drivers/i965/brw_pipe_rast.c
+++ b/src/gallium/drivers/i965/brw_pipe_rast.c
@@ -58,8 +58,10 @@ calculate_clip_key_rast( const struct brw_context *brw,
       key->fill_cw = translate_fill(templ->fill_cw);
    }
 
-   if (key->fill_cw != CLIP_FILL ||
-       key->fill_ccw != CLIP_FILL) {
+   if (key->fill_cw == CLIP_LINE ||
+       key->fill_ccw == CLIP_LINE ||
+       key->fill_cw == CLIP_POINT ||
+       key->fill_ccw == CLIP_POINT) {
       key->do_unfilled = 1;
       key->clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED;
    }
-- 
cgit v1.2.3


From 0a89ad80d957869cf8760326787c6189ab50a1e0 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 23 Nov 2009 03:06:58 +0000
Subject: i965g: gs nr_attrs also tracks nr fragment shader inputs

Or, equivalently, nr of outputs of active vertex shader variant.
---
 src/gallium/drivers/i965/brw_gs.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_gs.c b/src/gallium/drivers/i965/brw_gs.c
index ce77be24f6..921b201bae 100644
--- a/src/gallium/drivers/i965/brw_gs.c
+++ b/src/gallium/drivers/i965/brw_gs.c
@@ -154,10 +154,12 @@ static const unsigned gs_prim[PIPE_PRIM_MAX] = {
 static void populate_key( struct brw_context *brw,
 			  struct brw_gs_prog_key *key )
 {
+   const struct brw_fs_signature *sig = &brw->curr.fragment_shader->signature;
+
    memset(key, 0, sizeof(*key));
 
-   /* CACHE_NEW_VS_PROG */
-   key->nr_attrs = brw->vs.prog_data->nr_outputs;
+   /* PIPE_NEW_FRAGMENT_SIGNATURE */
+   key->nr_attrs = sig->nr_inputs + 1;
 
    /* BRW_NEW_PRIMITIVE */
    key->primitive = gs_prim[brw->primitive];
@@ -206,9 +208,9 @@ static int prepare_gs_prog(struct brw_context *brw)
 
 const struct brw_tracked_state brw_gs_prog = {
    .dirty = {
-      .mesa  = 0,
+      .mesa  = PIPE_NEW_FRAGMENT_SIGNATURE,
       .brw   = BRW_NEW_PRIMITIVE,
-      .cache = CACHE_NEW_VS_PROG
+      .cache = 0,
    },
    .prepare = prepare_gs_prog
 };
-- 
cgit v1.2.3


From cc93fa3527e64963acd0e643d7d1061306d9e1df Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Mon, 23 Nov 2009 10:51:07 +0100
Subject: softpipe: Initialise TGSI machine's Face.

---
 src/gallium/drivers/softpipe/sp_fs_exec.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 4076114d39..a8999ed347 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -126,7 +126,13 @@ exec_run( const struct sp_fragment_shader *base,
    setup_pos_vector(quad->posCoef, 
                     (float)quad->input.x0, (float)quad->input.y0, 
                     &machine->QuadPos);
-   
+
+   if (quad->input.facing) {
+      machine->Face = -1.0f;
+   } else {
+      machine->Face = 1.0f;
+   }
+
    quad->inout.mask &= tgsi_exec_machine_run( machine );
    if (quad->inout.mask == 0)
       return FALSE;
-- 
cgit v1.2.3


From 86710c3334850eeaeffcac6d538e01fd5c203167 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Mon, 23 Nov 2009 19:59:02 +0100
Subject: svga: Scrub Makefiles a bit

Remove x86-specific hacks. Not that they will ever be used on
non-x86 arches, but they are built by default. And the way the
flags were added was a hack.
---
 src/gallium/drivers/svga/Makefile           |  8 +-------
 src/gallium/winsys/drm/vmware/core/Makefile | 14 +-------------
 2 files changed, 2 insertions(+), 20 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
index fe1d6d7384..d1413319c9 100644
--- a/src/gallium/drivers/svga/Makefile
+++ b/src/gallium/drivers/svga/Makefile
@@ -51,13 +51,7 @@ LIBRARY_INCLUDES = \
 	-I$(TOP)/src/gallium/drivers/svga/include
 
 LIBRARY_DEFINES = \
+	-std=gnu99 -fvisibility=hidden \
 	-DHAVE_STDINT_H -DHAVE_SYS_TYPES_H
 
-CC = gcc -fvisibility=hidden -msse -msse2
-
-# Set the gnu99 standard to enable anonymous structs in vmware headers.
-#
-CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \
-	$(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS)
-
 include ../../Makefile.template
diff --git a/src/gallium/winsys/drm/vmware/core/Makefile b/src/gallium/winsys/drm/vmware/core/Makefile
index ff8f01b322..a52957c1a5 100644
--- a/src/gallium/winsys/drm/vmware/core/Makefile
+++ b/src/gallium/winsys/drm/vmware/core/Makefile
@@ -28,20 +28,8 @@ LIBRARY_INCLUDES = \
        $(shell pkg-config libdrm --cflags-only-I)
 
 LIBRARY_DEFINES = \
+       -std=gnu99 -fvisibility=hidden \
        -DHAVE_STDINT_H -D_FILE_OFFSET_BITS=64 \
        $(shell pkg-config libdrm --cflags-only-other)
 
-CC = gcc -fvisibility=hidden -msse -msse2
-
-# Set the gnu99 standard to enable anonymous structs in vmware headers.
-#
-CFLAGS = -Wall -Wmissing-prototypes -std=gnu99 -ffast-math \
-       $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) $(ASM_FLAGS)
-
 include ../../../../Makefile.template
-
-
-symlinks:
-
-
-include depend
-- 
cgit v1.2.3


From acc51ac0ace11bb375241467ba35e1014f5fb997 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Tue, 24 Nov 2009 01:14:03 +0100
Subject: svga: Filter out pedantic and ansi flags

Rather have the driver compile without the flags than
having to disable them.
---
 src/gallium/drivers/svga/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
index d1413319c9..8158364d25 100644
--- a/src/gallium/drivers/svga/Makefile
+++ b/src/gallium/drivers/svga/Makefile
@@ -50,6 +50,9 @@ C_SOURCES = \
 LIBRARY_INCLUDES = \
 	-I$(TOP)/src/gallium/drivers/svga/include
 
+# With linux-debug we get a lot of warnings; filter out the bad flags.
+CFLAGS := $(filter-out -pedantic, $(filter-out -ansi, $(CFLAGS)))
+
 LIBRARY_DEFINES = \
 	-std=gnu99 -fvisibility=hidden \
 	-DHAVE_STDINT_H -DHAVE_SYS_TYPES_H
-- 
cgit v1.2.3


From 8d80b5400a1bbf4e959cd8257d11dfe0483e93db Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 23 Nov 2009 18:06:19 -0700
Subject: r300g: use util_bitcount()

---
 src/gallium/drivers/r300/r300_emit.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index ad7dff36be..e6ab8e4af1 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -721,21 +721,6 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     END_CS;
 }
 
-/* XXX This should go to util ... */
-/* Return the number of bits set in the given number. */
-static unsigned bitcount(unsigned n)
-{
-    unsigned bits = 0;
-
-    while (n) {
-        if (n & 1) {
-            bits++;
-        }
-        n >>= 1;
-    }
-
-    return bits;
-}
 
 void r300_emit_vertex_program_code(struct r300_context* r300,
                                    struct r300_vertex_program_code* code)
@@ -745,8 +730,8 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     unsigned instruction_count = code->length / 4;
 
     int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72;
-    int input_count = MAX2(bitcount(code->InputsRead), 1);
-    int output_count = MAX2(bitcount(code->OutputsWritten), 1);
+    int input_count = MAX2(util_bitcount(code->InputsRead), 1);
+    int output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
     int temp_count = MAX2(code->num_temporaries, 1);
     int pvs_num_slots = MIN3(vtx_mem_size / input_count,
                              vtx_mem_size / output_count, 10);
-- 
cgit v1.2.3


From f1ce37f74aff4854071fe5740b055718b2c0c789 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 21:13:18 +0000
Subject: svga: cache textures as well as buffers

---
 src/gallium/drivers/svga/svga_screen_buffer.c  |  12 ++-
 src/gallium/drivers/svga/svga_screen_cache.c   |  93 +++++++++-------
 src/gallium/drivers/svga/svga_screen_cache.h   |  21 ++--
 src/gallium/drivers/svga/svga_screen_texture.c | 142 ++++++++++++++-----------
 src/gallium/drivers/svga/svga_screen_texture.h |  16 ++-
 5 files changed, 167 insertions(+), 117 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
index 3b7811734e..101c7878bf 100644
--- a/src/gallium/drivers/svga/svga_screen_buffer.c
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -71,7 +71,10 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
       
       sbuf->key.numFaces = 1;
       sbuf->key.numMipLevels = 1;
+      sbuf->key.cachable = 1;
       
+      SVGA_DBG(DEBUG_DMA, "surface_create for buffer sz %d\n", sbuf->base.size);
+
       sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
       if(!sbuf->handle)
          return PIPE_ERROR_OUT_OF_MEMORY;
@@ -82,7 +85,7 @@ svga_buffer_create_host_surface(struct svga_screen *ss,
        */
       sbuf->hw.flags.discard = TRUE;
 
-      SVGA_DBG(DEBUG_DMA, "   grab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+      SVGA_DBG(DEBUG_DMA, "   --> got sid %p sz %d (buffer)\n", sbuf->handle, sbuf->base.size);
    }
    
    return PIPE_OK;
@@ -776,12 +779,11 @@ svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
 
    /*
     * We are not the creator of this surface and therefore we must not
-    * cache it for reuse. The caching code only caches SVGA3D_BUFFER surfaces
-    * so make sure this isn't one of those.
+    * cache it for reuse. Set the cacheable flag to zero in the key to
+    * prevent this.
     */
-
-   assert(format != SVGA3D_BUFFER);
    sbuf->key.format = format;
+   sbuf->key.cachable = 0;
    sws->surface_reference(sws, &sbuf->handle, srf);
 
    return buf;
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index 7360c1688b..65f5c07a72 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -24,6 +24,7 @@
  **********************************************************/
 
 #include "util/u_memory.h"
+#include "util/u_hash.h"
 
 #include "svga_debug.h"
 #include "svga_winsys.h"
@@ -36,24 +37,11 @@
 
 /** 
  * Compute the bucket for this key. 
- * 
- * We simply compute log2(width) for now, but
  */
 static INLINE unsigned
 svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
 {
-   unsigned bucket = 0;
-   unsigned size = key->size.width;
-   
-   while ((size >>= 1))
-      ++bucket;
-   
-   if(key->flags & SVGA3D_SURFACE_HINT_INDEXBUFFER)
-      bucket += 32;
-   
-   assert(bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS);
-   
-   return bucket;
+   return util_hash_crc32( key, sizeof key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
 }
 
 
@@ -69,6 +57,8 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
    unsigned bucket;
    unsigned tries = 0;
 
+   assert(key->cachable);
+
    bucket = svga_screen_cache_bucket(key);
 
    pipe_mutex_lock(cache->mutex);
@@ -104,11 +94,9 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
 
    pipe_mutex_unlock(cache->mutex);
    
-#if 0
-   _debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, handle ? "hit" : "miss", tries);
-#else
-   (void)tries;
-#endif
+   if (SVGA_DEBUG & DEBUG_DMA)
+      debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, 
+                   handle ? "hit" : "miss", tries);
    
    return handle;
 }
@@ -128,6 +116,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    struct svga_host_surface_cache_entry *entry = NULL;
    struct svga_winsys_surface *handle = *p_handle;
    
+   assert(key->cachable);
 
    assert(handle);
    if(!handle)
@@ -137,15 +126,15 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    pipe_mutex_lock(cache->mutex);
    
    if(!LIST_IS_EMPTY(&cache->empty)) {
-        /* use the first empty entry */
-        entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head);
+      /* use the first empty entry */
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head);
         
-        LIST_DEL(&entry->head);
-     }
+      LIST_DEL(&entry->head);
+   }
    else if(!LIST_IS_EMPTY(&cache->unused)) {
       /* free the last used buffer and reuse its entry */
       entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head);
-      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", entry->handle);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p (make space)\n", entry->handle);
       sws->surface_reference(sws, &entry->handle, NULL);
 
       LIST_DEL(&entry->bucket_head);
@@ -161,7 +150,7 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    }
    else {
       /* Couldn't cache the buffer -- this really shouldn't happen */
-      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", handle);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p (couldn't find space)\n", handle);
       sws->surface_reference(sws, &handle, NULL);
    }
    
@@ -220,7 +209,7 @@ svga_screen_cache_cleanup(struct svga_screen *svgascreen)
    
    for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) {
       if(cache->entries[i].handle) {
-	 SVGA_DBG(DEBUG_DMA, "unref sid %p\n", cache->entries[i].handle);
+	 SVGA_DBG(DEBUG_DMA, "unref sid %p (shutdown)\n", cache->entries[i].handle);
 	 sws->surface_reference(sws, &cache->entries[i].handle, NULL);
       }
 
@@ -261,18 +250,42 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
 {
    struct svga_winsys_screen *sws = svgascreen->sws;
    struct svga_winsys_surface *handle = NULL;
+   boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
+
+   SVGA_DBG(DEBUG_DMA, "%s sz %dx%dx%d mips %d faces %d cachable %d\n", 
+            __FUNCTION__,
+            key->size.width,
+            key->size.height,
+            key->size.depth,
+            key->numMipLevels,
+            key->numFaces,
+            key->cachable);
+
+   if (cachable) {
+      if (key->format == SVGA3D_BUFFER) {
+         /* For buffers, round the buffer size up to the nearest power
+          * of two to increase the probability of cache hits.  Keep
+          * texture surface dimensions unchanged.
+          */
+         uint32_t size = 1;
+         while(size < key->size.width)
+            size <<= 1;
+         key->size.width = size;
+      }
 
-   if (SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
-      /* round the buffer size up to the nearest power of two to increase the
-       * probability of cache hits */
-      uint32_t size = 1;
-      while(size < key->size.width)
-         size <<= 1;
-      key->size.width = size;
-      
       handle = svga_screen_cache_lookup(svgascreen, key);
-      if (handle)
-         SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %d\n", handle, size);
+      if (handle) {
+         if (key->format == SVGA3D_BUFFER)
+            SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %d (buffer)\n", handle, 
+                     key->size.width);
+         else
+            SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, 
+                     key->size.width,
+                     key->size.height,
+                     key->size.depth,
+                     key->numMipLevels,
+                     key->numFaces);
+      }
    }
 
    if (!handle) {
@@ -297,11 +310,15 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen,
 {
    struct svga_winsys_screen *sws = svgascreen->sws;
    
-   if(SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
+   /* We only set the cachable flag for surfaces of which we are the
+    * exclusive owner.  So just hold onto our existing reference in
+    * that case.
+    */
+   if(SVGA_SURFACE_CACHE_ENABLED && key->cachable) {
       svga_screen_cache_add(svgascreen, key, p_handle);
    }
    else {
-      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", *p_handle);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p (uncachable)\n", *p_handle);
       sws->surface_reference(sws, p_handle, NULL);
    }
 }
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
index 1bbe987768..b745769848 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -36,10 +36,18 @@
 #include "util/u_double_list.h"
 
 
-/* TODO: Reduce this once we don't allocate an index buffer per draw call */ 
+/* Guess the storage size of cached surfaces and try and keep it under
+ * this amount:
+ */ 
+#define SVGA_HOST_SURFACE_CACHE_BYTES 16*1024*1024
+
+/* Maximum number of discrete surfaces in the cache:
+ */
 #define SVGA_HOST_SURFACE_CACHE_SIZE 1024
 
-#define SVGA_HOST_SURFACE_CACHE_BUCKETS 64
+/* Number of hash buckets:
+ */
+#define SVGA_HOST_SURFACE_CACHE_BUCKETS 256
 
 
 struct svga_winsys_surface;
@@ -50,11 +58,12 @@ struct svga_screen;
  */
 struct svga_host_surface_cache_key
 {
-   SVGA3dSurfaceFlags flags;
-   SVGA3dSurfaceFormat format;
    SVGA3dSize size;
-   uint32_t numFaces;
-   uint32_t numMipLevels;
+   uint32_t flags:8;
+   uint32_t format:8;
+   uint32_t numFaces:8;
+   uint32_t numMipLevels:7;
+   uint32_t cachable:1;         /* False if this is a shared surface */
 };
 
 
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index 8472dea04d..158a1e108d 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -266,14 +266,8 @@ svga_texture_create(struct pipe_screen *screen,
                     const struct pipe_texture *templat)
 {
    struct svga_screen *svgascreen = svga_screen(screen);
-   struct svga_winsys_screen *sws = svgascreen->sws;
    struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
    unsigned width, height, depth;
-   SVGA3dSurfaceFlags flags = 0;
-   SVGA3dSurfaceFormat format;
-   SVGA3dSize size;
-   uint32 numFaces;
-   uint32 numMipLevels;
    unsigned level;
    
    if (!tex)
@@ -301,23 +295,24 @@ svga_texture_create(struct pipe_screen *screen,
       depth = minify(depth);
    }
    
-   size.width = templat->width[0];
-   size.height = templat->height[0];
-   size.depth = templat->depth[0];
+   tex->key.flags = 0;
+   tex->key.size.width = templat->width[0];
+   tex->key.size.height = templat->height[0];
+   tex->key.size.depth = templat->depth[0];
    
    if(templat->target == PIPE_TEXTURE_CUBE) {
-      flags |= SVGA3D_SURFACE_CUBEMAP;
-      numFaces = 6;
+      tex->key.flags |= SVGA3D_SURFACE_CUBEMAP;
+      tex->key.numFaces = 6;
    }
    else {
-      numFaces = 1;
+      tex->key.numFaces = 1;
    }
 
    if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
-      flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+      tex->key.flags |= SVGA3D_SURFACE_HINT_TEXTURE;
 
    if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
-      flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+      tex->key.flags |= SVGA3D_SURFACE_HINT_SCANOUT;
    
    /* 
     * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
@@ -328,21 +323,24 @@ svga_texture_create(struct pipe_screen *screen,
 #if 0
    if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) &&
       !pf_is_compressed(templat->format))
-      flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
+      tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
 #endif
    
    if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
-      flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+      tex->key.flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
    
-   numMipLevels = templat->last_level + 1;
+   tex->key.numMipLevels = templat->last_level + 1;
    
-   format = svga_translate_format(templat->format);
-   if(format == SVGA3D_FORMAT_INVALID)
+   tex->key.format = svga_translate_format(templat->format);
+   if(tex->key.format == SVGA3D_FORMAT_INVALID)
       goto error2;
+
+   tex->key.cachable = 1;
    
-   tex->handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
+   SVGA_DBG(DEBUG_DMA, "surface_create for texture\n", tex->handle);
+   tex->handle = svga_screen_surface_create(svgascreen, &tex->key);
    if (tex->handle)
-      SVGA_DBG(DEBUG_DMA, "create sid %p (texture)\n", tex->handle);
+      SVGA_DBG(DEBUG_DMA, "  --> got sid %p (texture)\n", tex->handle);
 
    return &tex->base;
 
@@ -398,6 +396,10 @@ svga_texture_blanket(struct pipe_screen * screen,
       return NULL;
 
    tex->base = *base;
+   
+   /* We don't own this storage, so don't try to cache it.
+    */
+   tex->key.cachable = 0;
 
    if (sbuf->key.format == 1)
       tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM;
@@ -407,6 +409,7 @@ svga_texture_blanket(struct pipe_screen * screen,
    pipe_reference_init(&tex->base.reference, 1);
    tex->base.screen = screen;
 
+   SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle);
    sws->surface_reference(sws, &tex->handle, sbuf->handle);
 
    return &tex->base;
@@ -427,7 +430,7 @@ svga_texture_destroy(struct pipe_texture *pt)
      DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
    */
    SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
-   ss->sws->surface_reference(ss->sws, &tex->handle, NULL);
+   svga_screen_surface_destroy(ss, &tex->key, &tex->handle);
 
    FREE(tex);
 }
@@ -518,43 +521,43 @@ svga_texture_view_surface(struct pipe_context *pipe,
                           unsigned start_mip,
                           unsigned num_mip,
                           int face_pick,
-                          int zslice_pick)
+                          int zslice_pick,
+                          struct svga_host_surface_cache_key *key) /* OUT */
 {
    struct svga_screen *ss = svga_screen(tex->base.screen);
-   struct svga_winsys_screen *sws = ss->sws;
    struct svga_winsys_surface *handle;
    int i, j;
-   SVGA3dSurfaceFlags flags = 0;
-   SVGA3dSize size;
-   uint32 numFaces;
-   uint32 numMipLevels = num_mip;
    unsigned z_offset = 0;
 
    SVGA_DBG(DEBUG_PERF, 
             "svga: Create surface view: face %d zslice %d mips %d..%d\n",
             face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
 
-   size.width = tex->base.width[start_mip];
-   size.height = tex->base.height[start_mip];
-   size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1;
-   assert(size.depth == 1);
+   key->flags = 0;
+   key->format = format;
+   key->numMipLevels = num_mip;
+   key->size.width = tex->base.width[start_mip];
+   key->size.height = tex->base.height[start_mip];
+   key->size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1;
+   key->cachable = 1;
+   assert(key->size.depth == 1);
    
    if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
-      flags |= SVGA3D_SURFACE_CUBEMAP;
-      numFaces = 6;
+      key->flags |= SVGA3D_SURFACE_CUBEMAP;
+      key->numFaces = 6;
    } else {
-      numFaces = 1;
+      key->numFaces = 1;
    }
 
-   if(format == SVGA3D_FORMAT_INVALID)
+   if(key->format == SVGA3D_FORMAT_INVALID)
       return NULL;
 
-   handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
-
+   SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle);
+   handle = svga_screen_surface_create(ss, key);
    if (!handle)
       return NULL;
 
-   SVGA_DBG(DEBUG_DMA, "create sid %p (texture view)\n", handle);
+   SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
 
    if (face_pick < 0)
       face_pick = 0;
@@ -562,14 +565,20 @@ svga_texture_view_surface(struct pipe_context *pipe,
    if (zslice_pick >= 0)
        z_offset = zslice_pick;
 
-   for (i = 0; i < num_mip; i++) {
-      for (j = 0; j < numFaces; j++) {
+   for (i = 0; i < key->numMipLevels; i++) {
+      for (j = 0; j < key->numFaces; j++) {
          if(tex->defined[j + face_pick][i + start_mip]) {
             unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1;
-            svga_texture_copy_handle(svga_context(pipe), ss,
-                                     tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick,
+            svga_texture_copy_handle(svga_context(pipe),
+                                     ss,
+                                     tex->handle, 
+                                     0, 0, z_offset, 
+                                     i + start_mip, 
+                                     j + face_pick,
                                      handle, 0, 0, 0, i, j,
-                                     tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth);
+                                     tex->base.width[i + start_mip],
+                                     tex->base.height[i + start_mip],
+                                     depth);
          }
       }
    }
@@ -586,25 +595,23 @@ svga_get_tex_surface(struct pipe_screen *screen,
 {
    struct svga_texture *tex = svga_texture(pt);
    struct svga_surface *s;
-   struct pipe_surface *ps;
    boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE;
    boolean view = FALSE;
    SVGA3dSurfaceFormat format;
 
    s = CALLOC_STRUCT(svga_surface);
-   ps = &s->base;
-   if (!ps)
+   if (!s)
       return NULL;
 
-   pipe_reference_init(&ps->reference, 1);
-   pipe_texture_reference(&ps->texture, pt);
-   ps->format = pt->format;
-   ps->width = pt->width[level];
-   ps->height = pt->height[level];
-   ps->usage = flags;
-   ps->level = level;
-   ps->face = face;
-   ps->zslice = zslice;
+   pipe_reference_init(&s->base.reference, 1);
+   pipe_texture_reference(&s->base.texture, pt);
+   s->base.format = pt->format;
+   s->base.width = pt->width[level];
+   s->base.height = pt->height[level];
+   s->base.usage = flags;
+   s->base.level = level;
+   s->base.face = face;
+   s->base.zslice = zslice;
 
    if (!render)
       format = svga_translate_format(pt->format);
@@ -619,11 +626,13 @@ svga_get_tex_surface(struct pipe_screen *screen,
       view = TRUE;
 
    /* Currently only used for compressed textures */
-   if (render && (format != svga_translate_format(pt->format))) {
+   if (render && 
+       format != svga_translate_format(pt->format)) {
       view = TRUE;
    }
 
-   if (level != 0 && svga_screen(screen)->debug.force_level_surface_view)
+   if (level != 0 && 
+       svga_screen(screen)->debug.force_level_surface_view)
       view = TRUE;
 
    if (pt->target == PIPE_TEXTURE_3D)
@@ -634,9 +643,10 @@ svga_get_tex_surface(struct pipe_screen *screen,
 
    if (view) {
       SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
-               pt, level, face, zslice, ps);
+               pt, level, face, zslice, s);
 
-      s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice);
+      s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice,
+                                            &s->key);
       s->real_face = 0;
       s->real_level = 0;
       s->real_zslice = 0;
@@ -644,15 +654,16 @@ svga_get_tex_surface(struct pipe_screen *screen,
       struct svga_winsys_screen *sws = svga_winsys_screen(screen);
 
       SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
-               pt, level, face, zslice, ps);
+               pt, level, face, zslice, s);
 
       sws->surface_reference(sws, &s->handle, tex->handle);
       s->real_face = face;
       s->real_level = level;
       s->real_zslice = zslice;
+      memset(&s->key, 0, sizeof s->key);
    }
 
-   return ps;
+   return &s->base;
 }
 
 
@@ -663,7 +674,7 @@ svga_tex_surface_destroy(struct pipe_surface *surf)
    struct svga_screen *ss = svga_screen(surf->texture->screen);
 
    SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
-   ss->sws->surface_reference(ss->sws, &s->handle, NULL);
+   svga_screen_surface_destroy(ss, &s->key, &s->handle);
    pipe_texture_reference(&surf->texture, NULL);
    FREE(surf);
 }
@@ -974,7 +985,8 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
    sv->handle = svga_texture_view_surface(pipe, tex, format,
                                           min_lod,
                                           max_lod - min_lod + 1,
-                                          -1, -1);
+                                          -1, -1,
+                                          &sv->key);
 
    if (!sv->handle) {
       assert(0);
@@ -1030,7 +1042,7 @@ svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
    struct svga_screen *ss = svga_screen(v->texture->base.screen);
 
    SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
-   ss->sws->surface_reference(ss->sws, &v->handle, NULL);
+   svga_screen_surface_destroy(ss, &v->key, &v->handle);
 
    FREE(v);
 }
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
index 1e6fef59a3..1cc4063e65 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.h
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -29,7 +29,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
-
+#include "svga_screen_cache.h"
 
 struct pipe_context;
 struct pipe_screen;
@@ -68,6 +68,7 @@ struct svga_sampler_view
 
    unsigned age;
 
+   struct svga_host_surface_cache_key key;
    struct svga_winsys_surface *handle;
 };
 
@@ -76,8 +77,6 @@ struct svga_texture
 {
    struct pipe_texture base;
 
-   struct svga_winsys_surface *handle;
-
    boolean defined[6][PIPE_MAX_TEXTURE_LEVELS];
    
    struct svga_sampler_view *cached_view;
@@ -86,6 +85,16 @@ struct svga_texture
    unsigned age;
 
    boolean views_modified;
+
+   /**
+    * Creation key for the host surface handle.
+    * 
+    * This structure describes all the host surface characteristics so that it 
+    * can be looked up in cache, since creating a host surface is often a slow
+    * operation.
+    */
+   struct svga_host_surface_cache_key key;
+   struct svga_winsys_surface *handle;
 };
 
 
@@ -93,6 +102,7 @@ struct svga_surface
 {
    struct pipe_surface base;
 
+   struct svga_host_surface_cache_key key;
    struct svga_winsys_surface *handle;
 
    unsigned real_face;
-- 
cgit v1.2.3


From 55b0157860af0eb957262cb0d22ab47eccd85940 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 25 Nov 2009 11:44:41 +0000
Subject: svga: revert packing of surface key

Over-ambitious packing of values broke my cursor.
---
 src/gallium/drivers/svga/svga_screen_cache.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
index b745769848..f5aa740d40 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.h
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -58,10 +58,10 @@ struct svga_screen;
  */
 struct svga_host_surface_cache_key
 {
+   SVGA3dSurfaceFlags flags;
+   SVGA3dSurfaceFormat format;
    SVGA3dSize size;
-   uint32_t flags:8;
-   uint32_t format:8;
-   uint32_t numFaces:8;
+   uint32_t numFaces:24;
    uint32_t numMipLevels:7;
    uint32_t cachable:1;         /* False if this is a shared surface */
 };
-- 
cgit v1.2.3


From 2946aea110beda9c2e0382507b0dba7c508ff5eb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 25 Nov 2009 17:13:04 +0000
Subject: svga: try harder to make the cachable flag work

It doesn't though.
---
 src/gallium/drivers/svga/svga_screen_buffer.c  |  2 ++
 src/gallium/drivers/svga/svga_screen_texture.c | 23 +++++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
index 101c7878bf..c0b0f518bc 100644
--- a/src/gallium/drivers/svga/svga_screen_buffer.c
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -796,6 +796,8 @@ svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer)
    struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen);
    struct svga_winsys_surface *vsurf = NULL;
 
+   assert(svga_buffer(buffer)->key.cachable == 0);
+   svga_buffer(buffer)->key.cachable = 0;
    sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle);
    return vsurf;
 }
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index 158a1e108d..d61d88114c 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -397,9 +397,6 @@ svga_texture_blanket(struct pipe_screen * screen,
 
    tex->base = *base;
    
-   /* We don't own this storage, so don't try to cache it.
-    */
-   tex->key.cachable = 0;
 
    if (sbuf->key.format == 1)
       tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM;
@@ -410,6 +407,11 @@ svga_texture_blanket(struct pipe_screen * screen,
    tex->base.screen = screen;
 
    SVGA_DBG(DEBUG_DMA, "blanket sid %p\n", sbuf->handle);
+
+   /* We don't own this storage, so don't try to cache it.
+    */
+   assert(sbuf->key.cachable == 0);
+   tex->key.cachable = 0;
    sws->surface_reference(sws, &tex->handle, sbuf->handle);
 
    return &tex->base;
@@ -549,13 +551,17 @@ svga_texture_view_surface(struct pipe_context *pipe,
       key->numFaces = 1;
    }
 
-   if(key->format == SVGA3D_FORMAT_INVALID)
+   if(key->format == SVGA3D_FORMAT_INVALID) {
+      key->cachable = 0;
       return NULL;
+   }
 
    SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle);
    handle = svga_screen_surface_create(ss, key);
-   if (!handle)
+   if (!handle) {
+      key->cachable = 0;
       return NULL;
+   }
 
    SVGA_DBG(DEBUG_DMA, " --> got sid %p (texture view)\n", handle);
 
@@ -656,11 +662,11 @@ svga_get_tex_surface(struct pipe_screen *screen,
       SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
                pt, level, face, zslice, s);
 
+      memset(&s->key, 0, sizeof s->key);
       sws->surface_reference(sws, &s->handle, tex->handle);
       s->real_face = face;
       s->real_level = level;
       s->real_zslice = zslice;
-      memset(&s->key, 0, sizeof s->key);
    }
 
    return &s->base;
@@ -674,6 +680,7 @@ svga_tex_surface_destroy(struct pipe_surface *surf)
    struct svga_screen *ss = svga_screen(surf->texture->screen);
 
    SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
+   assert(s->key.cachable == 0);
    svga_screen_surface_destroy(ss, &s->key, &s->handle);
    pipe_texture_reference(&surf->texture, NULL);
    FREE(surf);
@@ -968,6 +975,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
                pt->height[0],
                pt->depth[0],
                pt->last_level);
+      sv->key.cachable = 0;
       sws->surface_reference(sws, &sv->handle, tex->handle);
       return sv;
    }
@@ -990,6 +998,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
 
    if (!sv->handle) {
       assert(0);
+      sv->key.cachable = 0;
       sws->surface_reference(sws, &sv->handle, tex->handle);
       return sv;
    }
@@ -1072,6 +1081,8 @@ svga_screen_texture_get_winsys_surface(struct pipe_texture *texture)
    struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
    struct svga_winsys_surface *vsurf = NULL;
 
+   assert(svga_texture(texture)->key.cachable == 0);
+   svga_texture(texture)->key.cachable = 0;
    sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle);
    return vsurf;
 }
-- 
cgit v1.2.3


From ba1ca28cc62fed71c77902b95ae4ed36c6bf25f8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 13:41:03 +0000
Subject: gallium: simplify tgsi tokens further

Drop anonymous 'Extended' fields and have every optional token named
explicitly in its parent.  E.g. there is now an Instruction.Label flag,
etc.

Drop destination modifiers and other functionality which cannot be
generated by tgsi_ureg.c, which is now the primary way of creating
shaders.

Pull source modifiers into the source register token, drop the second
negate flag.  The source register token is now full - if we need to
expand it, probably best to move all of the modifiers to a new token
and have a single flag for it.
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c   |   3 +-
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c |   3 +-
 src/gallium/auxiliary/tgsi/tgsi_build.c         | 271 ++++++------------------
 src/gallium/auxiliary/tgsi/tgsi_build.h         |  61 ++----
 src/gallium/auxiliary/tgsi/tgsi_dump.c          |  40 +---
 src/gallium/auxiliary/tgsi/tgsi_exec.c          |  10 +-
 src/gallium/auxiliary/tgsi/tgsi_parse.c         |  83 +-------
 src/gallium/auxiliary/tgsi/tgsi_parse.h         |   6 +-
 src/gallium/auxiliary/tgsi/tgsi_scan.c          |   7 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c          |   3 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c          | 155 +-------------
 src/gallium/auxiliary/tgsi/tgsi_ureg.c          |  34 +--
 src/gallium/auxiliary/tgsi/tgsi_util.c          |  30 +--
 src/gallium/auxiliary/vl/vl_shader_build.c      |   3 +-
 src/gallium/drivers/i915/i915_fpc_translate.c   |   2 +-
 src/gallium/drivers/svga/svga_tgsi_insn.c       |  12 +-
 src/gallium/include/pipe/p_shader_tokens.h      | 158 ++------------
 17 files changed, 160 insertions(+), 721 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 9f956715a2..e374010fee 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -268,7 +268,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
       newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+      newInst.Instruction.Texture = TRUE;
+      newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D;
       newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
       newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1;
       newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 283502cdf3..9de06e37ed 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -296,7 +296,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
       newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
+      newInst.Instruction.Texture = TRUE;
+      newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D;
       newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp;
       newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 9791e58db3..ce9e72e8b5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -122,7 +122,6 @@ tgsi_default_declaration( void )
    declaration.Centroid = 0;
    declaration.Invariant = 0;
    declaration.Padding = 0;
-   declaration.Extended = 0;
 
    return declaration;
 }
@@ -311,7 +310,6 @@ tgsi_default_immediate( void )
    immediate.NrTokens = 1;
    immediate.DataType = TGSI_IMM_FLOAT32;
    immediate.Padding = 0;
-   immediate.Extended = 0;
 
    return immediate;
 }
@@ -422,8 +420,9 @@ tgsi_default_instruction( void )
    instruction.Predicate = 0;
    instruction.NumDstRegs = 1;
    instruction.NumSrcRegs = 1;
+   instruction.Label = 0;
+   instruction.Texture = 0;
    instruction.Padding  = 0;
-   instruction.Extended = 0;
 
    return instruction;
 }
@@ -475,8 +474,8 @@ tgsi_default_full_instruction( void )
 
    full_instruction.Instruction = tgsi_default_instruction();
    full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
-   full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
-   full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
+   full_instruction.InstructionLabel = tgsi_default_instruction_label();
+   full_instruction.InstructionTexture = tgsi_default_instruction_texture();
    for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
       full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
    }
@@ -534,42 +533,42 @@ tgsi_build_full_instruction(
                                           header);
    }
 
-   if( tgsi_compare_instruction_ext_label(
-         full_inst->InstructionExtLabel,
-         tgsi_default_instruction_ext_label() ) ) {
-      struct tgsi_instruction_ext_label *instruction_ext_label;
+   if( tgsi_compare_instruction_label(
+         full_inst->InstructionLabel,
+         tgsi_default_instruction_label() ) ) {
+      struct tgsi_instruction_label *instruction_label;
 
       if( maxsize <= size )
          return 0;
-      instruction_ext_label =
-         (struct  tgsi_instruction_ext_label *) &tokens[size];
+      instruction_label =
+         (struct  tgsi_instruction_label *) &tokens[size];
       size++;
 
-      *instruction_ext_label = tgsi_build_instruction_ext_label(
-         full_inst->InstructionExtLabel.Label,
+      *instruction_label = tgsi_build_instruction_label(
+         full_inst->InstructionLabel.Label,
          prev_token,
          instruction,
          header );
-      prev_token = (struct tgsi_token  *) instruction_ext_label;
+      prev_token = (struct tgsi_token  *) instruction_label;
    }
 
-   if( tgsi_compare_instruction_ext_texture(
-         full_inst->InstructionExtTexture,
-         tgsi_default_instruction_ext_texture() ) ) {
-      struct tgsi_instruction_ext_texture *instruction_ext_texture;
+   if( tgsi_compare_instruction_texture(
+         full_inst->InstructionTexture,
+         tgsi_default_instruction_texture() ) ) {
+      struct tgsi_instruction_texture *instruction_texture;
 
       if( maxsize <= size )
          return 0;
-      instruction_ext_texture =
-         (struct  tgsi_instruction_ext_texture *) &tokens[size];
+      instruction_texture =
+         (struct  tgsi_instruction_texture *) &tokens[size];
       size++;
 
-      *instruction_ext_texture = tgsi_build_instruction_ext_texture(
-         full_inst->InstructionExtTexture.Texture,
+      *instruction_texture = tgsi_build_instruction_texture(
+         full_inst->InstructionTexture.Texture,
          prev_token,
          instruction,
          header   );
-      prev_token = (struct tgsi_token  *) instruction_ext_texture;
+      prev_token = (struct tgsi_token  *) instruction_texture;
    }
 
    for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
@@ -591,25 +590,6 @@ tgsi_build_full_instruction(
          header );
       prev_token = (struct tgsi_token  *) dst_register;
 
-      if( tgsi_compare_dst_register_ext_modulate(
-            reg->DstRegisterExtModulate,
-            tgsi_default_dst_register_ext_modulate() ) ) {
-         struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate;
-
-         if( maxsize <= size )
-            return 0;
-         dst_register_ext_modulate =
-            (struct  tgsi_dst_register_ext_modulate *) &tokens[size];
-         size++;
-
-         *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate(
-            reg->DstRegisterExtModulate.Modulate,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) dst_register_ext_modulate;
-      }
-
       if( reg->DstRegister.Indirect ) {
          struct tgsi_src_register *ind;
 
@@ -625,6 +605,7 @@ tgsi_build_full_instruction(
             reg->DstRegisterInd.SwizzleZ,
             reg->DstRegisterInd.SwizzleW,
             reg->DstRegisterInd.Negate,
+            reg->DstRegisterInd.Absolute,
             reg->DstRegisterInd.Indirect,
             reg->DstRegisterInd.Dimension,
             reg->DstRegisterInd.Index,
@@ -650,6 +631,7 @@ tgsi_build_full_instruction(
          reg->SrcRegister.SwizzleZ,
          reg->SrcRegister.SwizzleW,
          reg->SrcRegister.Negate,
+         reg->SrcRegister.Absolute,
          reg->SrcRegister.Indirect,
          reg->SrcRegister.Dimension,
          reg->SrcRegister.Index,
@@ -657,29 +639,6 @@ tgsi_build_full_instruction(
          header );
       prev_token = (struct tgsi_token  *) src_register;
 
-      if( tgsi_compare_src_register_ext_mod(
-            reg->SrcRegisterExtMod,
-            tgsi_default_src_register_ext_mod() ) ) {
-         struct tgsi_src_register_ext_mod *src_register_ext_mod;
-
-         if( maxsize <= size )
-            return 0;
-         src_register_ext_mod =
-            (struct  tgsi_src_register_ext_mod *) &tokens[size];
-         size++;
-
-         *src_register_ext_mod = tgsi_build_src_register_ext_mod(
-            reg->SrcRegisterExtMod.Complement,
-            reg->SrcRegisterExtMod.Bias,
-            reg->SrcRegisterExtMod.Scale2X,
-            reg->SrcRegisterExtMod.Absolute,
-            reg->SrcRegisterExtMod.Negate,
-            prev_token,
-            instruction,
-            header );
-         prev_token = (struct tgsi_token  *) src_register_ext_mod;
-      }
-
       if( reg->SrcRegister.Indirect ) {
          struct  tgsi_src_register *ind;
 
@@ -695,6 +654,7 @@ tgsi_build_full_instruction(
             reg->SrcRegisterInd.SwizzleZ,
             reg->SrcRegisterInd.SwizzleW,
             reg->SrcRegisterInd.Negate,
+            reg->SrcRegisterInd.Absolute,
             reg->SrcRegisterInd.Indirect,
             reg->SrcRegisterInd.Dimension,
             reg->SrcRegisterInd.Index,
@@ -733,6 +693,7 @@ tgsi_build_full_instruction(
                reg->SrcRegisterDimInd.SwizzleZ,
                reg->SrcRegisterDimInd.SwizzleW,
                reg->SrcRegisterDimInd.Negate,
+               reg->SrcRegisterDimInd.Absolute,
                reg->SrcRegisterDimInd.Indirect,
                reg->SrcRegisterDimInd.Dimension,
                reg->SrcRegisterDimInd.Index,
@@ -793,86 +754,80 @@ compare32(const void *a, const void *b)
    return *((uint32_t *) a) != *((uint32_t *) b);
 }
 
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void )
+struct tgsi_instruction_label
+tgsi_default_instruction_label( void )
 {
-   struct tgsi_instruction_ext_label instruction_ext_label;
+   struct tgsi_instruction_label instruction_label;
 
-   instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
-   instruction_ext_label.Label = 0;
-   instruction_ext_label.Padding = 0;
-   instruction_ext_label.Extended = 0;
+   instruction_label.Label = 0;
+   instruction_label.Padding = 0;
 
-   return instruction_ext_label;
+   return instruction_label;
 }
 
 unsigned
-tgsi_compare_instruction_ext_label(
-   struct tgsi_instruction_ext_label a,
-   struct tgsi_instruction_ext_label b )
+tgsi_compare_instruction_label(
+   struct tgsi_instruction_label a,
+   struct tgsi_instruction_label b )
 {
    a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
    return compare32(&a, &b);
 }
 
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
+struct tgsi_instruction_label
+tgsi_build_instruction_label(
    unsigned label,
    struct tgsi_token  *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header )
 {
-   struct tgsi_instruction_ext_label instruction_ext_label;
+   struct tgsi_instruction_label instruction_label;
 
-   instruction_ext_label = tgsi_default_instruction_ext_label();
-   instruction_ext_label.Label = label;
+   instruction_label = tgsi_default_instruction_label();
+   instruction_label.Label = label;
+   instruction->Label = 1;
 
-   prev_token->Extended = 1;
    instruction_grow( instruction, header );
 
-   return instruction_ext_label;
+   return instruction_label;
 }
 
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void )
+struct tgsi_instruction_texture
+tgsi_default_instruction_texture( void )
 {
-   struct tgsi_instruction_ext_texture instruction_ext_texture;
+   struct tgsi_instruction_texture instruction_texture;
 
-   instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
-   instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN;
-   instruction_ext_texture.Padding = 0;
-   instruction_ext_texture.Extended = 0;
+   instruction_texture.Texture = TGSI_TEXTURE_UNKNOWN;
+   instruction_texture.Padding = 0;
 
-   return instruction_ext_texture;
+   return instruction_texture;
 }
 
 unsigned
-tgsi_compare_instruction_ext_texture(
-   struct tgsi_instruction_ext_texture a,
-   struct tgsi_instruction_ext_texture b )
+tgsi_compare_instruction_texture(
+   struct tgsi_instruction_texture a,
+   struct tgsi_instruction_texture b )
 {
    a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
    return compare32(&a, &b);
 }
 
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
+struct tgsi_instruction_texture
+tgsi_build_instruction_texture(
    unsigned texture,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header )
 {
-   struct tgsi_instruction_ext_texture instruction_ext_texture;
+   struct tgsi_instruction_texture instruction_texture;
 
-   instruction_ext_texture = tgsi_default_instruction_ext_texture();
-   instruction_ext_texture.Texture = texture;
+   instruction_texture = tgsi_default_instruction_texture();
+   instruction_texture.Texture = texture;
+   instruction->Texture = 1;
 
-   prev_token->Extended = 1;
    instruction_grow( instruction, header );
 
-   return instruction_ext_texture;
+   return instruction_texture;
 }
 
 struct tgsi_src_register
@@ -886,10 +841,10 @@ tgsi_default_src_register( void )
    src_register.SwizzleZ = TGSI_SWIZZLE_Z;
    src_register.SwizzleW = TGSI_SWIZZLE_W;
    src_register.Negate = 0;
+   src_register.Absolute = 0;
    src_register.Indirect = 0;
    src_register.Dimension = 0;
    src_register.Index = 0;
-   src_register.Extended = 0;
 
    return src_register;
 }
@@ -902,6 +857,7 @@ tgsi_build_src_register(
    unsigned swizzle_z,
    unsigned swizzle_w,
    unsigned negate,
+   unsigned absolute,
    unsigned indirect,
    unsigned dimension,
    int index,
@@ -925,6 +881,7 @@ tgsi_build_src_register(
    src_register.SwizzleZ = swizzle_z;
    src_register.SwizzleW = swizzle_w;
    src_register.Negate = negate;
+   src_register.Absolute = absolute;
    src_register.Indirect = indirect;
    src_register.Dimension = dimension;
    src_register.Index = index;
@@ -940,7 +897,6 @@ tgsi_default_full_src_register( void )
    struct tgsi_full_src_register full_src_register;
 
    full_src_register.SrcRegister = tgsi_default_src_register();
-   full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod();
    full_src_register.SrcRegisterInd = tgsi_default_src_register();
    full_src_register.SrcRegisterDim = tgsi_default_dimension();
    full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
@@ -949,65 +905,6 @@ tgsi_default_full_src_register( void )
 }
 
 
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void )
-{
-   struct tgsi_src_register_ext_mod src_register_ext_mod;
-
-   src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
-   src_register_ext_mod.Complement = 0;
-   src_register_ext_mod.Bias = 0;
-   src_register_ext_mod.Scale2X = 0;
-   src_register_ext_mod.Absolute = 0;
-   src_register_ext_mod.Negate = 0;
-   src_register_ext_mod.Padding = 0;
-   src_register_ext_mod.Extended = 0;
-
-   return src_register_ext_mod;
-}
-
-unsigned
-tgsi_compare_src_register_ext_mod(
-   struct tgsi_src_register_ext_mod a,
-   struct tgsi_src_register_ext_mod b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return compare32(&a, &b);
-}
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
-   unsigned complement,
-   unsigned bias,
-   unsigned scale_2x,
-   unsigned absolute,
-   unsigned negate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_src_register_ext_mod src_register_ext_mod;
-
-   assert( complement <= 1 );
-   assert( bias <= 1 );
-   assert( scale_2x <= 1 );
-   assert( absolute <= 1 );
-   assert( negate <= 1 );
-
-   src_register_ext_mod = tgsi_default_src_register_ext_mod();
-   src_register_ext_mod.Complement = complement;
-   src_register_ext_mod.Bias = bias;
-   src_register_ext_mod.Scale2X = scale_2x;
-   src_register_ext_mod.Absolute = absolute;
-   src_register_ext_mod.Negate = negate;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return src_register_ext_mod;
-}
-
 struct tgsi_dimension
 tgsi_default_dimension( void )
 {
@@ -1017,7 +914,6 @@ tgsi_default_dimension( void )
    dimension.Dimension = 0;
    dimension.Padding = 0;
    dimension.Index = 0;
-   dimension.Extended = 0;
 
    return dimension;
 }
@@ -1051,7 +947,6 @@ tgsi_default_dst_register( void )
    dst_register.Dimension = 0;
    dst_register.Index = 0;
    dst_register.Padding = 0;
-   dst_register.Extended = 0;
 
    return dst_register;
 }
@@ -1089,51 +984,7 @@ tgsi_default_full_dst_register( void )
 
    full_dst_register.DstRegister = tgsi_default_dst_register();
    full_dst_register.DstRegisterInd = tgsi_default_src_register();
-   full_dst_register.DstRegisterExtModulate =
-      tgsi_default_dst_register_ext_modulate();
 
    return full_dst_register;
 }
 
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void )
-{
-   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
-   dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE;
-   dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X;
-   dst_register_ext_modulate.Padding = 0;
-   dst_register_ext_modulate.Extended = 0;
-
-   return dst_register_ext_modulate;
-}
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
-   struct tgsi_dst_register_ext_modulate a,
-   struct tgsi_dst_register_ext_modulate b )
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return compare32(&a, &b);
-}
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
-   unsigned modulate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header )
-{
-   struct tgsi_dst_register_ext_modulate dst_register_ext_modulate;
-
-   assert( modulate <= TGSI_MODULATE_EIGHTH );
-
-   dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate();
-   dst_register_ext_modulate.Modulate = modulate;
-
-   prev_token->Extended = 1;
-   instruction_grow( instruction, header );
-
-   return dst_register_ext_modulate;
-}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 0fe5f229d3..0fbc8b1b0a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -171,31 +171,31 @@ tgsi_build_instruction_predicate(int index,
                                  struct tgsi_instruction *instruction,
                                  struct tgsi_header *header);
 
-struct tgsi_instruction_ext_label
-tgsi_default_instruction_ext_label( void );
+struct tgsi_instruction_label
+tgsi_default_instruction_label( void );
 
 unsigned
-tgsi_compare_instruction_ext_label(
-   struct tgsi_instruction_ext_label a,
-   struct tgsi_instruction_ext_label b );
+tgsi_compare_instruction_label(
+   struct tgsi_instruction_label a,
+   struct tgsi_instruction_label b );
 
-struct tgsi_instruction_ext_label
-tgsi_build_instruction_ext_label(
+struct tgsi_instruction_label
+tgsi_build_instruction_label(
    unsigned label,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
    struct tgsi_header *header );
 
-struct tgsi_instruction_ext_texture
-tgsi_default_instruction_ext_texture( void );
+struct tgsi_instruction_texture
+tgsi_default_instruction_texture( void );
 
 unsigned
-tgsi_compare_instruction_ext_texture(
-   struct tgsi_instruction_ext_texture a,
-   struct tgsi_instruction_ext_texture b );
+tgsi_compare_instruction_texture(
+   struct tgsi_instruction_texture a,
+   struct tgsi_instruction_texture b );
 
-struct tgsi_instruction_ext_texture
-tgsi_build_instruction_ext_texture(
+struct tgsi_instruction_texture
+tgsi_build_instruction_texture(
    unsigned texture,
    struct tgsi_token *prev_token,
    struct tgsi_instruction *instruction,
@@ -212,6 +212,7 @@ tgsi_build_src_register(
    unsigned swizzle_z,
    unsigned swizzle_w,
    unsigned negate,
+   unsigned absolute,
    unsigned indirect,
    unsigned dimension,
    int index,
@@ -221,24 +222,6 @@ tgsi_build_src_register(
 struct tgsi_full_src_register
 tgsi_default_full_src_register( void );
 
-struct tgsi_src_register_ext_mod
-tgsi_default_src_register_ext_mod( void );
-
-unsigned
-tgsi_compare_src_register_ext_mod(
-   struct tgsi_src_register_ext_mod a,
-   struct tgsi_src_register_ext_mod b );
-
-struct tgsi_src_register_ext_mod
-tgsi_build_src_register_ext_mod(
-   unsigned complement,
-   unsigned bias,
-   unsigned scale_2x,
-   unsigned absolute,
-   unsigned negate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
 
 struct tgsi_dimension
 tgsi_default_dimension( void );
@@ -265,20 +248,6 @@ tgsi_build_dst_register(
 struct tgsi_full_dst_register
 tgsi_default_full_dst_register( void );
 
-struct tgsi_dst_register_ext_modulate
-tgsi_default_dst_register_ext_modulate( void );
-
-unsigned
-tgsi_compare_dst_register_ext_modulate(
-   struct tgsi_dst_register_ext_modulate a,
-   struct tgsi_dst_register_ext_modulate b );
-
-struct tgsi_dst_register_ext_modulate
-tgsi_build_dst_register_ext_modulate(
-   unsigned modulate,
-   struct tgsi_token *prev_token,
-   struct tgsi_instruction *instruction,
-   struct tgsi_header *header );
 
 #if defined __cplusplus
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index d16e64f9c5..7eb64167fb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -150,17 +150,6 @@ static const char *texture_names[] =
 };
 
 
-static const char *modulate_names[TGSI_MODULATE_COUNT] =
-{
-   "",
-   "_2X",
-   "_4X",
-   "_8X",
-   "_D2",
-   "_D4",
-   "_D8"
-};
-
 static void
 _dump_register(
    struct dump_ctx *ctx,
@@ -385,7 +374,6 @@ iter_instruction(
             dst->DstRegister.Index,
             dst->DstRegister.Index );
       }
-      ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
       _dump_writemask( ctx, dst->DstRegister.WriteMask );
 
       first_reg = FALSE;
@@ -398,18 +386,10 @@ iter_instruction(
          CHR( ',' );
       CHR( ' ' );
 
-      if (src->SrcRegisterExtMod.Negate)
+      if (src->SrcRegister.Negate)
          TXT( "-(" );
-      if (src->SrcRegisterExtMod.Absolute)
+      if (src->SrcRegister.Absolute)
          CHR( '|' );
-      if (src->SrcRegisterExtMod.Scale2X)
-         TXT( "2*(" );
-      if (src->SrcRegisterExtMod.Bias)
-         CHR( '(' );
-      if (src->SrcRegisterExtMod.Complement)
-         TXT( "1-(" );
-      if (src->SrcRegister.Negate)
-         CHR( '-' );
 
       if (src->SrcRegister.Indirect) {
          _dump_register_ind(
@@ -439,23 +419,17 @@ iter_instruction(
          ENM( src->SrcRegister.SwizzleW, swizzle_names );
       }
 
-      if (src->SrcRegisterExtMod.Complement)
-         CHR( ')' );
-      if (src->SrcRegisterExtMod.Bias)
-         TXT( ")-.5" );
-      if (src->SrcRegisterExtMod.Scale2X)
-         CHR( ')' );
-      if (src->SrcRegisterExtMod.Absolute)
+      if (src->SrcRegister.Absolute)
          CHR( '|' );
-      if (src->SrcRegisterExtMod.Negate)
+      if (src->SrcRegister.Negate)
          CHR( ')' );
 
       first_reg = FALSE;
    }
 
-   if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) {
+   if (inst->Instruction.Texture) {
       TXT( ", " );
-      ENM( inst->InstructionExtTexture.Texture, texture_names );
+      ENM( inst->InstructionTexture.Texture, texture_names );
    }
 
    switch (inst->Instruction.Opcode) {
@@ -465,7 +439,7 @@ iter_instruction(
    case TGSI_OPCODE_ENDLOOP:
    case TGSI_OPCODE_CAL:
       TXT( " :" );
-      UID( inst->InstructionExtLabel.Label );
+      UID( inst->InstructionLabel.Label );
       break;
    }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 89740cee89..61d38e57f1 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1397,10 +1397,6 @@ fetch_source(
    case TGSI_UTIL_SIGN_KEEP:
       break;
    }
-
-   if (reg->SrcRegisterExtMod.Complement) {
-      micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan );
-   }
 }
 
 static void
@@ -1679,7 +1675,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 
    /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
 
-   switch (inst->InstructionExtTexture.Texture) {
+   switch (inst->InstructionTexture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
 
@@ -1777,7 +1773,7 @@ exec_txd(struct tgsi_exec_machine *mach,
     * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
     */
 
-   switch (inst->InstructionExtTexture.Texture) {
+   switch (inst->InstructionTexture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
 
@@ -2744,7 +2740,7 @@ exec_instruction(
          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
 
          /* Finally, jump to the subroutine */
-         *pc = inst->InstructionExtLabel.Label;
+         *pc = inst->InstructionLabel.Label;
       }
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 9ca2993452..853485b48b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -28,7 +28,6 @@
 #include "util/u_debug.h"
 #include "pipe/p_shader_tokens.h"
 #include "tgsi_parse.h"
-#include "tgsi_build.h"
 #include "util/u_memory.h"
 
 void
@@ -59,7 +58,7 @@ tgsi_parse_init(
       ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2];
    }
    else {
-      ctx->FullHeader.Processor = tgsi_default_processor();
+      return TGSI_PARSE_ERROR;
    }
 
    ctx->Tokens = tokens;
@@ -129,7 +128,7 @@ tgsi_parse_token(
    {
       struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration;
 
-      *decl = tgsi_default_full_declaration();
+      memset(decl, 0, sizeof *decl);
       copy_token(&decl->Declaration, &token);
 
       next_token( ctx, &decl->DeclarationRange );
@@ -145,9 +144,8 @@ tgsi_parse_token(
    {
       struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate;
 
-      *imm = tgsi_default_full_immediate();
+      memset(imm, 0, sizeof *imm);
       copy_token(&imm->Immediate, &token);
-      assert( !imm->Immediate.Extended );
 
       switch (imm->Immediate.DataType) {
       case TGSI_IMM_FLOAT32:
@@ -169,41 +167,25 @@ tgsi_parse_token(
    case TGSI_TOKEN_TYPE_INSTRUCTION:
    {
       struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction;
-      unsigned extended;
 
-      *inst = tgsi_default_full_instruction();
+      memset(inst, 0, sizeof *inst);
       copy_token(&inst->Instruction, &token);
-      extended = inst->Instruction.Extended;
 
       if (inst->Instruction.Predicate) {
          next_token(ctx, &inst->InstructionPredicate);
       }
 
-      while( extended ) {
-         struct tgsi_src_register_ext token;
-
-         next_token( ctx, &token );
-
-         switch( token.Type ) {
-         case TGSI_INSTRUCTION_EXT_TYPE_LABEL:
-            copy_token(&inst->InstructionExtLabel, &token);
-            break;
-
-         case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE:
-            copy_token(&inst->InstructionExtTexture, &token);
-            break;
-
-         default:
-            assert( 0 );
-         }
+      if (inst->Instruction.Label) {
+         next_token( ctx, &inst->InstructionLabel);
+      }
 
-         extended = token.Extended;
+      if (inst->Instruction.Texture) {
+         next_token( ctx, &inst->InstructionTexture);
       }
 
       assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
 
       for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-         unsigned extended;
 
          next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
 
@@ -212,65 +194,23 @@ tgsi_parse_token(
           */
          assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
 
-         extended = inst->FullDstRegisters[i].DstRegister.Extended;
-
-         while( extended ) {
-            struct tgsi_src_register_ext token;
-
-            next_token( ctx, &token );
-
-            switch( token.Type ) {
-            case TGSI_DST_REGISTER_EXT_TYPE_MODULATE:
-               copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate,
-                          &token);
-               break;
-
-            default:
-               assert( 0 );
-            }
-
-            extended = token.Extended;
-         }
-
          if( inst->FullDstRegisters[i].DstRegister.Indirect ) {
             next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd );
 
             /*
              * No support for indirect or multi-dimensional addressing.
              */
-            assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect );
             assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension );
-            assert( !inst->FullDstRegisters[i].DstRegisterInd.Extended );
+            assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect );
          }
       }
 
       assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
 
       for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-         unsigned extended;
 
          next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
 
-         extended = inst->FullSrcRegisters[i].SrcRegister.Extended;
-
-         while( extended ) {
-            struct tgsi_src_register_ext token;
-
-            next_token( ctx, &token );
-
-            switch( token.Type ) {
-            case TGSI_SRC_REGISTER_EXT_TYPE_MOD:
-               copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod,
-                          &token);
-               break;
-
-            default:
-               assert( 0 );
-            }
-
-            extended = token.Extended;
-         }
-
          if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
             next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
 
@@ -279,7 +219,6 @@ tgsi_parse_token(
              */
             assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
             assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
          }
 
          if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
@@ -289,7 +228,6 @@ tgsi_parse_token(
              * No support for multi-dimensional addressing.
              */
             assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended );
 
             if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
                next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
@@ -299,7 +237,6 @@ tgsi_parse_token(
                */
                assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
                assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended );
             }
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index cb4772ade8..ba9578c6a5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -49,13 +49,11 @@ struct tgsi_full_dst_register
 {
    struct tgsi_dst_register               DstRegister;
    struct tgsi_src_register               DstRegisterInd;
-   struct tgsi_dst_register_ext_modulate  DstRegisterExtModulate;
 };
 
 struct tgsi_full_src_register
 {
    struct tgsi_src_register         SrcRegister;
-   struct tgsi_src_register_ext_mod SrcRegisterExtMod;
    struct tgsi_src_register         SrcRegisterInd;
    struct tgsi_dimension            SrcRegisterDim;
    struct tgsi_src_register         SrcRegisterDimInd;
@@ -81,8 +79,8 @@ struct tgsi_full_instruction
 {
    struct tgsi_instruction             Instruction;
    struct tgsi_instruction_predicate   InstructionPredicate;
-   struct tgsi_instruction_ext_label   InstructionExtLabel;
-   struct tgsi_instruction_ext_texture InstructionExtTexture;
+   struct tgsi_instruction_label       InstructionLabel;
+   struct tgsi_instruction_texture     InstructionTexture;
    struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
    struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
    uint Flags;  /**< user-defined usage */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index f9c16f1b6c..55595539ec 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -35,7 +35,6 @@
 
 
 #include "util/u_math.h"
-#include "tgsi/tgsi_build.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
 
@@ -217,11 +216,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
                 src->SrcRegister.Index != dst->DstRegister.Index ||
 
                 src->SrcRegister.Negate ||
-                src->SrcRegisterExtMod.Negate ||
-                src->SrcRegisterExtMod.Absolute ||
-                src->SrcRegisterExtMod.Scale2X ||
-                src->SrcRegisterExtMod.Bias ||
-                src->SrcRegisterExtMod.Complement ||
+                src->SrcRegister.Absolute ||
 
                 src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
                 src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a96fc94c7a..a6cc3a5398 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1464,7 +1464,8 @@ emit_tex( struct x86_function *func,
    unsigned count;
    unsigned i;
 
-   switch (inst->InstructionExtTexture.Texture) {
+   assert(inst->Instruction.Texture);
+   switch (inst->InstructionTexture.Texture) {
    case TGSI_TEXTURE_1D:
       count = 1;
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index d2b03ffb2f..7250f98cc9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -486,16 +486,6 @@ parse_register_dcl(
    return TRUE;
 }
 
-static const char *modulate_names[TGSI_MODULATE_COUNT] =
-{
-   "_1X",
-   "_2X",
-   "_4X",
-   "_8X",
-   "_D2",
-   "_D4",
-   "_D8"
-};
 
 static boolean
 parse_dst_operand(
@@ -512,19 +502,6 @@ parse_dst_operand(
 
    cur = ctx->cur;
    eat_opt_white( &cur );
-   if (*cur == '_') {
-      uint i;
-
-      for (i = 0; i < TGSI_MODULATE_COUNT; i++) {
-         if (str_match_no_case( &cur, modulate_names[i] )) {
-            if (!is_digit_alpha_underscore( cur )) {
-               dst->DstRegisterExtModulate.Modulate = i;
-               ctx->cur = cur;
-               break;
-            }
-         }
-      }
-   }
 
    if (!parse_opt_writemask( ctx, &writemask ))
       return FALSE;
@@ -577,92 +554,24 @@ parse_src_operand(
    struct translate_ctx *ctx,
    struct tgsi_full_src_register *src )
 {
-   const char *cur;
-   float value;
    uint file;
    int index;
    uint ind_file;
    int ind_index;
    uint ind_comp;
    uint swizzle[4];
-   boolean parsed_ext_negate_paren = FALSE;
    boolean parsed_swizzle;
 
-   if (*ctx->cur == '-') {
-      cur = ctx->cur;
-      cur++;
-      eat_opt_white( &cur );
-      if (*cur == '(') {
-         cur++;
-         src->SrcRegisterExtMod.Negate = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-         parsed_ext_negate_paren = TRUE;
-      }
-      else if (*cur == '|') {
-         cur++;
-         src->SrcRegisterExtMod.Negate = 1;
-         src->SrcRegisterExtMod.Absolute = 1;
-         eat_opt_white(&cur);
-         ctx->cur = cur;
-      }
-   }
-   else if (*ctx->cur == '|') {
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      src->SrcRegisterExtMod.Absolute = 1;
-   }
-
    if (*ctx->cur == '-') {
       ctx->cur++;
       eat_opt_white( &ctx->cur );
       src->SrcRegister.Negate = 1;
    }
-
-   cur = ctx->cur;
-   if (parse_float( &cur, &value )) {
-      if (value == 2.0f) {
-         eat_opt_white( &cur );
-         if (*cur != '*') {
-            report_error( ctx, "Expected `*'" );
-            return FALSE;
-         }
-         cur++;
-         if (*cur != '(') {
-            report_error( ctx, "Expected `('" );
-            return FALSE;
-         }
-         cur++;
-         src->SrcRegisterExtMod.Scale2X = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-      }
-   }
-
-   if (*ctx->cur == '(') {
+   
+   if (*ctx->cur == '|') {
       ctx->cur++;
       eat_opt_white( &ctx->cur );
-      src->SrcRegisterExtMod.Bias = 1;
-   }
-
-   cur = ctx->cur;
-   if (parse_float( &cur, &value )) {
-      if (value == 1.0f) {
-         eat_opt_white( &cur );
-         if (*cur != '-') {
-            report_error( ctx, "Expected `-'" );
-            return FALSE;
-         }
-         cur++;
-         if (*cur != '(') {
-            report_error( ctx, "Expected `('" );
-            return FALSE;
-         }
-         cur++;
-         src->SrcRegisterExtMod.Complement = 1;
-         eat_opt_white( &cur );
-         ctx->cur = cur;
-      }
+      src->SrcRegister.Absolute = 1;
    }
 
    if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp))
@@ -690,49 +599,7 @@ parse_src_operand(
       }
    }
 
-   if (src->SrcRegisterExtMod.Complement) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   if (src->SrcRegisterExtMod.Bias) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != '-') {
-         report_error( ctx, "Expected `-'" );
-         return FALSE;
-      }
-      ctx->cur++;
-      eat_opt_white( &ctx->cur );
-      if (!parse_float( &ctx->cur, &value )) {
-         report_error( ctx, "Expected literal floating point" );
-         return FALSE;
-      }
-      if (value != 0.5f) {
-         report_error( ctx, "Expected 0.5" );
-         return FALSE;
-      }
-   }
-
-   if (src->SrcRegisterExtMod.Scale2X) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
-
-   if (src->SrcRegisterExtMod.Absolute) {
+   if (src->SrcRegister.Absolute) {
       eat_opt_white( &ctx->cur );
       if (*ctx->cur != '|') {
          report_error( ctx, "Expected `|'" );
@@ -741,14 +608,6 @@ parse_src_operand(
       ctx->cur++;
    }
 
-   if (parsed_ext_negate_paren) {
-      eat_opt_white( &ctx->cur );
-      if (*ctx->cur != ')') {
-         report_error( ctx, "Expected `)'" );
-         return FALSE;
-      }
-      ctx->cur++;
-   }
 
    return TRUE;
 }
@@ -853,7 +712,8 @@ parse_instruction(
          for (j = 0; j < TGSI_TEXTURE_COUNT; j++) {
             if (str_match_no_case( &ctx->cur, texture_names[j] )) {
                if (!is_digit_alpha_underscore( ctx->cur )) {
-                  inst.InstructionExtTexture.Texture = j;
+                  inst.Instruction.Texture = 1;
+                  inst.InstructionTexture.Texture = j;
                   break;
                }
             }
@@ -879,7 +739,8 @@ parse_instruction(
          report_error( ctx, "Expected a label" );
          return FALSE;
       }
-      inst.InstructionExtLabel.Label = target;
+      inst.Instruction.Label = 1;
+      inst.InstructionLabel.Label = target;
    }
 
    advance = tgsi_build_full_instruction(
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 5526a5d034..de4bc6edb0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -48,13 +48,11 @@ union tgsi_any_token {
    union  tgsi_immediate_data imm_data;
    struct tgsi_instruction insn;
    struct tgsi_instruction_predicate insn_predicate;
-   struct tgsi_instruction_ext_label insn_ext_label;
-   struct tgsi_instruction_ext_texture insn_ext_texture;
+   struct tgsi_instruction_label insn_label;
+   struct tgsi_instruction_texture insn_texture;
    struct tgsi_src_register src;
-   struct tgsi_src_register_ext_mod src_ext_mod;
    struct tgsi_dimension dim;
    struct tgsi_dst_register dst;
-   struct tgsi_dst_register_ext_modulate dst_ext_mod;
    unsigned value;
 };
 
@@ -575,17 +573,8 @@ ureg_emit_src( struct ureg_program *ureg,
    out[n].src.SwizzleW = src.SwizzleW;
    out[n].src.Index = src.Index;
    out[n].src.Negate = src.Negate;
+   out[0].src.Absolute = src.Absolute;
    n++;
-   
-   if (src.Absolute) {
-      out[0].src.Extended = 1;
-      out[0].src.Negate = 0;
-      out[n].value = 0;
-      out[n].src_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD;
-      out[n].src_ext_mod.Absolute = 1;
-      out[n].src_ext_mod.Negate = src.Negate;
-      n++;
-   }
 
    if (src.Indirect) {
       out[0].src.Indirect = 1;
@@ -712,13 +701,11 @@ ureg_emit_label(struct ureg_program *ureg,
       return;
 
    out = get_tokens( ureg, DOMAIN_INSN, 1 );
-   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
+   out[0].value = 0;
 
-   insn->token.Extended = 1;
+   insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
+   insn->insn.Label = 1;
 
-   out[0].value = 0;
-   out[0].insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
-   
    *label_token = ureg->domain[DOMAIN_INSN].count - 1;
 }
 
@@ -741,8 +728,7 @@ ureg_fixup_label(struct ureg_program *ureg,
 {
    union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
 
-   assert(out->insn_ext_label.Type == TGSI_INSTRUCTION_EXT_TYPE_LABEL);
-   out->insn_ext_label.Label = instruction_number;
+   out->insn_label.Label = instruction_number;
 }
 
 
@@ -756,11 +742,10 @@ ureg_emit_texture(struct ureg_program *ureg,
    out = get_tokens( ureg, DOMAIN_INSN, 1 );
    insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
 
-   insn->token.Extended = 1;
+   insn->insn.Texture = 1;
 
    out[0].value = 0;
-   out[0].insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
-   out[0].insn_ext_texture.Texture = target;
+   out[0].insn_texture.Texture = target;
 }
 
 
@@ -961,7 +946,6 @@ static void emit_immediate( struct ureg_program *ureg,
    out[0].imm.NrTokens = 5;
    out[0].imm.DataType = TGSI_IMM_FLOAT32;
    out[0].imm.Padding = 0;
-   out[0].imm.Extended = 0;
 
    out[1].imm_data.Float = v[0];
    out[2].imm_data.Float = v[1];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 4dee1be9e8..3544011b47 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -111,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode(
 {
    unsigned sign_mode;
 
-   if( reg->SrcRegisterExtMod.Absolute ) {
+   if( reg->SrcRegister.Absolute ) {
       /* Consider only the post-abs negation. */
 
-      if( reg->SrcRegisterExtMod.Negate ) {
+      if( reg->SrcRegister.Negate ) {
          sign_mode = TGSI_UTIL_SIGN_SET;
       }
       else {
@@ -122,17 +122,7 @@ tgsi_util_get_full_src_register_sign_mode(
       }
    }
    else {
-      /* Accumulate the three negations. */
-
-      unsigned negate;
-
-      negate = reg->SrcRegister.Negate;
-
-      if( reg->SrcRegisterExtMod.Negate ) {
-         negate = !negate;
-      }
-
-      if( negate ) {
+      if( reg->SrcRegister.Negate ) {
          sign_mode = TGSI_UTIL_SIGN_TOGGLE;
       }
       else {
@@ -152,26 +142,22 @@ tgsi_util_set_full_src_register_sign_mode(
    {
    case TGSI_UTIL_SIGN_CLEAR:
       reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 1;
-      reg->SrcRegisterExtMod.Negate = 0;
+      reg->SrcRegister.Absolute = 1;
       break;
 
    case TGSI_UTIL_SIGN_SET:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 1;
-      reg->SrcRegisterExtMod.Negate = 1;
+      reg->SrcRegister.Absolute = 1;
+      reg->SrcRegister.Negate = 1;
       break;
 
    case TGSI_UTIL_SIGN_TOGGLE:
       reg->SrcRegister.Negate = 1;
-      reg->SrcRegisterExtMod.Absolute = 0;
-      reg->SrcRegisterExtMod.Negate = 0;
+      reg->SrcRegister.Absolute = 0;
       break;
 
    case TGSI_UTIL_SIGN_KEEP:
       reg->SrcRegister.Negate = 0;
-      reg->SrcRegisterExtMod.Absolute = 0;
-      reg->SrcRegisterExtMod.Negate = 0;
+      reg->SrcRegister.Absolute = 0;
       break;
 
    default:
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index faa20a903c..9637cbed8a 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -191,7 +191,8 @@ struct tgsi_full_instruction vl_tex
    inst.FullDstRegisters[0].DstRegister.File = dst_file;
    inst.FullDstRegisters[0].DstRegister.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
-   inst.InstructionExtTexture.Texture = tex;
+   inst.Instruction.Texture = 1;
+   inst.InstructionTexture.Texture = tex;
    inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
    inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
    inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 379d47e79a..a96ba8f192 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -339,7 +339,7 @@ emit_tex(struct i915_fp_compile *p,
          const struct tgsi_full_instruction *inst,
          uint opcode)
 {
-   uint texture = inst->InstructionExtTexture.Texture;
+   uint texture = inst->InstructionTexture.Texture;
    uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
    uint tex = translate_tex_src_target( p, texture );
    uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index ea409b7e16..3ef6cb1074 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -227,14 +227,14 @@ translate_src_register( const struct svga_shader_emitter *emit,
    /* src.mod isn't a bitfield, unfortunately:
     * See tgsi_util_get_full_src_register_sign_mode for implementation details.
     */
-   if (reg->SrcRegisterExtMod.Absolute) {
-      if (reg->SrcRegisterExtMod.Negate)
+   if (reg->SrcRegister.Absolute) {
+      if (reg->SrcRegister.Negate)
          src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
       else
          src.base.srcMod = SVGA3DSRCMOD_ABS;
    }
    else {
-      if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate)
+      if (reg->SrcRegister.Negate)
          src.base.srcMod = SVGA3DSRCMOD_NEG;
       else
          src.base.srcMod = SVGA3DSRCMOD_NONE;
@@ -986,8 +986,8 @@ static boolean emit_kil(struct svga_shader_emitter *emit,
    inst = inst_token( SVGA3DOP_TEXKILL );
    src0 = translate_src_register( emit, reg );
 
-   if (reg->SrcRegisterExtMod.Absolute ||
-       reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate ||
+   if (reg->SrcRegister.Absolute ||
+       reg->SrcRegister.Negate ||
        reg->SrcRegister.Indirect ||
        reg->SrcRegister.SwizzleX != 0 ||
        reg->SrcRegister.SwizzleY != 1 ||
@@ -1953,7 +1953,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit,
 static boolean emit_call( struct svga_shader_emitter *emit,
                            const struct tgsi_full_instruction *insn )
 {
-   unsigned position = insn->InstructionExtLabel.Label;
+   unsigned position = insn->InstructionLabel.Label;
    unsigned i;
    
    for (i = 0; i < emit->nr_labels; i++) {
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index c4c28522c8..ac999e0c18 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -66,8 +66,7 @@ struct tgsi_token
 {
    unsigned Type       : 4;  /**< TGSI_TOKEN_TYPE_x */
    unsigned NrTokens   : 8;  /**< UINT */
-   unsigned Padding    : 19;
-   unsigned Extended   : 1;  /**< BOOL */
+   unsigned Padding    : 20;
 };
 
 enum tgsi_file_type {
@@ -117,8 +116,7 @@ struct tgsi_declaration
    unsigned Semantic    : 1;  /**< BOOL, any semantic info? */
    unsigned Centroid    : 1;  /**< centroid sampling? */
    unsigned Invariant   : 1;  /**< invariant optimization? */
-   unsigned Padding     : 4;
-   unsigned Extended    : 1;  /**< BOOL */
+   unsigned Padding     : 5;
 };
 
 struct tgsi_declaration_range
@@ -151,8 +149,7 @@ struct tgsi_immediate
    unsigned Type       : 4;  /**< TGSI_TOKEN_TYPE_IMMEDIATE */
    unsigned NrTokens   : 8;  /**< UINT */
    unsigned DataType   : 4;  /**< one of TGSI_IMM_x */
-   unsigned Padding    : 15;
-   unsigned Extended   : 1;  /**< BOOL */
+   unsigned Padding    : 16;
 };
 
 union tgsi_immediate_data
@@ -295,8 +292,6 @@ union tgsi_immediate_data
  *
  * If Predicate is TRUE, tgsi_instruction_predicate token immediately follows.
  *
- * If Extended is TRUE, it is now executed.
- *
  * Saturate controls how are final results in destination registers modified.
  */
 
@@ -309,12 +304,15 @@ struct tgsi_instruction
    unsigned NumDstRegs : 2;  /* UINT */
    unsigned NumSrcRegs : 4;  /* UINT */
    unsigned Predicate  : 1;  /* BOOL */
-   unsigned Padding    : 2;
-   unsigned Extended   : 1;  /* BOOL */
+   unsigned Label      : 1;
+   unsigned Texture    : 1;
+   unsigned Padding    : 1;
 };
 
 /*
- * If tgsi_instruction::Extended is TRUE, tgsi_instruction_ext follows.
+ * If tgsi_instruction::Label is TRUE, tgsi_instruction_label follows.
+ *
+ * If tgsi_instruction::Texture is TRUE, tgsi_instruction_texture follows.
  * 
  * Then, tgsi_instruction::NumDstRegs of tgsi_dst_register follow.
  * 
@@ -324,38 +322,15 @@ struct tgsi_instruction
  * instruction, including the instruction word.
  */
 
-#define TGSI_INSTRUCTION_EXT_TYPE_LABEL     1
-#define TGSI_INSTRUCTION_EXT_TYPE_TEXTURE   2
-
-struct tgsi_instruction_ext
-{
-   unsigned Type       : 4;  /* TGSI_INSTRUCTION_EXT_TYPE_ */
-   unsigned Padding    : 27;
-   unsigned Extended   : 1;  /* BOOL */
-};
-
-/*
- * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_LABEL, it
- * should be cast to tgsi_instruction_ext_label.
- * 
- * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_TEXTURE, it
- * should be cast to tgsi_instruction_ext_texture.
- * 
- * If tgsi_instruction_ext::Extended is TRUE, another tgsi_instruction_ext
- * follows.
- */
-
 #define TGSI_SWIZZLE_X      0
 #define TGSI_SWIZZLE_Y      1
 #define TGSI_SWIZZLE_Z      2
 #define TGSI_SWIZZLE_W      3
 
-struct tgsi_instruction_ext_label
+struct tgsi_instruction_label
 {
-   unsigned Type     : 4;    /* TGSI_INSTRUCTION_EXT_TYPE_LABEL */
    unsigned Label    : 24;   /* UINT */
-   unsigned Padding  : 3;
-   unsigned Extended : 1;    /* BOOL */
+   unsigned Padding  : 8;
 };
 
 #define TGSI_TEXTURE_UNKNOWN        0
@@ -369,12 +344,10 @@ struct tgsi_instruction_ext_label
 #define TGSI_TEXTURE_SHADOWRECT     8
 #define TGSI_TEXTURE_COUNT          9
 
-struct tgsi_instruction_ext_texture
+struct tgsi_instruction_texture
 {
-   unsigned Type     : 4;    /* TGSI_INSTRUCTION_EXT_TYPE_TEXTURE */
    unsigned Texture  : 8;    /* TGSI_TEXTURE_ */
-   unsigned Padding  : 19;
-   unsigned Extended : 1;    /* BOOL */
+   unsigned Padding  : 24;
 };
 
 /*
@@ -406,26 +379,24 @@ struct tgsi_instruction_predicate
  * The fetched register components are swizzled according to SwizzleX, SwizzleY,
  * SwizzleZ and SwizzleW.
  *
- * If Extended is TRUE, any further modifications to the source register are
- * made to this temporary storage.
  */
 
 struct tgsi_src_register
 {
    unsigned File        : 4;  /* TGSI_FILE_ */
+   unsigned Indirect    : 1;  /* BOOL */
+   unsigned Dimension   : 1;  /* BOOL */
+   int      Index       : 16; /* SINT */
    unsigned SwizzleX    : 2;  /* TGSI_SWIZZLE_ */
    unsigned SwizzleY    : 2;  /* TGSI_SWIZZLE_ */
    unsigned SwizzleZ    : 2;  /* TGSI_SWIZZLE_ */
    unsigned SwizzleW    : 2;  /* TGSI_SWIZZLE_ */
-   unsigned Negate      : 1;  /* BOOL */
-   unsigned Indirect    : 1;  /* BOOL */
-   unsigned Dimension   : 1;  /* BOOL */
-   int      Index       : 16; /* SINT */
-   unsigned Extended    : 1;  /* BOOL */
+   unsigned Absolute    : 1;    /* BOOL */
+   unsigned Negate      : 1;    /* BOOL */
 };
 
 /**
- * If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows.
+ * If tgsi_src_register::Modifier is TRUE, tgsi_src_register_modifier follows.
  * 
  * Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register
  * follows.
@@ -433,58 +404,13 @@ struct tgsi_src_register
  * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows.
  */
 
-#define TGSI_SRC_REGISTER_EXT_TYPE_MOD      1
-
-struct tgsi_src_register_ext
-{
-   unsigned Type     : 4;    /* TGSI_SRC_REGISTER_EXT_TYPE_ */
-   unsigned Padding  : 27;
-   unsigned Extended : 1;    /* BOOL */
-};
-
-/**
- * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD,
- * it should be cast to tgsi_src_register_ext_mod.
- * 
- * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext
- * follows.
- */
-
-
-/**
- * Extra src register modifiers
- *
- * If Complement is TRUE, the source register is modified by subtracting it
- * from 1.0.
- *
- * If Bias is TRUE, the source register is modified by subtracting 0.5 from it.
- *
- * If Scale2X is TRUE, the source register is modified by multiplying it by 2.0.
- *
- * If Absolute is TRUE, the source register is modified by removing the sign.
- *
- * If Negate is TRUE, the source register is modified by negating it.
- */
-
-struct tgsi_src_register_ext_mod
-{
-   unsigned Type         : 4;    /* TGSI_SRC_REGISTER_EXT_TYPE_MOD */
-   unsigned Complement   : 1;    /* BOOL */
-   unsigned Bias         : 1;    /* BOOL */
-   unsigned Scale2X      : 1;    /* BOOL */
-   unsigned Absolute     : 1;    /* BOOL */
-   unsigned Negate       : 1;    /* BOOL */
-   unsigned Padding      : 22;
-   unsigned Extended     : 1;    /* BOOL */
-};
 
 struct tgsi_dimension
 {
    unsigned Indirect    : 1;  /* BOOL */
    unsigned Dimension   : 1;  /* BOOL */
-   unsigned Padding     : 13;
+   unsigned Padding     : 14;
    int      Index       : 16; /* SINT */
-   unsigned Extended    : 1;  /* BOOL */
 };
 
 struct tgsi_dst_register
@@ -494,51 +420,9 @@ struct tgsi_dst_register
    unsigned Indirect    : 1;  /* BOOL */
    unsigned Dimension   : 1;  /* BOOL */
    int      Index       : 16; /* SINT */
-   unsigned Padding     : 5;
-   unsigned Extended    : 1;  /* BOOL */
-};
-
-/*
- * If tgsi_dst_register::Extended is TRUE, tgsi_dst_register_ext follows.
- * 
- * Then, if tgsi_dst_register::Indirect is TRUE, tgsi_src_register follows.
- */
-
-#define TGSI_DST_REGISTER_EXT_TYPE_MODULATE     1
-
-struct tgsi_dst_register_ext
-{
-   unsigned Type     : 4;    /* TGSI_DST_REGISTER_EXT_TYPE_ */
-   unsigned Padding  : 27;
-   unsigned Extended : 1;    /* BOOL */
+   unsigned Padding     : 6;
 };
 
-/**
- * Extra destination register modifiers
- *
- * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_MODULATE,
- * it should be cast to tgsi_dst_register_ext_modulate.
- * 
- * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext
- * follows.
- */
-
-#define TGSI_MODULATE_1X        0
-#define TGSI_MODULATE_2X        1
-#define TGSI_MODULATE_4X        2
-#define TGSI_MODULATE_8X        3
-#define TGSI_MODULATE_HALF      4
-#define TGSI_MODULATE_QUARTER   5
-#define TGSI_MODULATE_EIGHTH    6
-#define TGSI_MODULATE_COUNT     7
-
-struct tgsi_dst_register_ext_modulate
-{
-   unsigned Type     : 4;    /* TGSI_DST_REGISTER_EXT_TYPE_MODULATE */
-   unsigned Modulate : 4;    /* TGSI_MODULATE_ */
-   unsigned Padding  : 23;
-   unsigned Extended : 1;    /* BOOL */
-};
 
 #ifdef __cplusplus
 }
-- 
cgit v1.2.3


From 56ee132f9671f70ff2b3ee04659beac0dfc6126d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 14:09:24 +0000
Subject: gallium: try and update r300 and nv drivers for tgsi changes

It would be nice if these drivers built under the linux-debug header
so that these types of interface changes can be minimally propagated
into those drivers by people without the hardware.  They don't have to
generate a working driver -- though a command-dumping winsys would be
excellent for regression checking.
---
 src/gallium/drivers/i915/i915_fpc_translate.c  |  5 ++---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c |  2 +-
 src/gallium/drivers/nv20/nv20_vertprog.c       |  2 +-
 src/gallium/drivers/nv30/nv30_fragprog.c       |  2 +-
 src/gallium/drivers/nv30/nv30_vertprog.c       |  2 +-
 src/gallium/drivers/nv40/nv40_fragprog.c       |  2 +-
 src/gallium/drivers/nv40/nv40_vertprog.c       |  2 +-
 src/gallium/drivers/nv50/nv50_program.c        | 10 +++++-----
 src/gallium/drivers/r300/r300_tgsi_to_rc.c     |  7 ++++---
 9 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index a96ba8f192..f2554998a9 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -229,12 +229,11 @@ src_vector(struct i915_fp_compile *p,
       src = negate(src, n, n, n, n);
    }
 
-   /* no abs() or post-abs negation */
+   /* no abs() */
 #if 0
    /* XXX assertions disabled to allow arbfplight.c to run */
    /* XXX enable these assertions, or fix things */
-   assert(!source->SrcRegisterExtMod.Absolute);
-   assert(!source->SrcRegisterExtMod.Negate);
+   assert(!source->SrcRegister.Absolute);
 #endif
    return src;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 64027de6aa..b2234ef679 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -326,7 +326,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
    unsigned num_coords;
    unsigned i;
 
-   switch (inst->InstructionExtTexture.Texture) {
+   switch (inst->InstructionTexture.Texture) {
    case TGSI_TEXTURE_1D:
       num_coords = 1;
       break;
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index 388245ecb0..48df356faf 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -273,7 +273,7 @@ tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 		break;
 	}
 
-	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.abs = fsrc->SrcRegister.Absolute;
 	src.negate = fsrc->SrcRegister.Negate;
 	src.swz[0] = fsrc->SrcRegister.SwizzleX;
 	src.swz[1] = fsrc->SrcRegister.SwizzleY;
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 0ce702d6f8..eb978b6838 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -268,7 +268,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 		break;
 	}
 
-	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.abs = fsrc->SrcRegister.Absolute;
 	src.negate = fsrc->SrcRegister.Negate;
 	src.swz[0] = fsrc->SrcRegister.SwizzleX;
 	src.swz[1] = fsrc->SrcRegister.SwizzleY;
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 14a5c0260d..b04fb229bc 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -273,7 +273,7 @@ tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 		break;
 	}
 
-	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.abs = fsrc->SrcRegister.Absolute;
 	src.negate = fsrc->SrcRegister.Negate;
 	src.swz[0] = fsrc->SrcRegister.SwizzleX;
 	src.swz[1] = fsrc->SrcRegister.SwizzleY;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 99277506fc..dbbb736670 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -279,7 +279,7 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 		break;
 	}
 
-	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.abs = fsrc->SrcRegister.Absolute;
 	src.negate = fsrc->SrcRegister.Negate;
 	src.swz[0] = fsrc->SrcRegister.SwizzleX;
 	src.swz[1] = fsrc->SrcRegister.SwizzleY;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 31dae2457f..df9cb227a3 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -313,7 +313,7 @@ tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 		break;
 	}
 
-	src.abs = fsrc->SrcRegisterExtMod.Absolute;
+	src.abs = fsrc->SrcRegister.Absolute;
 	src.negate = fsrc->SrcRegister.Negate;
 	src.swz[0] = fsrc->SrcRegister.SwizzleX;
 	src.swz[1] = fsrc->SrcRegister.SwizzleY;
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bf50982dd1..e40e37d07c 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1575,10 +1575,10 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_TEX:
 	case TGSI_OPCODE_TXP:
 	{
-		const struct tgsi_instruction_ext_texture *tex;
+		const struct tgsi_instruction_texture *tex;
 
-		assert(insn->Instruction.Extended);
-		tex = &insn->InstructionExtTexture;
+		assert(insn->Instruction.Texture);
+		tex = &insn->InstructionTexture;
 
 		mask = 0x7;
 		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
@@ -2181,11 +2181,11 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		break;
 	case TGSI_OPCODE_TEX:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->InstructionExtTexture.Texture, FALSE);
+			 inst->InstructionTexture.Texture, FALSE);
 		break;
 	case TGSI_OPCODE_TXP:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->InstructionExtTexture.Texture, TRUE);
+			 inst->InstructionTexture.Texture, TRUE);
 		break;
 	case TGSI_OPCODE_TRUNC:
 		for (c = 0; c < 4; c++) {
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 589f1984ee..25a634e5a2 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -208,11 +208,11 @@ static void transform_srcreg(
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3;
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
-    dst->Abs = src->SrcRegisterExtMod.Absolute;
+    dst->Abs = src->SrcRegister.Absolute;
     dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0;
 }
 
-static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_ext_texture src)
+static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src)
 {
     switch(src.Texture) {
         case TGSI_TEXTURE_1D:
@@ -268,7 +268,8 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
     }
 
     /* Texturing. */
-    transform_texture(dst, src->InstructionExtTexture);
+    if (src->Instruction.Texture)
+       transform_texture(dst, src->InstructionTexture);
 }
 
 static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
-- 
cgit v1.2.3


From 52df532b02594e624bddd58ee60fd25075f8ec42 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 10 Nov 2009 16:55:44 -0800
Subject: llvmpipe: Fix typo in comparison operator.

---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 98ec1cb1b9..d438c0e63d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 
    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
       type.floating = TRUE;
-      assert(swizzle = 0);
+      assert(swizzle == 0);
       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
    }
    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
-- 
cgit v1.2.3


From b375526b50271317868a20484c8a1f36707e6005 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 10 Nov 2009 17:51:06 -0800
Subject: llvmpipe: Be more conservative with the supported formats.

We'll likely support many more formats, but doing this allows running
more testsuites without immediately hitting assertion failures.
---
 src/gallium/drivers/llvmpipe/lp_screen.c | 58 ++++++++++++++++++++++++++++----
 1 file changed, 51 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 0518927458..0fb133486a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,6 +27,7 @@
 
 
 #include "util/u_memory.h"
+#include "util/u_format.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
@@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    struct llvmpipe_winsys *winsys = screen->winsys;
+   const struct util_format_description *format_desc;
+
+   format_desc = util_format_description(format);
+   if(!format_desc)
+      return FALSE;
 
    assert(target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
-   if(format == PIPE_FORMAT_Z16_UNORM)
-      return FALSE;
-   if(format == PIPE_FORMAT_S8_UNORM)
-      return FALSE;
-
    switch(format) {
    case PIPE_FORMAT_DXT1_RGB:
    case PIPE_FORMAT_DXT1_RGBA:
@@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       break;
    }
 
-   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
-      return winsys->is_displaytarget_format_supported(winsys, format);
+   if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+      if(!winsys->is_displaytarget_format_supported(winsys, format))
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      /* FIXME: Temporary restriction. See lp_state_fs.c. */
+      if(format_desc->block.bits != 32)
+         return FALSE;
+   }
+
+   /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */
+   if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+   }
 
    return TRUE;
 }
-- 
cgit v1.2.3


From 2282fb7710d386bd10ccdd18f030069fae0a5d55 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 10 Nov 2009 17:52:53 -0800
Subject: llvmpipe: Use the generic conversion routine for depths.

This allows for z32f depth format to work correctly.
---
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2e9aa9fffe..2bde24430e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder,
    format_desc = util_format_description(key->zsbuf_format);
    assert(format_desc);
 
+   /*
+    * Depths are expected to be between 0 and 1, even if they are stored in
+    * floats. Setting these bits here will ensure that the lp_build_conv() call
+    * below won't try to unnecessarily clamp the incoming values.
+    */
+   if(src_type.floating) {
+      src_type.sign = FALSE;
+      src_type.norm = TRUE;
+   }
+   else {
+      assert(!src_type.sign);
+      assert(src_type.norm);
+   }
+
    /* Pick the depth type. */
    dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
 
@@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder,
    assert(dst_type.width == src_type.width);
    assert(dst_type.length == src_type.length);
 
-#if 1
-   src = lp_build_clamped_float_to_unsigned_norm(builder,
-                                                 src_type,
-                                                 dst_type.width,
-                                                 src);
-#else
    lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-#endif
+
+   dst_ptr = LLVMBuildBitCast(builder,
+                              dst_ptr,
+                              LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
 
    lp_build_depth_test(builder,
                        &key->depth,
-- 
cgit v1.2.3


From 066991c8d147db94b9661361bb191919b962fc4e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 15 Nov 2009 06:46:48 -0800
Subject: llvmpipe: Fix memory leak.

---
 src/gallium/drivers/llvmpipe/lp_state_vs.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 15c3029614..8a761648e7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
       (struct lp_vertex_shader *)vs;
 
    draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
-- 
cgit v1.2.3


From 4ae3e88dc9856f2f32c37dd04a3321765ed61e07 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 23 Nov 2009 11:21:11 +0000
Subject: llvmpipe: Use assert instead of abort. Only verify functions on debug
 builds.

---
 src/gallium/drivers/llvmpipe/lp_jit.c      | 2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 13535dd638..c601c79480 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -167,7 +167,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
    if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
       _debug_printf("%s\n", error);
       LLVMDisposeMessage(error);
-      abort();
+      assert(0);
    }
 
    screen->target = LLVMGetExecutionEngineTargetData(screen->engine);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2bde24430e..ee0f69b2af 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -622,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp,
     * Translate the LLVM IR into machine code.
     */
 
+#ifdef DEBUG
    if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
       LLVMDumpValue(variant->function);
-      abort();
+      assert(0);
    }
+#endif
 
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
-- 
cgit v1.2.3


From a71f8365049fb81f63245089b5438dcad6e83b19 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 24 Nov 2009 14:37:45 +0000
Subject: svga: Use consistent file names for dumping facilities.

---
 src/gallium/drivers/svga/Makefile                  |   4 +-
 src/gallium/drivers/svga/SConscript                |   4 +-
 src/gallium/drivers/svga/svga_tgsi.c               |   2 +-
 src/gallium/drivers/svga/svgadump/st_shader.h      | 214 -------
 src/gallium/drivers/svga/svgadump/st_shader_dump.c | 649 ---------------------
 src/gallium/drivers/svga/svgadump/st_shader_dump.h |  42 --
 src/gallium/drivers/svga/svgadump/st_shader_op.c   | 168 ------
 src/gallium/drivers/svga/svgadump/st_shader_op.h   |  46 --
 src/gallium/drivers/svga/svgadump/svga_dump.c      |   2 +-
 src/gallium/drivers/svga/svgadump/svga_dump.py     |   2 +-
 src/gallium/drivers/svga/svgadump/svga_shader.h    | 214 +++++++
 .../drivers/svga/svgadump/svga_shader_dump.c       | 649 +++++++++++++++++++++
 .../drivers/svga/svgadump/svga_shader_dump.h       |  42 ++
 src/gallium/drivers/svga/svgadump/svga_shader_op.c | 168 ++++++
 src/gallium/drivers/svga/svgadump/svga_shader_op.h |  46 ++
 15 files changed, 1126 insertions(+), 1126 deletions(-)
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.c
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.c
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
index d1413319c9..38b63394e3 100644
--- a/src/gallium/drivers/svga/Makefile
+++ b/src/gallium/drivers/svga/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
 LIBNAME = svga
 
 C_SOURCES = \
-	svgadump/st_shader_dump.c \
-	svgadump/st_shader_op.c \
+	svgadump/svga_shader_dump.c \
+	svgadump/svga_shader_op.c \
 	svgadump/svga_dump.c \
 	svga_cmd.c \
 	svga_context.c \
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index ff9645fc03..737b791ceb 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -60,8 +60,8 @@ sources = [
     'svga_tgsi_insn.c',
     
     'svgadump/svga_dump.c',
-    'svgadump/st_shader_dump.c',
-    'svgadump/st_shader_op.c',
+    'svgadump/svga_shader_dump.c',
+    'svgadump/svga_shader_op.c',
 ]
 
 svga = env.ConvenienceLibrary(
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 44d0930bc0..81eea1a145 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -32,7 +32,7 @@
 #include "tgsi/tgsi_scan.h"
 #include "util/u_memory.h"
 
-#include "svgadump/st_shader_dump.h"
+#include "svgadump/svga_shader_dump.h"
 
 #include "svga_context.h"
 #include "svga_tgsi.h"
diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h
deleted file mode 100644
index 2fc1796a90..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Definitions
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_H
-#define ST_SHADER_SVGA_H
-
-#include "pipe/p_compiler.h"
-
-struct sh_op
-{
-   unsigned opcode:16;
-   unsigned control:8;
-   unsigned length:4;
-   unsigned predicated:1;
-   unsigned unused:1;
-   unsigned coissue:1;
-   unsigned is_reg:1;
-};
-
-struct sh_reg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:14;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_reg_type( struct sh_reg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_cdata
-{
-   float xyzw[4];
-};
-
-struct sh_def
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   struct sh_cdata cdata;
-};
-
-struct sh_defb
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   uint data;
-};
-
-struct sh_idata
-{
-   int xyzw[4];
-};
-
-struct sh_defi
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   struct sh_idata idata;
-};
-
-#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
-#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
-#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
-#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
-
-struct ps_sampleinfo
-{
-   unsigned unused:27;
-   unsigned texture_type:4;
-   unsigned is_reg:1;
-};
-
-struct vs_semantic
-{
-   unsigned usage:5;
-   unsigned unused1:11;
-   unsigned usage_index:4;
-   unsigned unused2:12;
-};
-
-struct sh_dstreg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:2;
-   unsigned write_mask:4;
-   unsigned modifier:4;
-   unsigned shift_scale:4;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_dstreg_type( struct sh_dstreg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_dcl
-{
-   struct sh_op op;
-   union {
-      struct {
-         struct ps_sampleinfo sampleinfo;
-      } ps;
-      struct {
-         struct vs_semantic semantic;
-      } vs;
-   } u;
-   struct sh_dstreg reg;
-};
-
-
-struct sh_srcreg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:2;
-   unsigned swizzle_x:2;
-   unsigned swizzle_y:2;
-   unsigned swizzle_z:2;
-   unsigned swizzle_w:2;
-   unsigned modifier:4;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_srcreg_type( struct sh_srcreg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_dstop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-};
-
-struct sh_srcop
-{
-   struct sh_op op;
-   struct sh_srcreg src;
-};
-
-struct sh_src2op
-{
-   struct sh_op op;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-};
-
-struct sh_unaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src;
-};
-
-struct sh_binaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-};
-
-struct sh_trinaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-   struct sh_srcreg src2;
-};
-
-#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c
deleted file mode 100644
index d65cc93bfd..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_dump.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Dump Facilities
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#include "st_shader.h"
-#include "st_shader_dump.h"
-#include "st_shader_op.h"
-#include "util/u_debug.h"
-
-#include "../svga_hw_reg.h"
-#include "svga3d_shaderdefs.h"
-
-struct dump_info
-{
-   SVGA3dShaderVersion version;
-   boolean is_ps;
-};
-
-static void dump_op( struct sh_op op, const char *mnemonic )
-{
-   assert( op.predicated == 0 );
-   assert( op.is_reg == 0 );
-
-   if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
-   switch (op.control) {
-   case 0:
-      break;
-   case SVGA3DOPCONT_PROJECT:
-      debug_printf( "p" );
-      break;
-   case SVGA3DOPCONT_BIAS:
-      debug_printf( "b" );
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_comp_op( struct sh_op op, const char *mnemonic )
-{
-   assert( op.is_reg == 0 );
-
-   if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
-   switch (op.control) {
-   case SVGA3DOPCOMP_RESERVED0:
-      break;
-   case SVGA3DOPCOMP_GT:
-      debug_printf("_gt");
-      break;
-   case SVGA3DOPCOMP_EQ:
-      debug_printf("_eq");
-      break;
-   case SVGA3DOPCOMP_GE:
-      debug_printf("_ge");
-      break;
-   case SVGA3DOPCOMP_LT:
-      debug_printf("_lt");
-      break;
-   case SVGA3DOPCOMPC_NE:
-      debug_printf("_ne");
-      break;
-   case SVGA3DOPCOMP_LE:
-      debug_printf("_le");
-      break;
-   case SVGA3DOPCOMP_RESERVED1:
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
-{
-   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 );
-   assert( reg.is_reg == 1 );
-
-   switch (sh_reg_type( reg )) {
-   case SVGA3DREG_TEMP:
-      debug_printf( "r%u", reg.number );
-      break;
-
-   case SVGA3DREG_INPUT:
-      debug_printf( "v%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONST:
-      if (reg.relative) {
-         if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
-            debug_printf( "c[aL+%u]", reg.number );
-         else
-            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
-      }
-      else
-         debug_printf( "c%u", reg.number );
-      break;
-
-   case SVGA3DREG_ADDR:    /* VS */
-   /* SVGA3DREG_TEXTURE */ /* PS */
-      if (di->is_ps)
-         debug_printf( "t%u", reg.number );
-      else
-         debug_printf( "a%u", reg.number );
-      break;
-
-   case SVGA3DREG_RASTOUT:
-      switch (reg.number) {
-      case 0 /*POSITION*/:
-         debug_printf( "oPos" );
-         break;
-      case 1 /*FOG*/:
-         debug_printf( "oFog" );
-         break;
-      case 2 /*POINT_SIZE*/:
-         debug_printf( "oPts" );
-         break;
-      default:
-         assert( 0 );
-         debug_printf( "???" );
-      }
-      break;
-
-   case SVGA3DREG_ATTROUT:
-      assert( reg.number < 2 );
-      debug_printf( "oD%u", reg.number );
-      break;
-
-   case SVGA3DREG_TEXCRDOUT:
-   /* SVGA3DREG_OUTPUT */
-      debug_printf( "oT%u", reg.number );
-      break;
-
-   case SVGA3DREG_COLOROUT:
-      debug_printf( "oC%u", reg.number );
-      break;
-
-   case SVGA3DREG_DEPTHOUT:
-      debug_printf( "oD%u", reg.number );
-      break;
-
-   case SVGA3DREG_SAMPLER:
-      debug_printf( "s%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONSTBOOL:
-      assert( !reg.relative );
-      debug_printf( "b%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONSTINT:
-      assert( !reg.relative );
-      debug_printf( "i%u", reg.number );
-      break;
-
-   case SVGA3DREG_LOOP:
-      assert( reg.number == 0 );
-      debug_printf( "aL" );
-      break;
-
-   case SVGA3DREG_MISCTYPE:
-      switch (reg.number) {
-      case SVGA3DMISCREG_POSITION:
-         debug_printf( "vPos" );
-         break;
-      case SVGA3DMISCREG_FACE:
-         debug_printf( "vFace" );
-         break;
-      default:
-         assert(0);
-         break;
-      }
-      break;
-
-   case SVGA3DREG_LABEL:
-      debug_printf( "l%u", reg.number );
-      break;
-
-   case SVGA3DREG_PREDICATE:
-      debug_printf( "p%u", reg.number );
-      break;
-
-
-   default:
-      assert( 0 );
-      debug_printf( "???" );
-   }
-}
-
-static void dump_cdata( struct sh_cdata cdata )
-{
-   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
-}
-
-static void dump_idata( struct sh_idata idata )
-{
-   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
-}
-
-static void dump_bdata( boolean bdata )
-{
-   debug_printf( bdata ? "TRUE" : "FALSE" );
-}
-
-static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
-{
-   switch (sampleinfo.texture_type) {
-   case SVGA3DSAMP_2D:
-      debug_printf( "_2d" );
-      break;
-   case SVGA3DSAMP_CUBE:
-      debug_printf( "_cube" );
-      break;
-   case SVGA3DSAMP_VOLUME:
-      debug_printf( "_volume" );
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_usageinfo( struct vs_semantic semantic )
-{
-   switch (semantic.usage) {
-   case SVGA3D_DECLUSAGE_POSITION:
-      debug_printf("_position" );
-      break;
-   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
-      debug_printf("_blendweight" );
-      break;
-   case SVGA3D_DECLUSAGE_BLENDINDICES:
-      debug_printf("_blendindices" );
-      break;
-   case SVGA3D_DECLUSAGE_NORMAL:
-      debug_printf("_normal" );
-      break;
-   case SVGA3D_DECLUSAGE_PSIZE:
-      debug_printf("_psize" );
-      break;
-   case SVGA3D_DECLUSAGE_TEXCOORD:
-      debug_printf("_texcoord");
-      break;
-   case SVGA3D_DECLUSAGE_TANGENT:
-      debug_printf("_tangent" );
-      break;
-   case SVGA3D_DECLUSAGE_BINORMAL:
-      debug_printf("_binormal" );
-      break;
-   case SVGA3D_DECLUSAGE_TESSFACTOR:
-      debug_printf("_tessfactor" );
-      break;
-   case SVGA3D_DECLUSAGE_POSITIONT:
-      debug_printf("_positiont" );
-      break;
-   case SVGA3D_DECLUSAGE_COLOR:
-      debug_printf("_color" );
-      break;
-   case SVGA3D_DECLUSAGE_FOG:
-      debug_printf("_fog" );
-      break;
-   case SVGA3D_DECLUSAGE_DEPTH:
-      debug_printf("_depth" );
-      break;
-   case SVGA3D_DECLUSAGE_SAMPLE:
-      debug_printf("_sample");
-      break;
-   default:
-      assert( 0 );
-      return;
-   }
-
-   if (semantic.usage_index != 0) {
-      debug_printf("%d", semantic.usage_index );
-   }
-}
-
-static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
-{
-   union {
-      struct sh_reg reg;
-      struct sh_dstreg dstreg;
-   } u;
-
-   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
-
-   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
-      debug_printf( "_sat" );
-   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
-      debug_printf( "_pp" );
-   switch (dstreg.shift_scale) {
-   case 0:
-      break;
-   case 1:
-      debug_printf( "_x2" );
-      break;
-   case 2:
-      debug_printf( "_x4" );
-      break;
-   case 3:
-      debug_printf( "_x8" );
-      break;
-   case 13:
-      debug_printf( "_d8" );
-      break;
-   case 14:
-      debug_printf( "_d4" );
-      break;
-   case 15:
-      debug_printf( "_d2" );
-      break;
-   default:
-      assert( 0 );
-   }
-   debug_printf( " " );
-
-   u.dstreg = dstreg;
-   dump_reg( u.reg, NULL, di );
-   if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) {
-      debug_printf( "." );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_0)
-         debug_printf( "x" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_1)
-         debug_printf( "y" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_2)
-         debug_printf( "z" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_3)
-         debug_printf( "w" );
-   }
-}
-
-static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
-{
-   union {
-      struct sh_reg reg;
-      struct sh_srcreg srcreg;
-   } u;
-
-   switch (srcreg.modifier) {
-   case SVGA3DSRCMOD_NEG:
-   case SVGA3DSRCMOD_BIASNEG:
-   case SVGA3DSRCMOD_SIGNNEG:
-   case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "-" );
-      break;
-   case SVGA3DSRCMOD_ABS:
-      debug_printf( "|" );
-      break;
-   case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "-|" );
-      break;
-   case SVGA3DSRCMOD_COMP:
-      debug_printf( "1-" );
-      break;
-   case SVGA3DSRCMOD_NOT:
-      debug_printf( "!" );
-   }
-
-   u.srcreg = srcreg;
-   dump_reg( u.reg, indreg, di );
-   switch (srcreg.modifier) {
-   case SVGA3DSRCMOD_NONE:
-   case SVGA3DSRCMOD_NEG:
-   case SVGA3DSRCMOD_COMP:
-   case SVGA3DSRCMOD_NOT:
-      break;
-   case SVGA3DSRCMOD_ABS:
-   case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "|" );
-      break;
-   case SVGA3DSRCMOD_BIAS:
-   case SVGA3DSRCMOD_BIASNEG:
-      debug_printf( "_bias" );
-      break;
-   case SVGA3DSRCMOD_SIGN:
-   case SVGA3DSRCMOD_SIGNNEG:
-      debug_printf( "_bx2" );
-      break;
-   case SVGA3DSRCMOD_X2:
-   case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "_x2" );
-      break;
-   case SVGA3DSRCMOD_DZ:
-      debug_printf( "_dz" );
-      break;
-   case SVGA3DSRCMOD_DW:
-      debug_printf( "_dw" );
-      break;
-   default:
-      assert( 0 );
-   }
-   if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) {
-      debug_printf( "." );
-      if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
-      }
-      else {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
-      }
-   }
-}
-
-void
-sh_svga_dump(
-   const unsigned *assem,
-   unsigned dwords,
-   unsigned do_binary )
-{
-   const unsigned *start = assem;
-   boolean finished = FALSE;
-   struct dump_info di;
-   unsigned i;
-
-   if (do_binary) {
-      for (i = 0; i < dwords; i++) 
-         debug_printf("  0x%08x,\n", assem[i]);
-      
-      debug_printf("\n\n");
-   }
-
-   di.version.value = *assem++;
-   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
-
-   debug_printf(
-      "%s_%u_%u\n",
-      di.is_ps ? "ps" : "vs",
-      di.version.major,
-      di.version.minor );
-
-   while (!finished) {
-      struct sh_op op = *(struct sh_op *) assem;
-
-      if (assem - start >= dwords) {
-         debug_printf("... ran off end of buffer\n");
-         assert(0);
-         return;
-      }
-
-      switch (op.opcode) {
-      case SVGA3DOP_DCL:
-         {
-            struct sh_dcl dcl = *(struct sh_dcl *) assem;
-
-            debug_printf( "dcl" );
-            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
-               dump_sampleinfo( dcl.u.ps.sampleinfo );
-            else if (di.is_ps) {
-               if (di.version.major == 3 && 
-                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
-                  dump_usageinfo( dcl.u.vs.semantic );
-            }
-            else
-               dump_usageinfo( dcl.u.vs.semantic );
-            dump_dstreg( dcl.reg, &di );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_DEFB:
-         {
-            struct sh_defb defb = *(struct sh_defb *) assem;
-
-            debug_printf( "defb " );
-            dump_reg( defb.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_bdata( defb.data );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_DEFI:
-         {
-            struct sh_defi defi = *(struct sh_defi *) assem;
-
-            debug_printf( "defi " );
-            dump_reg( defi.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_idata( defi.idata );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_TEXCOORD:
-         assert( di.is_ps );
-         dump_op( op, "texcoord" );
-         if (0) {
-            struct sh_dstop dstop = *(struct sh_dstop *) assem;
-            dump_dstreg( dstop.dst, &di );
-            assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
-         }
-         else {
-            struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
-            dump_dstreg( unaryop.dst, &di );
-            debug_printf( ", " );
-            dump_srcreg( unaryop.src, NULL, &di );
-            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
-         }
-         debug_printf( "\n" );
-         break;
-
-      case SVGA3DOP_TEX:
-         assert( di.is_ps );
-         if (0) {
-            dump_op( op, "tex" );
-            if (0) {
-               struct sh_dstop dstop = *(struct sh_dstop *) assem;
-
-               dump_dstreg( dstop.dst, &di );
-               assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
-            }
-            else {
-               struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
-
-               dump_dstreg( unaryop.dst, &di );
-               debug_printf( ", " );
-               dump_srcreg( unaryop.src, NULL, &di );
-               assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
-            }
-         }
-         else {
-            struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
-
-            dump_op( op, "texld" );
-            dump_dstreg( binaryop.dst, &di );
-            debug_printf( ", " );
-            dump_srcreg( binaryop.src0, NULL, &di );
-            debug_printf( ", " );
-            dump_srcreg( binaryop.src1, NULL, &di );
-            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
-         }
-         debug_printf( "\n" );
-         break;
-
-      case SVGA3DOP_DEF:
-         {
-            struct sh_def def = *(struct sh_def *) assem;
-
-            debug_printf( "def " );
-            dump_reg( def.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_cdata( def.cdata );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_def ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_PHASE:
-         debug_printf( "phase\n" );
-         assem += sizeof( struct sh_op ) / sizeof( unsigned );
-         break;
-
-      case SVGA3DOP_COMMENT:
-         assert( 0 );
-         break;
-
-      case SVGA3DOP_RET:
-         debug_printf( "ret\n" );
-         assem += sizeof( struct sh_op ) / sizeof( unsigned );
-         break;
-
-      case SVGA3DOP_END:
-         debug_printf( "end\n" );
-         finished = TRUE;
-         break;
-
-      default:
-         {
-            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
-            uint i;
-            uint num_src = info->num_src + op.predicated;
-            boolean not_first_arg = FALSE;
-
-            assert( info->num_dst <= 1 );
-
-            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
-               num_src += 2;
-
-            dump_comp_op( op, info->mnemonic );
-            assem += sizeof( struct sh_op ) / sizeof( unsigned );
-
-            if (info->num_dst > 0) {
-               struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
-
-               dump_dstreg( dstreg, &di );
-               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
-               not_first_arg = TRUE;
-            }
-
-            for (i = 0; i < num_src; i++) {
-               struct sh_srcreg srcreg;
-               struct sh_srcreg indreg;
-
-               srcreg = *(struct sh_srcreg *) assem;
-               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
-               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
-                  indreg = *(struct sh_srcreg *) assem;
-                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
-               }
-
-               if (not_first_arg)
-                  debug_printf( ", " );
-               else
-                  debug_printf( " " );
-               dump_srcreg( srcreg, &indreg, &di );
-               not_first_arg = TRUE;
-            }
-
-            debug_printf( "\n" );
-         }
-      }
-   }
-}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h
deleted file mode 100644
index af5549cdba..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_dump.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Dump Facilities
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_DUMP_H
-#define ST_SHADER_SVGA_DUMP_H
-
-void
-sh_svga_dump(
-   const unsigned *assem,
-   unsigned dwords,
-   unsigned do_binary );
-
-#endif /* ST_SHADER_SVGA_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c
deleted file mode 100644
index 2c05382ab9..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_op.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Opcode Info
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#include "util/u_debug.h"
-#include "st_shader_op.h"
-
-#include "../svga_hw_reg.h"
-#include "svga3d_shaderdefs.h"
-
-#define SVGA3DOP_INVALID SVGA3DOP_END
-#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
-
-static struct sh_opcode_info opcode_info[] =
-{
-   { "nop",          0, 0, SVGA3DOP_NOP          },
-   { "mov",          1, 1, SVGA3DOP_MOV,         },
-   { "add",          1, 2, SVGA3DOP_ADD,         },
-   { "sub",          1, 2, SVGA3DOP_SUB,         },
-   { "mad",          1, 3, SVGA3DOP_MAD,         },
-   { "mul",          1, 2, SVGA3DOP_MUL,         },
-   { "rcp",          1, 1, SVGA3DOP_RCP,         },
-   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
-   { "dp3",          1, 2, SVGA3DOP_DP3,         },
-   { "dp4",          1, 2, SVGA3DOP_DP4,         },
-   { "min",          1, 2, SVGA3DOP_MIN,         },
-   { "max",          1, 2, SVGA3DOP_MAX,         },
-   { "slt",          1, 2, SVGA3DOP_SLT,         },
-   { "sge",          1, 2, SVGA3DOP_SGE,         },
-   { "exp",          1, 1, SVGA3DOP_EXP,         },
-   { "log",          1, 1, SVGA3DOP_LOG,         },
-   { "lit",          1, 1, SVGA3DOP_LIT,         },
-   { "dst",          1, 2, SVGA3DOP_DST,         },
-   { "lrp",          1, 3, SVGA3DOP_LRP,         },
-   { "frc",          1, 1, SVGA3DOP_FRC,         },
-   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
-   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
-   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
-   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
-   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
-   { "call",         0, 1, SVGA3DOP_CALL,        },
-   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
-   { "loop",         0, 2, SVGA3DOP_LOOP,        },
-   { "ret",          0, 0, SVGA3DOP_RET,         },
-   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
-   { "label",        0, 1, SVGA3DOP_LABEL,       },
-   { "dcl",          0, 0, SVGA3DOP_DCL,         },
-   { "pow",          1, 2, SVGA3DOP_POW,         },
-   { "crs",          1, 2, SVGA3DOP_CRS,         },
-   { "sgn",          1, 3, SVGA3DOP_SGN,         },
-   { "abs",          1, 1, SVGA3DOP_ABS,         },
-   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-componenet normalization */
-   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
-   { "rep",          0, 1, SVGA3DOP_REP,         },
-   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
-   { "if",           0, 1, SVGA3DOP_IF,          },
-   { "ifc",          0, 2, SVGA3DOP_IFC,         },
-   { "else",         0, 0, SVGA3DOP_ELSE,        },
-   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
-   { "break",        0, 0, SVGA3DOP_BREAK,       },
-   { "breakc",       0, 0, SVGA3DOP_BREAKC,      },
-   { "mova",         1, 1, SVGA3DOP_MOVA,        },
-   { "defb",         0, 0, SVGA3DOP_DEFB,        },
-   { "defi",         0, 0, SVGA3DOP_DEFI,        },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
-   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
-   { "tex",          0, 0, SVGA3DOP_TEX,         },
-   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
-   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
-   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
-   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
-   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
-   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
-   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
-   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
-   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
-   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
-   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
-   { "expp",         1, 1, SVGA3DOP_EXPP,        },
-   { "logp",         1, 1, SVGA3DOP_LOGP,        },
-   { "cnd",          1, 3, SVGA3DOP_CND,         },
-   { "def",          0, 0, SVGA3DOP_DEF,         },
-   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
-   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
-   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
-   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
-   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
-   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
-   { "cmp",          1, 3, SVGA3DOP_CMP,         },
-   { "bem",          1, 2, SVGA3DOP_BEM,         },
-   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
-   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
-   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
-   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
-   { "setp",         1, 2, SVGA3DOP_SETP,        },
-   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
-   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
-};
-
-const struct sh_opcode_info *sh_svga_opcode_info( uint op )
-{
-   struct sh_opcode_info *info;
-
-   if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) {
-      /* The opcode is either PHASE, COMMENT, END or out of range.
-       */
-      assert( 0 );
-      return NULL;
-   }
-
-   info = &opcode_info[op];
-
-   if (info->svga_opcode == SVGA3DOP_INVALID) {
-      /* No valid information. Please provide number of dst/src registers.
-       */
-      assert( 0 );
-      return NULL;
-   }
-
-   /* Sanity check.
-    */
-   assert( op == info->svga_opcode );
-
-   return info;
-}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h
deleted file mode 100644
index 01d39dca84..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_op.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Opcode Info
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_OP_H
-#define ST_SHADER_SVGA_OP_H
-
-struct sh_opcode_info
-{
-   const char *mnemonic;
-   unsigned num_dst:8;
-   unsigned num_src:8;
-   unsigned svga_opcode:16;
-};
-
-const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
-
-#endif /* ST_SHADER_SVGA_OP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index 180dde8dc1..c6c353f58e 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -31,7 +31,7 @@
  */
 
 #include "svga_types.h"
-#include "st_shader_dump.h"
+#include "svga_shader_dump.h"
 #include "svga3d_reg.h"
 
 #include "util/u_debug.h"
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index 3cb29c395b..288e753296 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -291,7 +291,7 @@ def main():
     print ' */'
     print
     print '#include "svga_types.h"'
-    print '#include "shader_dump/st_shader_dump.h"'
+    print '#include "svga_shader_dump.h"'
     print '#include "svga3d_reg.h"'
     print
     print '#include "pipe/p_debug.h"'
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
new file mode 100644
index 0000000000..2fc1796a90
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -0,0 +1,214 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Definitions
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_H
+#define ST_SHADER_SVGA_H
+
+#include "pipe/p_compiler.h"
+
+struct sh_op /* instruction token; the bit-fields below add up to 32 bits */
+{
+   unsigned opcode:16; /* SVGA3DOP_* value that sh_svga_dump() switches on */
+   unsigned control:8; /* compared against SVGA3DOPCONT_* / SVGA3DOPCOMP_* by the dump helpers */
+   unsigned length:4;
+   unsigned predicated:1; /* sh_svga_dump() counts one extra source operand when set */
+   unsigned unused:1;
+   unsigned coissue:1; /* printed as a leading '+' */
+   unsigned is_reg:1; /* asserted to be 0 for instruction tokens */
+};
+
+struct sh_reg /* generic register token (32 bits); dst/src tokens are viewed as this via a union when dumped */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper part of the register type, see sh_reg_type() */
+   unsigned relative:1;
+   unsigned unused:14;
+   unsigned type_lo:3; /* lower part of the register type, see sh_reg_type() */
+   unsigned is_reg:1; /* asserted to be 1 by dump_reg() */
+};
+
+static INLINE unsigned
+sh_reg_type( struct sh_reg reg ) /* reassemble the register type from its two split bit-fields */
+{
+   return reg.type_lo | (reg.type_hi << 3); /* type_lo is 3 bits wide, so type_hi lands in bits 3-4 */
+}
+
+struct sh_cdata
+{
+   float xyzw[4];
+};
+
+struct sh_def
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_cdata cdata;
+};
+
+struct sh_defb
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   uint data;
+};
+
+struct sh_idata
+{
+   int xyzw[4];
+};
+
+struct sh_defi
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_idata idata;
+};
+
+#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
+#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
+#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
+#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
+
+struct ps_sampleinfo
+{
+   unsigned unused:27;
+   unsigned texture_type:4;
+   unsigned is_reg:1;
+};
+
+struct vs_semantic
+{
+   unsigned usage:5;
+   unsigned unused1:11;
+   unsigned usage_index:4;
+   unsigned unused2:12;
+};
+
+struct sh_dstreg /* destination register token; the bit-fields add up to 32 bits */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper part of the register type, see sh_dstreg_type() */
+   unsigned relative:1;
+   unsigned unused:2;
+   unsigned write_mask:4; /* tested against SVGA3DWRITEMASK_* by dump_dstreg() */
+   unsigned modifier:4; /* SVGA3DDSTMOD_* flag bits */
+   unsigned shift_scale:4; /* dump_dstreg() prints 1..3 as _x2/_x4/_x8 and 13..15 as _d8/_d4/_d2 */
+   unsigned type_lo:3; /* lower part of the register type, see sh_dstreg_type() */
+   unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_dstreg_type( struct sh_dstreg reg ) /* reassemble the register type from its two split bit-fields */
+{
+   return reg.type_lo | (reg.type_hi << 3); /* type_lo is 3 bits wide, so type_hi lands in bits 3-4 */
+}
+
+struct sh_dcl
+{
+   struct sh_op op;
+   union {
+      struct {
+         struct ps_sampleinfo sampleinfo;
+      } ps;
+      struct {
+         struct vs_semantic semantic;
+      } vs;
+   } u;
+   struct sh_dstreg reg;
+};
+
+
+struct sh_srcreg /* source register token; the bit-fields add up to 32 bits */
+{
+   unsigned number:11;
+   unsigned type_hi:2; /* upper part of the register type, see sh_srcreg_type() */
+   unsigned relative:1; /* sh_svga_dump() may read a following indirection token when set */
+   unsigned unused:2;
+   unsigned swizzle_x:2; /* each swizzle field indexes "xyzw" in dump_srcreg() */
+   unsigned swizzle_y:2;
+   unsigned swizzle_z:2;
+   unsigned swizzle_w:2;
+   unsigned modifier:4; /* SVGA3DSRCMOD_* value */
+   unsigned type_lo:3; /* lower part of the register type, see sh_srcreg_type() */
+   unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_srcreg_type( struct sh_srcreg reg ) /* reassemble the register type from its two split bit-fields */
+{
+   return reg.type_lo | (reg.type_hi << 3); /* type_lo is 3 bits wide, so type_hi lands in bits 3-4 */
+}
+
+struct sh_dstop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+};
+
+struct sh_srcop
+{
+   struct sh_op op;
+   struct sh_srcreg src;
+};
+
+struct sh_src2op
+{
+   struct sh_op op;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_unaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src;
+};
+
+struct sh_binaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_trinaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+   struct sh_srcreg src2;
+};
+
+#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
new file mode 100644
index 0000000000..c654126d3a
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -0,0 +1,649 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "svga_shader.h"
+#include "svga_shader_dump.h"
+#include "svga_shader_op.h"
+#include "util/u_debug.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct dump_info /* decoding context handed to the register dump helpers */
+{
+   SVGA3dShaderVersion version; /* filled from the first dword of the token stream */
+   boolean is_ps; /* TRUE when version.type == SVGA3D_PS_TYPE */
+};
+
+static void dump_op( struct sh_op op, const char *mnemonic ) /* print mnemonic plus an optional "p"/"b" control suffix */
+{
+   assert( op.predicated == 0 );
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" ); /* co-issued instructions get a leading '+' */
+   debug_printf( "%s", mnemonic );
+   switch (op.control) {
+   case 0:
+      break; /* no suffix */
+   case SVGA3DOPCONT_PROJECT:
+      debug_printf( "p" );
+      break;
+   case SVGA3DOPCONT_BIAS:
+      debug_printf( "b" );
+      break;
+   default:
+      assert( 0 ); /* any other control value is unexpected here */
+   }
+}
+
+
+static void dump_comp_op( struct sh_op op, const char *mnemonic ) /* print mnemonic plus an optional comparison suffix */
+{
+   assert( op.is_reg == 0 ); /* unlike dump_op(), predicated instructions are accepted here */
+
+   if (op.coissue)
+      debug_printf( "+" ); /* co-issued instructions get a leading '+' */
+   debug_printf( "%s", mnemonic );
+   switch (op.control) {
+   case SVGA3DOPCOMP_RESERVED0:
+      break; /* no suffix */
+   case SVGA3DOPCOMP_GT:
+      debug_printf("_gt");
+      break;
+   case SVGA3DOPCOMP_EQ:
+      debug_printf("_eq");
+      break;
+   case SVGA3DOPCOMP_GE:
+      debug_printf("_ge");
+      break;
+   case SVGA3DOPCOMP_LT:
+      debug_printf("_lt");
+      break;
+   case SVGA3DOPCOMPC_NE:
+      debug_printf("_ne");
+      break;
+   case SVGA3DOPCOMP_LE:
+      debug_printf("_le");
+      break;
+   case SVGA3DOPCOMP_RESERVED1:
+   default:
+      assert( 0 ); /* RESERVED1 and unknown values are both treated as invalid */
+   }
+}
+
+
+static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di ) /* print a register name; indreg may be NULL */
+{
+   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 ); /* only constants may be relative */
+   assert( reg.is_reg == 1 );
+
+   switch (sh_reg_type( reg )) {
+   case SVGA3DREG_TEMP:
+      debug_printf( "r%u", reg.number );
+      break;
+
+   case SVGA3DREG_INPUT:
+      debug_printf( "v%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONST:
+      if (reg.relative && indreg) { /* several callers pass NULL; fall back to the plain form instead of crashing */
+         if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
+            debug_printf( "c[aL+%u]", reg.number );
+         else
+            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+      }
+      else
+         debug_printf( "c%u", reg.number );
+      break;
+
+   case SVGA3DREG_ADDR:    /* VS */
+   /* SVGA3DREG_TEXTURE */ /* PS */
+      if (di->is_ps)
+         debug_printf( "t%u", reg.number );
+      else
+         debug_printf( "a%u", reg.number );
+      break;
+
+   case SVGA3DREG_RASTOUT:
+      switch (reg.number) {
+      case 0 /*POSITION*/:
+         debug_printf( "oPos" );
+         break;
+      case 1 /*FOG*/:
+         debug_printf( "oFog" );
+         break;
+      case 2 /*POINT_SIZE*/:
+         debug_printf( "oPts" );
+         break;
+      default:
+         assert( 0 );
+         debug_printf( "???" );
+      }
+      break;
+
+   case SVGA3DREG_ATTROUT:
+      assert( reg.number < 2 );
+      debug_printf( "oD%u", reg.number );
+      break;
+
+   case SVGA3DREG_TEXCRDOUT:
+   /* SVGA3DREG_OUTPUT */
+      debug_printf( "oT%u", reg.number );
+      break;
+
+   case SVGA3DREG_COLOROUT:
+      debug_printf( "oC%u", reg.number );
+      break;
+
+   case SVGA3DREG_DEPTHOUT:
+      debug_printf( "oD%u", reg.number ); /* NOTE(review): same "oD" prefix as ATTROUT above - confirm intended */
+      break;
+
+   case SVGA3DREG_SAMPLER:
+      debug_printf( "s%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTBOOL:
+      assert( !reg.relative );
+      debug_printf( "b%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTINT:
+      assert( !reg.relative );
+      debug_printf( "i%u", reg.number );
+      break;
+
+   case SVGA3DREG_LOOP:
+      assert( reg.number == 0 );
+      debug_printf( "aL" );
+      break;
+
+   case SVGA3DREG_MISCTYPE:
+      switch (reg.number) {
+      case SVGA3DMISCREG_POSITION:
+         debug_printf( "vPos" );
+         break;
+      case SVGA3DMISCREG_FACE:
+         debug_printf( "vFace" );
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+
+   case SVGA3DREG_LABEL:
+      debug_printf( "l%u", reg.number );
+      break;
+
+   case SVGA3DREG_PREDICATE:
+      debug_printf( "p%u", reg.number );
+      break;
+
+
+   default:
+      assert( 0 );
+      debug_printf( "???" ); /* unknown register type */
+   }
+}
+
+static void dump_cdata( struct sh_cdata cdata ) /* print the four float components, comma separated */
+{
+   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+}
+
+static void dump_idata( struct sh_idata idata ) /* print the four int components, comma separated */
+{
+   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+}
+
+static void dump_bdata( boolean bdata ) /* print a boolean constant as TRUE or FALSE */
+{
+   debug_printf( "%s", bdata ? "TRUE" : "FALSE" ); /* keep the format argument a string literal */
+}
+
+static void dump_sampleinfo( struct ps_sampleinfo sampleinfo ) /* print the texture-type suffix of a sampler dcl */
+{
+   switch (sampleinfo.texture_type) {
+   case SVGA3DSAMP_2D:
+      debug_printf( "_2d" );
+      break;
+   case SVGA3DSAMP_CUBE:
+      debug_printf( "_cube" );
+      break;
+   case SVGA3DSAMP_VOLUME:
+      debug_printf( "_volume" );
+      break;
+   default:
+      assert( 0 ); /* any other texture type prints nothing */
+   }
+}
+
+
+static void dump_usageinfo( struct vs_semantic semantic ) /* print the usage suffix of a dcl, e.g. "_texcoord1" */
+{
+   switch (semantic.usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      debug_printf("_position" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      debug_printf("_blendweight" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      debug_printf("_blendindices" );
+      break;
+   case SVGA3D_DECLUSAGE_NORMAL:
+      debug_printf("_normal" );
+      break;
+   case SVGA3D_DECLUSAGE_PSIZE:
+      debug_printf("_psize" );
+      break;
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      debug_printf("_texcoord");
+      break;
+   case SVGA3D_DECLUSAGE_TANGENT:
+      debug_printf("_tangent" );
+      break;
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      debug_printf("_binormal" );
+      break;
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      debug_printf("_tessfactor" );
+      break;
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      debug_printf("_positiont" );
+      break;
+   case SVGA3D_DECLUSAGE_COLOR:
+      debug_printf("_color" );
+      break;
+   case SVGA3D_DECLUSAGE_FOG:
+      debug_printf("_fog" );
+      break;
+   case SVGA3D_DECLUSAGE_DEPTH:
+      debug_printf("_depth" );
+      break;
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      debug_printf("_sample");
+      break;
+   default:
+      assert( 0 );
+      return; /* unknown usage: skip the index as well */
+   }
+
+   if (semantic.usage_index != 0) { /* index 0 is left implicit */
+      debug_printf("%d", semantic.usage_index );
+   }
+}
+
+static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di ) /* print modifiers, a space, the register, then the write mask */
+{
+   union {
+      struct sh_reg reg;
+      struct sh_dstreg dstreg;
+   } u; /* lets the dst token be handed to dump_reg() as a generic sh_reg */
+
+   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier ); /* no other modifier bits expected */
+
+   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
+      debug_printf( "_sat" );
+   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
+      debug_printf( "_pp" );
+   switch (dstreg.shift_scale) {
+   case 0:
+      break; /* no scaling suffix */
+   case 1:
+      debug_printf( "_x2" );
+      break;
+   case 2:
+      debug_printf( "_x4" );
+      break;
+   case 3:
+      debug_printf( "_x8" );
+      break;
+   case 13:
+      debug_printf( "_d8" );
+      break;
+   case 14:
+      debug_printf( "_d4" );
+      break;
+   case 15:
+      debug_printf( "_d2" );
+      break;
+   default:
+      assert( 0 );
+   }
+   debug_printf( " " ); /* separates the opcode and its suffixes from the register name */
+
+   u.dstreg = dstreg;
+   dump_reg( u.reg, NULL, di ); /* no indirection register is supplied for destinations */
+   if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) { /* a full mask is left implicit */
+      debug_printf( "." );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_0)
+         debug_printf( "x" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_1)
+         debug_printf( "y" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_2)
+         debug_printf( "z" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_3)
+         debug_printf( "w" );
+   }
+}
+
+static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) /* print modifier prefix, register, modifier suffix, swizzle */
+{
+   union {
+      struct sh_reg reg;
+      struct sh_srcreg srcreg;
+   } u; /* lets the src token be handed to dump_reg() as a generic sh_reg */
+
+   switch (srcreg.modifier) { /* first pass: the part of the modifier printed before the register */
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_BIASNEG:
+   case SVGA3DSRCMOD_SIGNNEG:
+   case SVGA3DSRCMOD_X2NEG:
+      debug_printf( "-" );
+      break;
+   case SVGA3DSRCMOD_ABS:
+      debug_printf( "|" );
+      break;
+   case SVGA3DSRCMOD_ABSNEG:
+      debug_printf( "-|" );
+      break;
+   case SVGA3DSRCMOD_COMP:
+      debug_printf( "1-" );
+      break;
+   case SVGA3DSRCMOD_NOT:
+      debug_printf( "!" );
+   } /* modifiers not listed above print no prefix */
+
+   u.srcreg = srcreg;
+   dump_reg( u.reg, indreg, di );
+   switch (srcreg.modifier) { /* second pass: the part of the modifier printed after the register */
+   case SVGA3DSRCMOD_NONE:
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_COMP:
+   case SVGA3DSRCMOD_NOT:
+      break; /* no suffix */
+   case SVGA3DSRCMOD_ABS:
+   case SVGA3DSRCMOD_ABSNEG:
+      debug_printf( "|" ); /* closes the '|' opened in the first pass */
+      break;
+   case SVGA3DSRCMOD_BIAS:
+   case SVGA3DSRCMOD_BIASNEG:
+      debug_printf( "_bias" );
+      break;
+   case SVGA3DSRCMOD_SIGN:
+   case SVGA3DSRCMOD_SIGNNEG:
+      debug_printf( "_bx2" );
+      break;
+   case SVGA3DSRCMOD_X2:
+   case SVGA3DSRCMOD_X2NEG:
+      debug_printf( "_x2" );
+      break;
+   case SVGA3DSRCMOD_DZ:
+      debug_printf( "_dz" );
+      break;
+   case SVGA3DSRCMOD_DW:
+      debug_printf( "_dw" );
+      break;
+   default:
+      assert( 0 );
+   }
+   if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) { /* the identity swizzle is left implicit */
+      debug_printf( "." );
+      if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] ); /* all four equal: print the component once */
+      }
+      else {
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
+      }
+   }
+}
+
+void
+sh_svga_dump(
+   const unsigned *assem,   /* token stream; the first dword is the version token */
+   unsigned dwords,         /* number of dwords available in assem */
+   unsigned do_binary )     /* non-zero: print a raw hex listing before the disassembly */
+{
+   const unsigned *start = assem;
+   boolean finished = FALSE;
+   struct dump_info di;
+   unsigned i;
+
+   if (do_binary) {
+      for (i = 0; i < dwords; i++)
+         debug_printf("  0x%08x,\n", assem[i]);
+
+      debug_printf("\n\n");
+   }
+   if (dwords == 0) return; /* empty stream: there is not even a version token to read */
+   di.version.value = *assem++;
+   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
+
+   debug_printf(
+      "%s_%u_%u\n",
+      di.is_ps ? "ps" : "vs",
+      di.version.major,
+      di.version.minor );
+
+   while (!finished) {
+      struct sh_op op;
+
+      if (assem - start >= dwords) {
+         debug_printf("... ran off end of buffer\n");
+         assert(0);
+         return;
+      }
+      op = *(struct sh_op *) assem; /* fetched only after the bounds check above */
+      switch (op.opcode) {
+      case SVGA3DOP_DCL:
+         {
+            struct sh_dcl dcl = *(struct sh_dcl *) assem;
+
+            debug_printf( "dcl" );
+            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
+               dump_sampleinfo( dcl.u.ps.sampleinfo );
+            else if (di.is_ps) {
+               if (di.version.major == 3 &&
+                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
+                  dump_usageinfo( dcl.u.vs.semantic );
+            }
+            else
+               dump_usageinfo( dcl.u.vs.semantic );
+            dump_dstreg( dcl.reg, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFB:
+         {
+            struct sh_defb defb = *(struct sh_defb *) assem;
+
+            debug_printf( "defb " );
+            dump_reg( defb.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_bdata( defb.data );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFI:
+         {
+            struct sh_defi defi = *(struct sh_defi *) assem;
+
+            debug_printf( "defi " );
+            dump_reg( defi.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_idata( defi.idata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_TEXCOORD:
+         assert( di.is_ps );
+         dump_op( op, "texcoord" );
+         if (0) {
+            struct sh_dstop dstop = *(struct sh_dstop *) assem;
+            dump_dstreg( dstop.dst, &di );
+            assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+         }
+         else {
+            struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+            dump_dstreg( unaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( unaryop.src, NULL, &di );
+            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_TEX:
+         assert( di.is_ps );
+         if (0) {
+            dump_op( op, "tex" );
+            if (0) {
+               struct sh_dstop dstop = *(struct sh_dstop *) assem;
+
+               dump_dstreg( dstop.dst, &di );
+               assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+            }
+            else {
+               struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+
+               dump_dstreg( unaryop.dst, &di );
+               debug_printf( ", " );
+               dump_srcreg( unaryop.src, NULL, &di );
+               assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+            }
+         }
+         else {
+            struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
+
+            dump_op( op, "texld" );
+            dump_dstreg( binaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src0, NULL, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src1, NULL, &di );
+            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_DEF:
+         {
+            struct sh_def def = *(struct sh_def *) assem;
+
+            debug_printf( "def " );
+            dump_reg( def.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_cdata( def.cdata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_def ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_PHASE:
+         debug_printf( "phase\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_COMMENT:
+         assert( 0 ); /* comment tokens are not decoded */
+         return; /* assem was not advanced, so looping again would never terminate */
+
+      case SVGA3DOP_RET:
+         debug_printf( "ret\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_END:
+         debug_printf( "end\n" );
+         finished = TRUE;
+         break;
+
+      default:
+         {
+            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
+            uint i, num_src;
+            boolean not_first_arg = FALSE;
+
+            if (!info) /* unknown opcode: its operand count is unknowable, so stop decoding */
+               return;
+            num_src = info->num_src + op.predicated; /* a predicated op carries one extra source token */
+            assert( info->num_dst <= 1 );
+            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
+               num_src += 2;
+            dump_comp_op( op, info->mnemonic );
+            assem += sizeof( struct sh_op ) / sizeof( unsigned );
+
+            if (info->num_dst > 0) {
+               struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
+
+               dump_dstreg( dstreg, &di );
+               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
+               not_first_arg = TRUE;
+            }
+
+            for (i = 0; i < num_src; i++) {
+               struct sh_srcreg srcreg;
+               struct sh_srcreg indreg = { 0 }; /* stays zeroed when no indirection token follows */
+
+               srcreg = *(struct sh_srcreg *) assem;
+               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
+                  indreg = *(struct sh_srcreg *) assem;
+                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               }
+
+               if (not_first_arg)
+                  debug_printf( ", " );
+               else
+                  debug_printf( " " );
+               dump_srcreg( srcreg, &indreg, &di );
+               not_first_arg = TRUE;
+            }
+
+            debug_printf( "\n" );
+         }
+      }
+   }
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
new file mode 100644
index 0000000000..af5549cdba
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_DUMP_H
+#define ST_SHADER_SVGA_DUMP_H
+
+void
+sh_svga_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary );
+
+#endif /* ST_SHADER_SVGA_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
new file mode 100644
index 0000000000..cecc22106b
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -0,0 +1,168 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "util/u_debug.h"
+#include "svga_shader_op.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+#define SVGA3DOP_INVALID SVGA3DOP_END
+#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
+
+static struct sh_opcode_info opcode_info[] =
+{
+   { "nop",          0, 0, SVGA3DOP_NOP          },
+   { "mov",          1, 1, SVGA3DOP_MOV,         },
+   { "add",          1, 2, SVGA3DOP_ADD,         },
+   { "sub",          1, 2, SVGA3DOP_SUB,         },
+   { "mad",          1, 3, SVGA3DOP_MAD,         },
+   { "mul",          1, 2, SVGA3DOP_MUL,         },
+   { "rcp",          1, 1, SVGA3DOP_RCP,         },
+   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
+   { "dp3",          1, 2, SVGA3DOP_DP3,         },
+   { "dp4",          1, 2, SVGA3DOP_DP4,         },
+   { "min",          1, 2, SVGA3DOP_MIN,         },
+   { "max",          1, 2, SVGA3DOP_MAX,         },
+   { "slt",          1, 2, SVGA3DOP_SLT,         },
+   { "sge",          1, 2, SVGA3DOP_SGE,         },
+   { "exp",          1, 1, SVGA3DOP_EXP,         },
+   { "log",          1, 1, SVGA3DOP_LOG,         },
+   { "lit",          1, 1, SVGA3DOP_LIT,         },
+   { "dst",          1, 2, SVGA3DOP_DST,         },
+   { "lrp",          1, 3, SVGA3DOP_LRP,         },
+   { "frc",          1, 1, SVGA3DOP_FRC,         },
+   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
+   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
+   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
+   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
+   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
+   { "call",         0, 1, SVGA3DOP_CALL,        },
+   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
+   { "loop",         0, 2, SVGA3DOP_LOOP,        },
+   { "ret",          0, 0, SVGA3DOP_RET,         },
+   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
+   { "label",        0, 1, SVGA3DOP_LABEL,       },
+   { "dcl",          0, 0, SVGA3DOP_DCL,         },
+   { "pow",          1, 2, SVGA3DOP_POW,         },
+   { "crs",          1, 2, SVGA3DOP_CRS,         },
+   { "sgn",          1, 3, SVGA3DOP_SGN,         },
+   { "abs",          1, 1, SVGA3DOP_ABS,         },
+   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-componenet normalization */
+   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
+   { "rep",          0, 1, SVGA3DOP_REP,         },
+   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
+   { "if",           0, 1, SVGA3DOP_IF,          },
+   { "ifc",          0, 2, SVGA3DOP_IFC,         },
+   { "else",         0, 0, SVGA3DOP_ELSE,        },
+   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
+   { "break",        0, 0, SVGA3DOP_BREAK,       },
+   { "breakc",       0, 0, SVGA3DOP_BREAKC,      },
+   { "mova",         1, 1, SVGA3DOP_MOVA,        },
+   { "defb",         0, 0, SVGA3DOP_DEFB,        },
+   { "defi",         0, 0, SVGA3DOP_DEFI,        },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
+   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
+   { "tex",          0, 0, SVGA3DOP_TEX,         },
+   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
+   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
+   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
+   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
+   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
+   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
+   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
+   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
+   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
+   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
+   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
+   { "expp",         1, 1, SVGA3DOP_EXPP,        },
+   { "logp",         1, 1, SVGA3DOP_LOGP,        },
+   { "cnd",          1, 3, SVGA3DOP_CND,         },
+   { "def",          0, 0, SVGA3DOP_DEF,         },
+   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
+   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
+   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
+   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
+   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
+   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
+   { "cmp",          1, 3, SVGA3DOP_CMP,         },
+   { "bem",          1, 2, SVGA3DOP_BEM,         },
+   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
+   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
+   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
+   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
+   { "setp",         1, 2, SVGA3DOP_SETP,        },
+   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
+   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
+};
+
+const struct sh_opcode_info *sh_svga_opcode_info( uint op )
+{
+   struct sh_opcode_info *info;
+
+   if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) {
+      /* The opcode is either PHASE, COMMENT, END or out of range.
+       */
+      assert( 0 );
+      return NULL;
+   }
+
+   info = &opcode_info[op];
+
+   if (info->svga_opcode == SVGA3DOP_INVALID) {
+      /* No valid information. Please provide number of dst/src registers.
+       */
+      assert( 0 );
+      return NULL;
+   }
+
+   /* Sanity check.
+    */
+   assert( op == info->svga_opcode );
+
+   return info;
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
new file mode 100644
index 0000000000..01d39dca84
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_OP_H
+#define ST_SHADER_SVGA_OP_H
+
+struct sh_opcode_info
+{
+   const char *mnemonic;
+   unsigned num_dst:8;
+   unsigned num_src:8;
+   unsigned svga_opcode:16;
+};
+
+const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
+
+#endif /* ST_SHADER_SVGA_OP_H */
-- 
cgit v1.2.3


From d185c2fd1318bd41f303ab4a5f6e0a048b76c11c Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 24 Nov 2009 14:43:30 +0000
Subject: svga: Use consistent names for public symbol names of shader dumping
 facilities.

---
 src/gallium/drivers/svga/svgadump/svga_dump.c        | 2 +-
 src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 4 ++--
 src/gallium/drivers/svga/svgadump/svga_shader_dump.h | 8 ++++----
 src/gallium/drivers/svga/svgadump/svga_shader_op.c   | 2 +-
 src/gallium/drivers/svga/svgadump/svga_shader_op.h   | 8 ++++----
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index c6c353f58e..910afa2528 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1627,7 +1627,7 @@ svga_dump_commands(const void *commands, uint32_t size)
                const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
                dump_SVGA3dCmdDefineShader(cmd);
                body = (const uint8_t *)&cmd[1];
-               sh_svga_dump((const uint32_t *)body, 
+               svga_shader_dump((const uint32_t *)body, 
                             (unsigned)(next - body)/sizeof(uint32_t),
                             FALSE );
                body = next;
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
index c654126d3a..7718bdf757 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -435,7 +435,7 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons
 }
 
 void
-sh_svga_dump(
+svga_shader_dump(
    const unsigned *assem,
    unsigned dwords,
    unsigned do_binary )
@@ -602,7 +602,7 @@ sh_svga_dump(
 
       default:
          {
-            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
+            const struct sh_opcode_info *info = svga_opcode_info( op.opcode );
             uint i;
             uint num_src = info->num_src + op.predicated;
             boolean not_first_arg = FALSE;
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
index af5549cdba..a2657acb2f 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -30,13 +30,13 @@
  * @author Michal Krol <michal@vmware.com>
  */
 
-#ifndef ST_SHADER_SVGA_DUMP_H
-#define ST_SHADER_SVGA_DUMP_H
+#ifndef SVGA_SHADER_DUMP_H
+#define SVGA_SHADER_DUMP_H
 
 void
-sh_svga_dump(
+svga_shader_dump(
    const unsigned *assem,
    unsigned dwords,
    unsigned do_binary );
 
-#endif /* ST_SHADER_SVGA_DUMP_H */
+#endif /* SVGA_SHADER_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
index cecc22106b..8343bfdaab 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -140,7 +140,7 @@ static struct sh_opcode_info opcode_info[] =
    { "breakp",       1, 1, SVGA3DOP_INVALID,     },
 };
 
-const struct sh_opcode_info *sh_svga_opcode_info( uint op )
+const struct sh_opcode_info *svga_opcode_info( uint op )
 {
    struct sh_opcode_info *info;
 
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
index 01d39dca84..e558de02c5 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -30,8 +30,8 @@
  * @author Michal Krol <michal@vmware.com>
  */
 
-#ifndef ST_SHADER_SVGA_OP_H
-#define ST_SHADER_SVGA_OP_H
+#ifndef SVGA_SHADER_OP_H
+#define SVGA_SHADER_OP_H
 
 struct sh_opcode_info
 {
@@ -41,6 +41,6 @@ struct sh_opcode_info
    unsigned svga_opcode:16;
 };
 
-const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
+const struct sh_opcode_info *svga_opcode_info( unsigned op );
 
-#endif /* ST_SHADER_SVGA_OP_H */
+#endif /* SVGA_SHADER_OP_H */
-- 
cgit v1.2.3


From f3a0615fb0452f11f4db88861b30b2177bdd948a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 15 Nov 2009 12:14:03 -0800
Subject: svga: Handle comment tokens when dumping.

---
 src/gallium/drivers/svga/svgadump/svga_shader.h      | 6 ++++++
 src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
index 2fc1796a90..9217af2dd9 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -211,4 +211,10 @@ struct sh_trinaryop
    struct sh_srcreg src2;
 };
 
+struct sh_comment
+{
+   unsigned opcode:16;
+   unsigned size:16;
+};
+
 #endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
index 7718bdf757..b0e7fdf378 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -587,7 +587,12 @@ svga_shader_dump(
          break;
 
       case SVGA3DOP_COMMENT:
-         assert( 0 );
+         {
+            struct sh_comment comment = *(struct sh_comment *)assem;
+
+            /* Ignore comment contents. */
+            assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size;
+         }
          break;
 
       case SVGA3DOP_RET:
-- 
cgit v1.2.3


From 763426a0256f0ab06f8af53947bd630f8600183a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 14:53:29 +0000
Subject: tgsi: reduce repetition of structure name in its members

Rename Semantic.SemanticName to Semantic.Name.  Similar for
SemanticIndex, and the members of the tgsi_version struct.
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c   | 14 ++++-----
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c  | 14 ++++-----
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c |  6 ++--
 src/gallium/auxiliary/tgsi/tgsi_build.c         | 16 +++++------
 src/gallium/auxiliary/tgsi/tgsi_dump.c          | 12 ++++----
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c        | 16 +++++------
 src/gallium/auxiliary/tgsi/tgsi_exec.c          |  8 +++---
 src/gallium/auxiliary/tgsi/tgsi_parse.c         |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_scan.c          | 10 +++----
 src/gallium/auxiliary/tgsi/tgsi_text.c          |  4 +--
 src/gallium/auxiliary/tgsi/tgsi_ureg.c          |  8 +++---
 src/gallium/auxiliary/vl/vl_shader_build.c      | 16 +++++------
 src/gallium/drivers/nv20/nv20_vertprog.c        | 14 ++++-----
 src/gallium/drivers/nv30/nv30_fragprog.c        | 12 ++++----
 src/gallium/drivers/nv30/nv30_vertprog.c        | 14 ++++-----
 src/gallium/drivers/nv40/nv40_fragprog.c        | 14 ++++-----
 src/gallium/drivers/nv40/nv40_vertprog.c        | 14 ++++-----
 src/gallium/drivers/nv50/nv50_program.c         |  4 +--
 src/gallium/drivers/r300/r300_vs.c              |  4 +--
 src/gallium/drivers/svga/svga_tgsi_decl_sm20.c  | 34 +++++++++++-----------
 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c  | 38 ++++++++++++-------------
 src/gallium/include/pipe/p_shader_tokens.h      |  8 +++---
 22 files changed, 141 insertions(+), 141 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index e374010fee..7b0b236a3d 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -139,8 +139,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
 
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
-       decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
-       decl->Semantic.SemanticIndex == 0) {
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
       aactx->colorOutput = decl->DeclarationRange.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
@@ -153,9 +153,9 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       if ((int) decl->DeclarationRange.Last > aactx->maxInput)
          aactx->maxInput = decl->DeclarationRange.Last;
-      if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
-           (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
-         aactx->maxGeneric = decl->Semantic.SemanticIndex;
+      if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+           (int) decl->Semantic.Index > aactx->maxGeneric) {
+         aactx->maxGeneric = decl->Semantic.Index;
       }
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
@@ -228,8 +228,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       /* XXX this could be linear... */
       decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
       decl.Declaration.Semantic = 1;
-      decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
-      decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
+      decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
+      decl.Semantic.Index = aactx->maxGeneric + 1;
       decl.DeclarationRange.First = 
       decl.DeclarationRange.Last = aactx->maxInput + 1;
       ctx->emit_declaration(ctx, &decl);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index ae1712fe12..1f29448ff8 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -131,16 +131,16 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
 
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
-       decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR &&
-       decl->Semantic.SemanticIndex == 0) {
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
       aactx->colorOutput = decl->DeclarationRange.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       if ((int) decl->DeclarationRange.Last > aactx->maxInput)
          aactx->maxInput = decl->DeclarationRange.Last;
-      if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC &&
-           (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) {
-         aactx->maxGeneric = decl->Semantic.SemanticIndex;
+      if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
+           (int) decl->Semantic.Index > aactx->maxGeneric) {
+         aactx->maxGeneric = decl->Semantic.Index;
       }
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
@@ -198,8 +198,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       /* XXX this could be linear... */
       decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
       decl.Declaration.Semantic = 1;
-      decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
-      decl.Semantic.SemanticIndex = aactx->maxGeneric + 1;
+      decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
+      decl.Semantic.Index = aactx->maxGeneric + 1;
       decl.DeclarationRange.First = 
       decl.DeclarationRange.Last = texInput;
       ctx->emit_declaration(ctx, &decl);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 9de06e37ed..75774e6626 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -140,7 +140,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
    }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
       pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last);
-      if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION)
+      if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
          pctx->wincoordInput = (int) decl->DeclarationRange.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
@@ -226,8 +226,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
          decl.Declaration.File = TGSI_FILE_INPUT;
          decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */
          decl.Declaration.Semantic = 1;
-         decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION;
-         decl.Semantic.SemanticIndex = 0;
+         decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
+         decl.Semantic.Index = 0;
          decl.DeclarationRange.First = 
             decl.DeclarationRange.Last = wincoordInput;
          ctx->emit_declaration(ctx, &decl);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index fbac265640..2e6c5b38b4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -39,8 +39,8 @@ tgsi_build_version( void )
 {
    struct tgsi_version  version;
 
-   version.MajorVersion = 1;
-   version.MinorVersion = 1;
+   version.Major = 1;
+   version.Minor = 1;
    version.Padding = 0;
 
    return version;
@@ -223,8 +223,8 @@ tgsi_build_full_declaration(
       size++;
 
       *ds = tgsi_build_declaration_semantic(
-         full_decl->Semantic.SemanticName,
-         full_decl->Semantic.SemanticIndex,
+         full_decl->Semantic.Name,
+         full_decl->Semantic.Index,
          declaration,
          header );
    }
@@ -269,8 +269,8 @@ tgsi_default_declaration_semantic( void )
 {
    struct tgsi_declaration_semantic ds;
 
-   ds.SemanticName = TGSI_SEMANTIC_POSITION;
-   ds.SemanticIndex = 0;
+   ds.Name = TGSI_SEMANTIC_POSITION;
+   ds.Index = 0;
    ds.Padding = 0;
 
    return ds;
@@ -289,8 +289,8 @@ tgsi_build_declaration_semantic(
    assert( semantic_index <= 0xFFFF );
 
    ds = tgsi_default_declaration_semantic();
-   ds.SemanticName = semantic_name;
-   ds.SemanticIndex = semantic_index;
+   ds.Name = semantic_name;
+   ds.Index = semantic_index;
 
    declaration_grow( declaration, header );
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 7eb64167fb..8f26d5dae3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -232,11 +232,11 @@ iter_declaration(
 
    if (decl->Declaration.Semantic) {
       TXT( ", " );
-      ENM( decl->Semantic.SemanticName, semantic_names );
-      if (decl->Semantic.SemanticIndex != 0 ||
-          decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) {
+      ENM( decl->Semantic.Name, semantic_names );
+      if (decl->Semantic.Index != 0 ||
+          decl->Semantic.Name == TGSI_SEMANTIC_GENERIC) {
          CHR( '[' );
-         UID( decl->Semantic.SemanticIndex );
+         UID( decl->Semantic.Index );
          CHR( ']' );
       }
    }
@@ -477,9 +477,9 @@ prolog(
 {
    struct dump_ctx *ctx = (struct dump_ctx *) iter;
    ENM( iter->processor.Processor, processor_type_names );
-   UID( iter->version.MajorVersion );
+   UID( iter->version.Major );
    CHR( '.' );
-   UID( iter->version.MinorVersion );
+   UID( iter->version.Minor );
    EOL();
    return TRUE;
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 4648051e29..11d28b1653 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -229,10 +229,10 @@ dump_declaration_verbose(
 
    if( decl->Declaration.Semantic ) {
       EOL();
-      TXT( "\nSemanticName : " );
-      ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS );
-      TXT( "\nSemanticIndex: " );
-      UID( decl->Semantic.SemanticIndex );
+      TXT( "\nName : " );
+      ENM( decl->Semantic.Name, TGSI_SEMANTICS );
+      TXT( "\nIndex: " );
+      UID( decl->Semantic.Index );
       if( ignored ) {
          TXT( "\nPadding      : " );
          UIX( decl->Semantic.Padding );
@@ -485,10 +485,10 @@ tgsi_dump_c(
 
    TXT( "tgsi-dump begin -----------------" );
 
-   TXT( "\nMajorVersion: " );
-   UID( parse.FullVersion.Version.MajorVersion );
-   TXT( "\nMinorVersion: " );
-   UID( parse.FullVersion.Version.MinorVersion );
+   TXT( "\nMajor: " );
+   UID( parse.FullVersion.Version.Major );
+   TXT( "\nMinor: " );
+   UID( parse.FullVersion.Version.Minor );
    EOL();
 
    TXT( "\nHeaderSize: " );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 61d38e57f1..c113f4a3bc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1899,16 +1899,16 @@ exec_declaration(struct tgsi_exec_machine *mach,
          last = decl->DeclarationRange.Last;
          mask = decl->Declaration.UsageMask;
 
-         if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
-            assert(decl->Semantic.SemanticIndex == 0);
+         if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
+            assert(decl->Semantic.Index == 0);
             assert(first == last);
             assert(mask = TGSI_WRITEMASK_XYZW);
 
             mach->Inputs[first] = mach->QuadPos;
-         } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+         } else if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
             uint i;
 
-            assert(decl->Semantic.SemanticIndex == 0);
+            assert(decl->Semantic.Index == 0);
             assert(first == last);
 
             for (i = 0; i < QUAD_SIZE; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 52c714ebbd..d4f27499b8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -36,7 +36,7 @@ tgsi_parse_init(
    const struct tgsi_token *tokens )
 {
    ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0];
-   if( ctx->FullVersion.Version.MajorVersion > 1 ) {
+   if( ctx->FullVersion.Version.Major > 1 ) {
       return TGSI_PARSE_ERROR;
    }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 55595539ec..69567130e3 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -129,21 +129,21 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                info->file_max[file] = MAX2(info->file_max[file], (int)reg);
 
                if (file == TGSI_FILE_INPUT) {
-                  info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
+                  info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
                   info->input_interpolate[reg] = (ubyte)fulldecl->Declaration.Interpolate;
                   info->num_inputs++;
                }
                else if (file == TGSI_FILE_OUTPUT) {
-                  info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName;
-                  info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex;
+                  info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.Name;
+                  info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.Index;
                   info->num_outputs++;
                }
 
                /* special case */
                if (procType == TGSI_PROCESSOR_FRAGMENT &&
                    file == TGSI_FILE_OUTPUT &&
-                   fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+                   fulldecl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
                   info->writes_z = TRUE;
                }
             }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 7250f98cc9..d25f590df7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -831,13 +831,13 @@ static boolean parse_declaration( struct translate_ctx *ctx )
                }
                cur2++;
 
-               decl.Semantic.SemanticIndex = index;
+               decl.Semantic.Index = index;
 
                cur = cur2;
             }
 
             decl.Declaration.Semantic = 1;
-            decl.Semantic.SemanticName = i;
+            decl.Semantic.Name = i;
 
             ctx->cur = cur;
             break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index de4bc6edb0..6d646a529a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -910,8 +910,8 @@ static void emit_decl( struct ureg_program *ureg,
       out[1].decl_range.Last = index;
 
    out[2].value = 0;
-   out[2].decl_semantic.SemanticName = semantic_name;
-   out[2].decl_semantic.SemanticIndex = semantic_index;
+   out[2].decl_semantic.Name = semantic_name;
+   out[2].decl_semantic.Index = semantic_index;
 
 }
 
@@ -1064,8 +1064,8 @@ emit_header( struct ureg_program *ureg )
 {
    union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
 
-   out[0].version.MajorVersion = 1;
-   out[0].version.MinorVersion = 1;
+   out[0].version.Major = 1;
+   out[0].version.Minor = 1;
    out[0].version.Padding = 0;
 
    out[1].header.HeaderSize = 2;
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index 9637cbed8a..d052e2c797 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -36,8 +36,8 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index
 
    decl.Declaration.File = TGSI_FILE_INPUT;
    decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
+   decl.Semantic.Name = name;
+   decl.Semantic.Index = index;
    decl.DeclarationRange.First = first;
    decl.DeclarationRange.Last = last;
 
@@ -64,8 +64,8 @@ struct tgsi_full_declaration vl_decl_interpolated_input
 
    decl.Declaration.File = TGSI_FILE_INPUT;
    decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
+   decl.Semantic.Name = name;
+   decl.Semantic.Index = index;
    decl.Declaration.Interpolate = interpolation;;
    decl.DeclarationRange.First = first;
    decl.DeclarationRange.Last = last;
@@ -79,8 +79,8 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i
 
    decl.Declaration.File = TGSI_FILE_CONSTANT;
    decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
+   decl.Semantic.Name = name;
+   decl.Semantic.Index = index;
    decl.DeclarationRange.First = first;
    decl.DeclarationRange.Last = last;
 
@@ -93,8 +93,8 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde
 
    decl.Declaration.File = TGSI_FILE_OUTPUT;
    decl.Declaration.Semantic = 1;
-   decl.Semantic.SemanticName = name;
-   decl.Semantic.SemanticIndex = index;
+   decl.Semantic.Name = name;
+   decl.Semantic.Index = index;
    decl.DeclarationRange.First = first;
    decl.DeclarationRange.Last = last;
 
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index 48df356faf..cd76910744 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -490,15 +490,15 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
 {
 	int hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = NV30_VP_INST_DEST_POS;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV30_VP_INST_DEST_COL0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV30_VP_INST_DEST_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
@@ -506,10 +506,10 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
 		}
 		break;
 	case TGSI_SEMANTIC_BCOLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV30_VP_INST_DEST_BFC0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV30_VP_INST_DEST_BFC1;
 		} else {
 			NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -523,8 +523,8 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
 		hw = NV30_VP_INST_DEST_PSZ;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
-		if (fdec->Semantic.SemanticIndex <= 7) {
-			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+		if (fdec->Semantic.Index <= 7) {
+			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
 			return FALSE;
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index eb978b6838..acf216bb61 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -572,15 +572,15 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 {
 	int hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = NV30_FP_OP_INPUT_SRC_POSITION;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV30_FP_OP_INPUT_SRC_COL0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV30_FP_OP_INPUT_SRC_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
@@ -591,9 +591,9 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 		hw = NV30_FP_OP_INPUT_SRC_FOGC;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
-		if (fdec->Semantic.SemanticIndex <= 7) {
+		if (fdec->Semantic.Index <= 7) {
 			hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic.
-						     SemanticIndex);
+						     Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
 			return FALSE;
@@ -612,7 +612,7 @@ static boolean
 nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
 				const struct tgsi_full_declaration *fdec)
 {
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		fpc->depth_id = fdec->DeclarationRange.First;
 		break;
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index b04fb229bc..e8fba8ab16 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -490,15 +490,15 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
 {
 	int hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = NV30_VP_INST_DEST_POS;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV30_VP_INST_DEST_COL0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV30_VP_INST_DEST_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
@@ -506,10 +506,10 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
 		}
 		break;
 	case TGSI_SEMANTIC_BCOLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV30_VP_INST_DEST_BFC0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV30_VP_INST_DEST_BFC1;
 		} else {
 			NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -523,8 +523,8 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
 		hw = NV30_VP_INST_DEST_PSZ;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
-		if (fdec->Semantic.SemanticIndex <= 7) {
-			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+		if (fdec->Semantic.Index <= 7) {
+			hw = NV30_VP_INST_DEST_TC(fdec->Semantic.Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
 			return FALSE;
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index dbbb736670..ca6a957fc1 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -644,15 +644,15 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
 {
 	int hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = NV40_FP_OP_INPUT_SRC_POSITION;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV40_FP_OP_INPUT_SRC_COL0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV40_FP_OP_INPUT_SRC_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
@@ -663,9 +663,9 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
 		hw = NV40_FP_OP_INPUT_SRC_FOGC;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
-		if (fdec->Semantic.SemanticIndex <= 7) {
+		if (fdec->Semantic.Index <= 7) {
 			hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic.
-						     SemanticIndex);
+						     Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
 			return FALSE;
@@ -687,12 +687,12 @@ nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
 	unsigned idx = fdec->DeclarationRange.First;
 	unsigned hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = 1;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		switch (fdec->Semantic.SemanticIndex) {
+		switch (fdec->Semantic.Index) {
 		case 0: hw = 0; break;
 		case 1: hw = 2; break;
 		case 2: hw = 3; break;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index df9cb227a3..ed0f1d857d 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -580,16 +580,16 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 	unsigned idx = fdec->DeclarationRange.First;
 	int hw;
 
-	switch (fdec->Semantic.SemanticName) {
+	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
 		hw = NV40_VP_INST_DEST_POS;
 		vpc->hpos_idx = idx;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV40_VP_INST_DEST_COL0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV40_VP_INST_DEST_COL1;
 		} else {
 			NOUVEAU_ERR("bad colour semantic index\n");
@@ -597,10 +597,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 		}
 		break;
 	case TGSI_SEMANTIC_BCOLOR:
-		if (fdec->Semantic.SemanticIndex == 0) {
+		if (fdec->Semantic.Index == 0) {
 			hw = NV40_VP_INST_DEST_BFC0;
 		} else
-		if (fdec->Semantic.SemanticIndex == 1) {
+		if (fdec->Semantic.Index == 1) {
 			hw = NV40_VP_INST_DEST_BFC1;
 		} else {
 			NOUVEAU_ERR("bad bcolour semantic index\n");
@@ -614,8 +614,8 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 		hw = NV40_VP_INST_DEST_PSZ;
 		break;
 	case TGSI_SEMANTIC_GENERIC:
-		if (fdec->Semantic.SemanticIndex <= 7) {
-			hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex);
+		if (fdec->Semantic.Index <= 7) {
+			hw = NV40_VP_INST_DEST_TC(fdec->Semantic.Index);
 		} else {
 			NOUVEAU_ERR("bad generic semantic index\n");
 			return FALSE;
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index e40e37d07c..00518af8c0 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2558,8 +2558,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 				    p->type == PIPE_SHADER_FRAGMENT)
 					break;
 
-				si = d->Semantic.SemanticIndex;
-				switch (d->Semantic.SemanticName) {
+				si = d->Semantic.Index;
+				switch (d->Semantic.Name) {
 				case TGSI_SEMANTIC_BCOLOR:
 					p->cfg.two_side[si].hw = first;
 					if (p->cfg.io_nr > first)
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 74ef416dc1..939b13e4b3 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -77,7 +77,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
         if (decl->Declaration.File != TGSI_FILE_OUTPUT)
             continue;
 
-        switch (decl->Semantic.SemanticName) {
+        switch (decl->Semantic.Name) {
             case TGSI_SEMANTIC_POSITION:
                 c->code->outputs[decl->DeclarationRange.First] = 0;
                 break;
@@ -98,7 +98,7 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
                 break;
             default:
                 debug_printf("r300: vs: Bad semantic declaration %d\n",
-                    decl->Semantic.SemanticName);
+                    decl->Semantic.Name);
                 break;
         }
     }
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
index 54457082a0..6f4822a89d 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
@@ -46,7 +46,7 @@ static boolean ps20_input( struct svga_shader_emitter *emit,
    dcl.values[0] = 0;
    dcl.values[1] = 0;
 
-   switch (semantic.SemanticName) {
+   switch (semantic.Name) {
    case TGSI_SEMANTIC_POSITION:
       /* Special case:
        */
@@ -55,15 +55,15 @@ static boolean ps20_input( struct svga_shader_emitter *emit,
       break;
    case TGSI_SEMANTIC_COLOR:
       reg = src_register( SVGA3DREG_INPUT, 
-                          semantic.SemanticIndex );
+                          semantic.Index );
       break;
    case TGSI_SEMANTIC_FOG:
-      assert(semantic.SemanticIndex == 0);
+      assert(semantic.Index == 0);
       reg = src_register( SVGA3DREG_TEXTURE, 0 );
       break;
    case TGSI_SEMANTIC_GENERIC:
       reg = src_register( SVGA3DREG_TEXTURE,
-                          semantic.SemanticIndex + 1 );
+                          semantic.Index + 1 );
       break;
    default:
       assert(0);
@@ -90,16 +90,16 @@ static boolean ps20_output( struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken reg;
 
-   switch (semantic.SemanticName) {
+   switch (semantic.Name) {
    case TGSI_SEMANTIC_COLOR:
-      if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) {
-         unsigned cbuf = semantic.SemanticIndex;
+      if (semantic.Index < PIPE_MAX_COLOR_BUFS) {
+         unsigned cbuf = semantic.Index;
 
          emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                                emit->nr_hw_temp++ );
          emit->temp_col[cbuf] = emit->output_map[idx];
          emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, 
-                                              semantic.SemanticIndex );
+                                              semantic.Index );
       }
       else {
          assert(0);
@@ -111,7 +111,7 @@ static boolean ps20_output( struct svga_shader_emitter *emit,
                                             emit->nr_hw_temp++ );
       emit->temp_pos = emit->output_map[idx];
       emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, 
-                                     semantic.SemanticIndex );
+                                     semantic.Index );
       break;
    default:
       assert(0);
@@ -169,9 +169,9 @@ static boolean vs20_output( struct svga_shader_emitter *emit,
 
    /* Just build the register map table: 
     */
-   switch (semantic.SemanticName) {
+   switch (semantic.Name) {
    case TGSI_SEMANTIC_POSITION:
-      assert(semantic.SemanticIndex == 0);
+      assert(semantic.Index == 0);
       emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                             emit->nr_hw_temp++ );
       emit->temp_pos = emit->output_map[idx];
@@ -179,7 +179,7 @@ static boolean vs20_output( struct svga_shader_emitter *emit,
                                      SVGA3DRASTOUT_POSITION);
       break;
    case TGSI_SEMANTIC_PSIZE:
-      assert(semantic.SemanticIndex == 0);
+      assert(semantic.Index == 0);
       emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                             emit->nr_hw_temp++ );
       emit->temp_psiz = emit->output_map[idx];
@@ -187,17 +187,17 @@ static boolean vs20_output( struct svga_shader_emitter *emit,
                                       SVGA3DRASTOUT_PSIZE );
       break;
    case TGSI_SEMANTIC_FOG:
-      assert(semantic.SemanticIndex == 0);
+      assert(semantic.Index == 0);
       emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 );
       break;
    case TGSI_SEMANTIC_COLOR:
       /* oD0 */
       emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT,
-                                            semantic.SemanticIndex );
+                                            semantic.Index );
       break;
    case TGSI_SEMANTIC_GENERIC:
       emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT,
-                                            semantic.SemanticIndex + 1 );
+                                            semantic.Index + 1 );
       break;
    default:
       assert(0);
@@ -237,8 +237,8 @@ boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
    unsigned idx;
    
    if (decl->Declaration.Semantic) {
-      semantic = decl->Semantic.SemanticName;
-      semantic_idx = decl->Semantic.SemanticIndex;
+      semantic = decl->Semantic.Name;
+      semantic_idx = decl->Semantic.Index;
    }
 
    for( idx = first; idx <= last; idx++ ) {
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index 08e7dfb117..65aa23ce3e 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -35,35 +35,35 @@ static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semant
                                          unsigned *usage,
                                          unsigned *idx )
 {
-   switch (semantic.SemanticName) {
+   switch (semantic.Name) {
    case TGSI_SEMANTIC_POSITION:  
-      *idx = semantic.SemanticIndex;
+      *idx = semantic.Index;
       *usage = SVGA3D_DECLUSAGE_POSITION;
       break;
    case TGSI_SEMANTIC_COLOR:     
 
-      *idx = semantic.SemanticIndex;
+      *idx = semantic.Index;
       *usage = SVGA3D_DECLUSAGE_COLOR;
       break;
    case TGSI_SEMANTIC_BCOLOR:
-      *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */
+      *idx = semantic.Index + 2; /* sharing with COLOR */
       *usage = SVGA3D_DECLUSAGE_COLOR;
       break;
    case TGSI_SEMANTIC_FOG:       
       *idx = 0;
-      assert(semantic.SemanticIndex == 0);
+      assert(semantic.Index == 0);
       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
       break;
    case TGSI_SEMANTIC_PSIZE:     
-      *idx = semantic.SemanticIndex;
+      *idx = semantic.Index;
       *usage = SVGA3D_DECLUSAGE_PSIZE;
       break;
    case TGSI_SEMANTIC_GENERIC:   
-      *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */
+      *idx = semantic.Index + 1; /* texcoord[0] is reserved for fog */
       *usage = SVGA3D_DECLUSAGE_TEXCOORD;
       break;
    case TGSI_SEMANTIC_NORMAL:    
-      *idx = semantic.SemanticIndex;
+      *idx = semantic.Index;
       *usage = SVGA3D_DECLUSAGE_NORMAL;
       break;
    default:
@@ -120,7 +120,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit,
    unsigned usage, index;
    SVGA3dShaderDestToken reg;
 
-   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+   if (semantic.Name == TGSI_SEMANTIC_POSITION) {
       emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE,
                                            SVGA3DMISCREG_POSITION );
 
@@ -135,7 +135,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit,
       return emit_decl( emit, reg, 0, 0 );
    }
    else if (emit->key.fkey.light_twoside &&
-            (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) {
+            (semantic.Name == TGSI_SEMANTIC_COLOR)) {
 
       if (!translate_vs_ps_semantic( semantic, &usage, &index ))
          return FALSE;
@@ -150,7 +150,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit,
       if (!emit_decl( emit, reg, usage, index ))
          return FALSE;
 
-      semantic.SemanticName = TGSI_SEMANTIC_BCOLOR;
+      semantic.Name = TGSI_SEMANTIC_BCOLOR;
       if (!translate_vs_ps_semantic( semantic, &usage, &index ))
          return FALSE;
 
@@ -164,7 +164,7 @@ static boolean ps30_input( struct svga_shader_emitter *emit,
 
       return TRUE;
    }
-   else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+   else if (semantic.Name == TGSI_SEMANTIC_FACE) {
       if (!emit_vface_decl( emit ))
          return FALSE;
       emit->emit_frontface = TRUE;
@@ -193,17 +193,17 @@ static boolean ps30_output( struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken reg;
 
-   switch (semantic.SemanticName) {
+   switch (semantic.Name) {
    case TGSI_SEMANTIC_COLOR:
       emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, 
-                                            semantic.SemanticIndex );
+                                            semantic.Index );
       break;
    case TGSI_SEMANTIC_POSITION:
       emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                             emit->nr_hw_temp++ );
       emit->temp_pos = emit->output_map[idx];
       emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, 
-                                     semantic.SemanticIndex );
+                                     semantic.Index );
       break;
    default:
       assert(0);
@@ -283,14 +283,14 @@ static boolean vs30_output( struct svga_shader_emitter *emit,
    dcl.index = index;
    dcl.values[0] |= 1<<31;
 
-   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+   if (semantic.Name == TGSI_SEMANTIC_POSITION) {
       assert(idx == 0);
       emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                             emit->nr_hw_temp++ );
       emit->temp_pos = emit->output_map[idx];
       emit->true_pos = dcl.dst;
    }
-   else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) {
+   else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
       emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
                                             emit->nr_hw_temp++ );
       emit->temp_psiz = emit->output_map[idx];
@@ -342,8 +342,8 @@ boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
    unsigned idx;
 
    if (decl->Declaration.Semantic) {
-      semantic = decl->Semantic.SemanticName;
-      semantic_idx = decl->Semantic.SemanticIndex;
+      semantic = decl->Semantic.Name;
+      semantic_idx = decl->Semantic.Index;
    }
 
    for( idx = first; idx <= last; idx++ ) {
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index ac999e0c18..7d73d7df85 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -37,8 +37,8 @@ extern "C" {
 
 struct tgsi_version
 {
-   unsigned MajorVersion  : 8;
-   unsigned MinorVersion  : 8;
+   unsigned Major  : 8;
+   unsigned Minor  : 8;
    unsigned Padding       : 16;
 };
 
@@ -137,8 +137,8 @@ struct tgsi_declaration_range
 
 struct tgsi_declaration_semantic
 {
-   unsigned SemanticName   : 8;  /**< one of TGSI_SEMANTIC_x */
-   unsigned SemanticIndex  : 16; /**< UINT */
+   unsigned Name           : 8;  /**< one of TGSI_SEMANTIC_x */
+   unsigned Index          : 16; /**< UINT */
    unsigned Padding        : 8;
 };
 
-- 
cgit v1.2.3


From 7d6c8f980d1e23ad6f557d650e89c715861a3b0c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 15:02:23 +0000
Subject: tgsi: rename fields of tgsi_full_instruction to avoid excessive
 verbosity

InstructionPredicate -> Predicate
InstructionLabel -> Label
InstructionTexture -> Texture
FullSrcRegisters -> Src
FullDstRegisters -> Dst
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c    |  40 ++--
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c   | 238 +++++++++++------------
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c  |  32 +--
 src/gallium/auxiliary/draw/draw_vs_aos.c         | 176 ++++++++---------
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp     |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_build.c          |  34 ++--
 src/gallium/auxiliary/tgsi/tgsi_dump.c           |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c         |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_exec.c           |  46 ++---
 src/gallium/auxiliary/tgsi/tgsi_parse.c          |  42 ++--
 src/gallium/auxiliary/tgsi/tgsi_parse.h          |  10 +-
 src/gallium/auxiliary/tgsi/tgsi_ppc.c            |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_sanity.c         |  24 +--
 src/gallium/auxiliary/tgsi/tgsi_scan.c           |   6 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c           |  16 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c           |   8 +-
 src/gallium/auxiliary/vl/vl_compositor.c         |   2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  32 +--
 src/gallium/auxiliary/vl/vl_shader_build.c       |  50 ++---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c       | 226 ++++++++++-----------
 src/gallium/drivers/cell/spu/spu_exec.c          |  12 +-
 src/gallium/drivers/i915/i915_fpc_translate.c    | 116 +++++------
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c   |  16 +-
 src/gallium/drivers/nv20/nv20_vertprog.c         |   8 +-
 src/gallium/drivers/nv30/nv30_fragprog.c         |   8 +-
 src/gallium/drivers/nv30/nv30_vertprog.c         |   8 +-
 src/gallium/drivers/nv40/nv40_fragprog.c         |   8 +-
 src/gallium/drivers/nv40/nv40_vertprog.c         |  10 +-
 src/gallium/drivers/nv50/nv50_program.c          |  36 ++--
 src/gallium/drivers/r300/r300_tgsi_to_rc.c       |  10 +-
 src/gallium/drivers/svga/svga_tgsi_insn.c        | 110 +++++------
 31 files changed, 678 insertions(+), 678 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 7b0b236a3d..58d867faeb 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -265,15 +265,15 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = aactx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
-      newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler;
+      newInst.Texture.Texture = TGSI_TEXTURE_2D;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[0].SrcRegister.Index = aactx->maxInput + 1;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      newInst.Src[1].SrcRegister.Index = aactx->freeSampler;
 
       ctx->emit_instruction(ctx, &newInst);
 
@@ -281,26 +281,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL alpha */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].SrcRegister.Index = aactx->texTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* END */
@@ -317,7 +317,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       uint i;
 
       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-         struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+         struct tgsi_full_dst_register *dst = &inst->Dst[i];
          if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
              dst->DstRegister.Index == aactx->colorOutput) {
             dst->DstRegister.File = TGSI_FILE_TEMPORARY;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 1f29448ff8..09fc55cb5e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -234,30 +234,30 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[0].SrcRegister.Index = texInput;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[1].SrcRegister.Index = texInput;
       ctx->emit_instruction(ctx, &newInst);
 
       /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].SrcRegister.Index = tmp0;
+      newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
       ctx->emit_instruction(ctx, &newInst);
 
 #if NORMALIZE  /* OPTIONAL normalization of length */
@@ -265,24 +265,24 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
       ctx->emit_instruction(ctx, &newInst);
 
       /* RCP t0.x, t0.x; */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
       ctx->emit_instruction(ctx, &newInst);
 #endif
 
@@ -290,16 +290,16 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[1].SrcRegister.Index = texInput;
+      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
       ctx->emit_instruction(ctx, &newInst);
 
       /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
@@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
       newInst.Instruction.NumDstRegs = 0;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.Negate = 1;
       ctx->emit_instruction(ctx, &newInst);
 
 
@@ -323,77 +323,77 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[0].SrcRegister.Index = texInput;
+      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[1].SrcRegister.Index = texInput;
+      newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* RCP t0.z, t0.z;  # t0.z = 1 / m */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* SUB t0.y, 1, t0.x;  # d = 1 - d */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[0].SrcRegister.Index = texInput;
+      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].SrcRegister.Index = tmp0;
+      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].SrcRegister.Index = tmp0;
+      newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[1].SrcRegister.Index = texInput;
+      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* CMP t0.w, -t0.y, tex.w, t0.w;
@@ -405,29 +405,29 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = tmp0;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = tmp0;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 3;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = texInput;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0;
-      newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[0].SrcRegister.Negate = 1;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[1].SrcRegister.Index = texInput;
+      newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+      newInst.Src[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[2].SrcRegister.Index = tmp0;
+      newInst.Src[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
+      newInst.Src[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
       ctx->emit_instruction(ctx, &newInst);
 
    }
@@ -439,26 +439,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL result.color.w, colorTemp, tmp0.w; */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput;
-      newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
+      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].SrcRegister.Index = aactx->tmp0;
       ctx->emit_instruction(ctx, &newInst);
    }
    else {
@@ -468,7 +468,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       uint i;
 
       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-         struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+         struct tgsi_full_dst_register *dst = &inst->Dst[i];
          if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
              dst->DstRegister.Index == aactx->colorOutput) {
             dst->DstRegister.File = TGSI_FILE_TEMPORARY;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 75774e6626..fe0d511218 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,28 +280,28 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
+      newInst.Src[0].SrcRegister.Index = wincoordInput;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_IMMEDIATE;
+      newInst.Src[1].SrcRegister.Index = pctx->numImmed;
       ctx->emit_instruction(ctx, &newInst);
 
       /* TEX texTemp, texTemp, sampler; */
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp;
+      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].DstRegister.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
-      newInst.InstructionTexture.Texture = TGSI_TEXTURE_2D;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp;
-      newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
-      newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler;
+      newInst.Texture.Texture = TGSI_TEXTURE_2D;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = pctx->texTemp;
+      newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      newInst.Src[1].SrcRegister.Index = pctx->freeSampler;
       ctx->emit_instruction(ctx, &newInst);
 
       /* KIL -texTemp;   # if -texTemp < 0, KILL fragment */
@@ -309,9 +309,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
       newInst.Instruction.NumDstRegs = 0;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp;
-      newInst.FullSrcRegisters[0].SrcRegister.Negate = 1;
+      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].SrcRegister.Index = pctx->texTemp;
+      newInst.Src[0].SrcRegister.Negate = 1;
       ctx->emit_instruction(ctx, &newInst);
    }
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 88bc790b62..a9c8715bc8 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -956,7 +956,7 @@ static void emit_print( struct aos_compilation *cp,
 
 static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
    struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
    struct x86_reg tmp = aos_get_xmm_reg(cp);
 
@@ -964,27 +964,27 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst
    sse_mulps(cp->func, tmp, neg);
    sse_maxps(cp->func, tmp, arg0);
    
-   store_dest(cp, &op->FullDstRegisters[0], tmp);
+   store_dest(cp, &op->Dst[0], tmp);
    return TRUE;
 }
 
 static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_addps(cp->func, dst, arg1);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+   x87_fld_src(cp, &op->Src[0], 0);
    x87_fcos(cp->func);
-   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+   x87_fstp_dest4(cp, &op->Dst[0]);
    return TRUE;
 }
 
@@ -993,8 +993,8 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst
  */
 static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg tmp = aos_get_xmm_reg(cp); 
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
@@ -1007,14 +1007,14 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst
    sse_addss(cp->func, dst, tmp);
    
    aos_release_xmm_reg(cp, tmp.idx);
-   store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+   store_scalar_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg tmp = aos_get_xmm_reg(cp);      
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
@@ -1028,14 +1028,14 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst
    sse_addss(cp->func, dst, tmp);
 
    aos_release_xmm_reg(cp, tmp.idx);
-   store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+   store_scalar_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg tmp = aos_get_xmm_reg(cp);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
@@ -1051,14 +1051,14 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst
    sse_addss(cp->func, dst, tmp);
 
    aos_release_xmm_reg(cp, tmp.idx);
-   store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+   store_scalar_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-    struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-    struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+    struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+    struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
     struct x86_reg dst = aos_get_xmm_reg(cp);
     struct x86_reg tmp = aos_get_xmm_reg(cp);
     struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
@@ -1073,25 +1073,25 @@ static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_inst
     sse_mulps(cp->func, dst, tmp);
 
     aos_release_xmm_reg(cp, tmp.idx);
-    store_dest(cp, &op->FullDstRegisters[0], dst);
+    store_dest(cp, &op->Dst[0], dst);
     return TRUE;
 }
 
 static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
    x87_fld1(cp->func);		/* 1 */
-   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);	/* a0 1 */
+   x87_fld_src(cp, &op->Src[0], 0);	/* a0 1 */
    x87_fyl2x(cp->func);	/* log2(a0) */
-   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+   x87_fstp_dest4(cp, &op->Dst[0]);
    return TRUE;
 }
 
 #if 0
 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+   x87_fld_src(cp, &op->Src[0], 0);
    x87_emit_ex2(cp);
-   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+   x87_fstp_dest4(cp, &op->Dst[0]);
    return TRUE;
 }
 #endif
@@ -1099,8 +1099,8 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
 
 static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); 
-   unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+   struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
+   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
    int i;
 
    set_fpu_round_neg_inf( cp );
@@ -1109,7 +1109,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst
     */
    for (i = 3; i >= 0; i--) {
       if (writemask & (1<<i)) {
-         x87_fld_src(cp, &op->FullSrcRegisters[0], i);   
+         x87_fld_src(cp, &op->Src[0], i);   
       }
    }
 
@@ -1126,8 +1126,8 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst
 
 static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); 
-   unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+   struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
+   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
    int i;
 
    set_fpu_round_nearest( cp );
@@ -1136,7 +1136,7 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst
     */
    for (i = 3; i >= 0; i--) {
       if (writemask & (1<<i)) {
-         x87_fld_src(cp, &op->FullSrcRegisters[0], i);   
+         x87_fld_src(cp, &op->Src[0], i);   
       }
    }
 
@@ -1153,10 +1153,10 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst
 
 static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); 
+   struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
    struct x86_reg st0 = x86_make_reg(file_x87, 0);
    struct x86_reg st1 = x86_make_reg(file_x87, 1);
-   unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
    int i;
 
    set_fpu_round_neg_inf( cp );
@@ -1166,7 +1166,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
     */
    for (i = 3; i >= 0; i--) {
       if (writemask & (1<<i)) {
-         x87_fld_src(cp, &op->FullSrcRegisters[0], i);   
+         x87_fld_src(cp, &op->Src[0], i);   
       }
    }
 
@@ -1190,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-   unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
    unsigned lit_count = cp->lit_count++;
    struct x86_reg result, arg0;
    unsigned i;
@@ -1209,10 +1209,10 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
    if (writemask != TGSI_WRITEMASK_XYZW) 
       result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0]));
    else 
-      result = get_dst_ptr(cp, &op->FullDstRegisters[0]);    
+      result = get_dst_ptr(cp, &op->Dst[0]);    
 
    
-   arg0 = fetch_src( cp, &op->FullSrcRegisters[0] );
+   arg0 = fetch_src( cp, &op->Src[0] );
    if (arg0.file == file_XMM) {
       struct x86_reg tmp = x86_make_disp(cp->machine_EDX, 
                                          Offset(struct aos_machine, tmp[1]));
@@ -1259,7 +1259,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
 
    if (writemask != TGSI_WRITEMASK_XYZW) {
       store_dest( cp, 
-                  &op->FullDstRegisters[0],
+                  &op->Dst[0],
                   get_xmm_writable( cp, result ) );
    }
 
@@ -1269,8 +1269,8 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
 #if 0   
 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); 
-   unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask;
+   struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
+   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
 
    if (writemask & TGSI_WRITEMASK_YZ) {
       struct x86_reg st1 = x86_make_reg(file_x87, 1);
@@ -1286,13 +1286,13 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
        */
       x87_fldz(cp->func);                           /* 1 0  */
 #endif
-      x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0  */
+      x87_fld_src(cp, &op->Src[0], 1); /* a1 1 0  */
       x87_fcomi(cp->func, st2);	                    /* a1 1 0  */
       x87_fcmovb(cp->func, st1);                    /* a1' 1 0  */
       x87_fstp(cp->func, st1);                      /* a1' 0  */
       x87_fstp(cp->func, st1);                      /* a1'  */
 
-      x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1'  */
+      x87_fld_src(cp, &op->Src[0], 3); /* a3 a1'  */
       x87_fxch(cp->func, st1);                      /* a1' a3  */
       
 
@@ -1305,7 +1305,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
       /* a0' = max2(a0, 0):
        */
       x87_fldz(cp->func);                           /* 0 r2 */
-      x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */
+      x87_fld_src(cp, &op->Src[0], 0); /* a0 0 r2 */
       x87_fcomi(cp->func, st1);	
       x87_fcmovb(cp->func, st1);                    /* a0' 0 r2 */
 
@@ -1333,58 +1333,58 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu
 
 static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_maxps(cp->func, dst, arg1);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 
 static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_minps(cp->func, dst, arg1);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    /* potentially nothing to do */
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_mulps(cp->func, dst, arg1);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 
 static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
-   struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
+   struct x86_reg arg2 = fetch_src(cp, &op->Src[2]);
 
    /* If we can't clobber old contents of arg0, get a temporary & copy
     * it there, then clobber it...
@@ -1393,7 +1393,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst
 
    sse_mulps(cp->func, arg0, arg1);
    sse_addps(cp->func, arg0, arg2);
-   store_dest(cp, &op->FullDstRegisters[0], arg0);
+   store_dest(cp, &op->Dst[0], arg0);
    return TRUE;
 }
 
@@ -1425,13 +1425,13 @@ static float PIPE_CDECL _exp2(float x)
 static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
 #if 0
-   x87_fld_src(cp, &op->FullSrcRegisters[1], 0);  /* a1.x */
-   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);	/* a0.x a1.x */
+   x87_fld_src(cp, &op->Src[1], 0);  /* a1.x */
+   x87_fld_src(cp, &op->Src[0], 0);	/* a0.x a1.x */
    x87_fyl2x(cp->func);	                                /* a1*log2(a0) */
 
    x87_emit_ex2( cp );		/* 2^(a1*log2(a0)) */
 
-   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+   x87_fstp_dest4(cp, &op->Dst[0]);
 #else
    uint i;
 
@@ -1450,9 +1450,9 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
 
    x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) );
 
-   x87_fld_src( cp, &op->FullSrcRegisters[1], 0 );
+   x87_fld_src( cp, &op->Src[1], 0 );
    x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) );
-   x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+   x87_fld_src( cp, &op->Src[0], 0 );
    x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
 
    /* tmp_EAX has been pushed & will be restored below */
@@ -1467,7 +1467,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst
     */
    cp->func->x87_stack++;
 
-   x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+   x87_fstp_dest4( cp, &op->Dst[0] );
 #endif
    return TRUE;
 }
@@ -1493,7 +1493,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full
 
    x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) );
 
-   x87_fld_src( cp, &op->FullSrcRegisters[0], 0 );
+   x87_fld_src( cp, &op->Src[0], 0 );
    x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) );
 
    /* tmp_EAX has been pushed & will be restored below */
@@ -1508,7 +1508,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full
     */
    cp->func->x87_stack++;
 
-   x87_fstp_dest4( cp, &op->FullDstRegisters[0] );
+   x87_fstp_dest4( cp, &op->Dst[0] );
 
    return TRUE;
 }
@@ -1517,7 +1517,7 @@ static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full
 
 static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
    struct x86_reg dst = aos_get_xmm_reg(cp);
 
    if (cp->have_sse2) {
@@ -1531,7 +1531,7 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
       sse_divss(cp->func, dst, arg0);
    }
 
-   store_scalar_dest(cp, &op->FullDstRegisters[0], dst);
+   store_scalar_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
@@ -1551,14 +1551,14 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    if (0) {
-      struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+      struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
       struct x86_reg r = aos_get_xmm_reg(cp);
       sse_rsqrtss(cp->func, r, arg0);
-      store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+      store_scalar_dest(cp, &op->Dst[0], r);
       return TRUE;
    }
    else {
-      struct x86_reg arg0           = fetch_src(cp, &op->FullSrcRegisters[0]);
+      struct x86_reg arg0           = fetch_src(cp, &op->Src[0]);
       struct x86_reg r              = aos_get_xmm_reg(cp);
 
       struct x86_reg neg_half       = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ );
@@ -1578,7 +1578,7 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
       sse_addss(   cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */
       sse_mulss(   cp->func, r,  tmp );             /* r * (1.5 - .5 * a * r * r) */
 
-      store_scalar_dest(cp, &op->FullDstRegisters[0], r);
+      store_scalar_dest(cp, &op->Dst[0], r);
 
       aos_release_xmm_reg(cp, tmp.idx);
 
@@ -1589,23 +1589,23 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst
 
 static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_cmpps(cp->func, dst, arg1, cc_NotLessThan);
    sse_andps(cp->func, dst, ones);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   x87_fld_src(cp, &op->FullSrcRegisters[0], 0);
+   x87_fld_src(cp, &op->Src[0], 0);
    x87_fsin(cp->func);
-   x87_fstp_dest4(cp, &op->FullDstRegisters[0]);
+   x87_fstp_dest4(cp, &op->Dst[0]);
    return TRUE;
 }
 
@@ -1613,46 +1613,46 @@ static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_inst
 
 static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg ones = aos_get_internal(cp, IMM_ONES);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
    
    sse_cmpps(cp->func, dst, arg1, cc_LessThan);
    sse_andps(cp->func, dst, ones);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg dst = get_xmm_writable(cp, arg0);
 
    sse_subps(cp->func, dst, arg1);
 
-   store_dest(cp, &op->FullDstRegisters[0], dst);
+   store_dest(cp, &op->Dst[0], dst);
    return TRUE;
 }
 
 static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
    struct x86_reg tmp0 = aos_get_xmm_reg(cp);
 
    sse2_cvttps2dq(cp->func, tmp0, arg0);
    sse2_cvtdq2ps(cp->func, tmp0, tmp0);
 
-   store_dest(cp, &op->FullDstRegisters[0], tmp0);
+   store_dest(cp, &op->Dst[0], tmp0);
    return TRUE;
 }
 
 static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
-   struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]);
-   struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]);
+   struct x86_reg arg0 = fetch_src(cp, &op->Src[0]);
+   struct x86_reg arg1 = fetch_src(cp, &op->Src[1]);
    struct x86_reg tmp0 = aos_get_xmm_reg(cp);
    struct x86_reg tmp1 = aos_get_xmm_reg(cp);
 
@@ -1670,7 +1670,7 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst
 
 
    aos_release_xmm_reg(cp, tmp0.idx);
-   store_dest(cp, &op->FullDstRegisters[0], tmp1);
+   store_dest(cp, &op->Dst[0], tmp1);
    return TRUE;
 }
 
@@ -1897,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp )
          continue;
 
       for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) {
-         if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File ==
+         if (parse.FullToken.FullInstruction.Dst[i].DstRegister.File ==
              TGSI_FILE_OUTPUT) 
          {
-            unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index;
+            unsigned idx = parse.FullToken.FullInstruction.Dst[i].DstRegister.Index;
             cp->output_last_write[idx] = this_instruction;
          }
       }
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index bf84401e11..fbf4d2636d 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -234,7 +234,7 @@ translate_instruction(llvm::Module *module,
    inputs[3] = 0;
 
    for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      struct tgsi_full_src_register *src = &inst->Src[i];
       llvm::Value *val = 0;
       llvm::Value *indIdx = 0;
 
@@ -656,7 +656,7 @@ translate_instruction(llvm::Module *module,
 
    /* store results  */
    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
 
       if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
          storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
@@ -683,7 +683,7 @@ translate_instructionir(llvm::Module *module,
    std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs);
 
    for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      struct tgsi_full_src_register *src = &inst->Src[i];
       std::vector<llvm::Value*> val;
       llvm::Value *indIdx = 0;
       int swizzle = swizzleInt(src);
@@ -993,7 +993,7 @@ translate_instructionir(llvm::Module *module,
 
    /* store results  */
    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
       storage->store((enum tgsi_file_type)dst->DstRegister.File,
                      dst->DstRegister.Index, out, dst->DstRegister.WriteMask,
 		     instr->getIRBuilder() );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 2e6c5b38b4..7ec832aad9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -473,14 +473,14 @@ tgsi_default_full_instruction( void )
    unsigned i;
 
    full_instruction.Instruction = tgsi_default_instruction();
-   full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
-   full_instruction.InstructionLabel = tgsi_default_instruction_label();
-   full_instruction.InstructionTexture = tgsi_default_instruction_texture();
+   full_instruction.Predicate = tgsi_default_instruction_predicate();
+   full_instruction.Label = tgsi_default_instruction_label();
+   full_instruction.Texture = tgsi_default_instruction_texture();
    for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
-      full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
+      full_instruction.Dst[i] = tgsi_default_full_dst_register();
    }
    for( i = 0;  i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) {
-      full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
+      full_instruction.Src[i] = tgsi_default_full_src_register();
    }
 
    return full_instruction;
@@ -521,18 +521,18 @@ tgsi_build_full_instruction(
       size++;
 
       *instruction_predicate =
-         tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index,
-                                          full_inst->InstructionPredicate.Negate,
-                                          full_inst->InstructionPredicate.SwizzleX,
-                                          full_inst->InstructionPredicate.SwizzleY,
-                                          full_inst->InstructionPredicate.SwizzleZ,
-                                          full_inst->InstructionPredicate.SwizzleW,
+         tgsi_build_instruction_predicate(full_inst->Predicate.Index,
+                                          full_inst->Predicate.Negate,
+                                          full_inst->Predicate.SwizzleX,
+                                          full_inst->Predicate.SwizzleY,
+                                          full_inst->Predicate.SwizzleZ,
+                                          full_inst->Predicate.SwizzleW,
                                           instruction,
                                           header);
    }
 
    if( tgsi_compare_instruction_label(
-         full_inst->InstructionLabel,
+         full_inst->Label,
          tgsi_default_instruction_label() ) ) {
       struct tgsi_instruction_label *instruction_label;
 
@@ -543,7 +543,7 @@ tgsi_build_full_instruction(
       size++;
 
       *instruction_label = tgsi_build_instruction_label(
-         full_inst->InstructionLabel.Label,
+         full_inst->Label.Label,
          prev_token,
          instruction,
          header );
@@ -551,7 +551,7 @@ tgsi_build_full_instruction(
    }
 
    if( tgsi_compare_instruction_texture(
-         full_inst->InstructionTexture,
+         full_inst->Texture,
          tgsi_default_instruction_texture() ) ) {
       struct tgsi_instruction_texture *instruction_texture;
 
@@ -562,7 +562,7 @@ tgsi_build_full_instruction(
       size++;
 
       *instruction_texture = tgsi_build_instruction_texture(
-         full_inst->InstructionTexture.Texture,
+         full_inst->Texture.Texture,
          prev_token,
          instruction,
          header   );
@@ -570,7 +570,7 @@ tgsi_build_full_instruction(
    }
 
    for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
-      const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
+      const struct tgsi_full_dst_register *reg = &full_inst->Dst[i];
       struct tgsi_dst_register *dst_register;
       struct tgsi_token *prev_token;
 
@@ -613,7 +613,7 @@ tgsi_build_full_instruction(
    }
 
    for( i = 0;  i < full_inst->Instruction.NumSrcRegs; i++ ) {
-      const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i];
+      const struct tgsi_full_src_register *reg = &full_inst->Src[i];
       struct tgsi_src_register *src_register;
       struct tgsi_token *prev_token;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 8f26d5dae3..4ff7f4b11e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -352,7 +352,7 @@ iter_instruction(
    }
 
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
+      const struct tgsi_full_dst_register *dst = &inst->Dst[i];
 
       if (!first_reg)
          CHR( ',' );
@@ -380,7 +380,7 @@ iter_instruction(
    }
 
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
+      const struct tgsi_full_src_register *src = &inst->Src[i];
 
       if (!first_reg)
          CHR( ',' );
@@ -429,7 +429,7 @@ iter_instruction(
 
    if (inst->Instruction.Texture) {
       TXT( ", " );
-      ENM( inst->InstructionTexture.Texture, texture_names );
+      ENM( inst->Texture.Texture, texture_names );
    }
 
    switch (inst->Instruction.Opcode) {
@@ -439,7 +439,7 @@ iter_instruction(
    case TGSI_OPCODE_ENDLOOP:
    case TGSI_OPCODE_CAL:
       TXT( " :" );
-      UID( inst->InstructionLabel.Label );
+      UID( inst->Label.Label );
       break;
    }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 11d28b1653..194b2473bc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -334,8 +334,8 @@ dump_instruction_verbose(
    }
 
    for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
-      struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-      struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i];
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      struct tgsi_full_dst_register *fd = &fi->Dst[i];
 
       EOL();
       TXT( "\nFile     : " );
@@ -387,8 +387,8 @@ dump_instruction_verbose(
    }
 
    for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
-      struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
-      struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i];
+      struct tgsi_full_src_register *src = &inst->Src[i];
+      struct tgsi_full_src_register *fs = &fi->Src[i];
 
       EOL();
       TXT( "\nFile     : ");
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index c113f4a3bc..a9bfb0d6df 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -107,10 +107,10 @@
 #define TEMP_P0            TGSI_EXEC_TEMP_P0
 
 #define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
@@ -188,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
 {
    uint i, chan;
 
-   uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+   uint writemask = inst->Dst[0].DstRegister.WriteMask;
    if (writemask == TGSI_WRITEMASK_X ||
        writemask == TGSI_WRITEMASK_Y ||
        writemask == TGSI_WRITEMASK_Z ||
@@ -200,15 +200,15 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
 
    /* loop over src regs */
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      if ((inst->FullSrcRegisters[i].SrcRegister.File ==
-           inst->FullDstRegisters[0].DstRegister.File) &&
-          (inst->FullSrcRegisters[i].SrcRegister.Index ==
-           inst->FullDstRegisters[0].DstRegister.Index)) {
+      if ((inst->Src[i].SrcRegister.File ==
+           inst->Dst[0].DstRegister.File) &&
+          (inst->Src[i].SrcRegister.Index ==
+           inst->Dst[0].DstRegister.Index)) {
          /* loop over dest channels */
          uint channelsWritten = 0x0;
          FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
             /* check if we're reading a channel that's been written */
-            uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan);
+            uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
             if (channelsWritten & (1 << swizzle)) {
                return TRUE;
             }
@@ -1500,27 +1500,27 @@ store_dest(
 
       switch (chan_index) {
       case CHAN_X:
-         swizzle = inst->InstructionPredicate.SwizzleX;
+         swizzle = inst->Predicate.SwizzleX;
          break;
       case CHAN_Y:
-         swizzle = inst->InstructionPredicate.SwizzleY;
+         swizzle = inst->Predicate.SwizzleY;
          break;
       case CHAN_Z:
-         swizzle = inst->InstructionPredicate.SwizzleZ;
+         swizzle = inst->Predicate.SwizzleZ;
          break;
       case CHAN_W:
-         swizzle = inst->InstructionPredicate.SwizzleW;
+         swizzle = inst->Predicate.SwizzleW;
          break;
       default:
          assert(0);
          return;
       }
 
-      assert(inst->InstructionPredicate.Index == 0);
+      assert(inst->Predicate.Index == 0);
 
-      pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle];
+      pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
 
-      if (inst->InstructionPredicate.Negate) {
+      if (inst->Predicate.Negate) {
          for (i = 0; i < QUAD_SIZE; i++) {
             if (pred->u[i]) {
                execmask &= ~(1 << i);
@@ -1572,10 +1572,10 @@ store_dest(
 }
 
 #define FETCH(VAL,INDEX,CHAN)\
-    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+    fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
 
 #define STORE(VAL,INDEX,CHAN)\
-    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+    store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
 
 
 /**
@@ -1601,7 +1601,7 @@ exec_kil(struct tgsi_exec_machine *mach,
 
       /* unswizzle channel */
       swizzle = tgsi_util_get_full_src_register_swizzle (
-                        &inst->FullSrcRegisters[0],
+                        &inst->Src[0],
                         chan_index);
 
       /* check if the component has not been already tested */
@@ -1668,14 +1668,14 @@ exec_tex(struct tgsi_exec_machine *mach,
          boolean biasLod,
          boolean projected)
 {
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].SrcRegister.Index;
    union tgsi_exec_channel r[4];
    uint chan_index;
    float lodBias;
 
    /*   debug_printf("Sampler %u unit %u\n", sampler, unit); */
 
-   switch (inst->InstructionTexture.Texture) {
+   switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
 
@@ -1765,7 +1765,7 @@ static void
 exec_txd(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
+   const uint unit = inst->Src[3].SrcRegister.Index;
    union tgsi_exec_channel r[4];
    uint chan_index;
 
@@ -1773,7 +1773,7 @@ exec_txd(struct tgsi_exec_machine *mach,
     * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
     */
 
-   switch (inst->InstructionTexture.Texture) {
+   switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
 
@@ -2740,7 +2740,7 @@ exec_instruction(
          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
 
          /* Finally, jump to the subroutine */
-         *pc = inst->InstructionLabel.Label;
+         *pc = inst->Label.Label;
       }
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index d4f27499b8..ff593fdc32 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -153,36 +153,36 @@ tgsi_parse_token(
       copy_token(&inst->Instruction, &token);
 
       if (inst->Instruction.Predicate) {
-         next_token(ctx, &inst->InstructionPredicate);
+         next_token(ctx, &inst->Predicate);
       }
 
       if (inst->Instruction.Label) {
-         next_token( ctx, &inst->InstructionLabel);
+         next_token( ctx, &inst->Label);
       }
 
       if (inst->Instruction.Texture) {
-         next_token( ctx, &inst->InstructionTexture);
+         next_token( ctx, &inst->Texture);
       }
 
       assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS );
 
       for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
 
-         next_token( ctx, &inst->FullDstRegisters[i].DstRegister );
+         next_token( ctx, &inst->Dst[i].DstRegister );
 
          /*
           * No support for indirect or multi-dimensional addressing.
           */
-         assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
+         assert( !inst->Dst[i].DstRegister.Dimension );
 
-         if( inst->FullDstRegisters[i].DstRegister.Indirect ) {
-            next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd );
+         if( inst->Dst[i].DstRegister.Indirect ) {
+            next_token( ctx, &inst->Dst[i].DstRegisterInd );
 
             /*
              * No support for indirect or multi-dimensional addressing.
              */
-            assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension );
-            assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect );
+            assert( !inst->Dst[i].DstRegisterInd.Dimension );
+            assert( !inst->Dst[i].DstRegisterInd.Indirect );
          }
       }
 
@@ -190,34 +190,34 @@ tgsi_parse_token(
 
       for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
 
-         next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister );
+         next_token( ctx, &inst->Src[i].SrcRegister );
 
-         if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) {
-            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd );
+         if( inst->Src[i].SrcRegister.Indirect ) {
+            next_token( ctx, &inst->Src[i].SrcRegisterInd );
 
             /*
              * No support for indirect or multi-dimensional addressing.
              */
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
-            assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+            assert( !inst->Src[i].SrcRegisterInd.Indirect );
+            assert( !inst->Src[i].SrcRegisterInd.Dimension );
          }
 
-         if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) {
-            next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim );
+         if( inst->Src[i].SrcRegister.Dimension ) {
+            next_token( ctx, &inst->Src[i].SrcRegisterDim );
 
             /*
              * No support for multi-dimensional addressing.
              */
-            assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension );
+            assert( !inst->Src[i].SrcRegisterDim.Dimension );
 
-            if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) {
-               next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd );
+            if( inst->Src[i].SrcRegisterDim.Indirect ) {
+               next_token( ctx, &inst->Src[i].SrcRegisterDimInd );
 
                /*
                * No support for indirect or multi-dimensional addressing.
                */
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect );
-               assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension );
+               assert( !inst->Src[i].SrcRegisterInd.Indirect );
+               assert( !inst->Src[i].SrcRegisterInd.Dimension );
             }
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 48e6987ab7..2f8f4d488b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -78,11 +78,11 @@ struct tgsi_full_immediate
 struct tgsi_full_instruction
 {
    struct tgsi_instruction             Instruction;
-   struct tgsi_instruction_predicate   InstructionPredicate;
-   struct tgsi_instruction_label       InstructionLabel;
-   struct tgsi_instruction_texture     InstructionTexture;
-   struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
-   struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
+   struct tgsi_instruction_predicate   Predicate;
+   struct tgsi_instruction_label       Label;
+   struct tgsi_instruction_texture     Texture;
+   struct tgsi_full_dst_register       Dst[TGSI_FULL_MAX_DST_REGISTERS];
+   struct tgsi_full_src_register       Src[TGSI_FULL_MAX_SRC_REGISTERS];
 };
 
 union tgsi_full_token
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 617fd7f6be..8397f432f9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -431,7 +431,7 @@ get_src_vec(struct gen_context *gen,
             struct tgsi_full_instruction *inst, int src_reg, uint chan)
 {
    const const struct tgsi_full_src_register *src = 
-      &inst->FullSrcRegisters[src_reg];
+      &inst->Src[src_reg];
    int vec;
    uint i;
 
@@ -482,7 +482,7 @@ get_dst_vec(struct gen_context *gen,
             const struct tgsi_full_instruction *inst,
             unsigned chan_index)
 {
-   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+   const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 
    if (is_ppc_vec_temporary_dst(reg)) {
       int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
@@ -505,7 +505,7 @@ emit_store(struct gen_context *gen,
            unsigned chan_index,
            boolean free_vec)
 {
-   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0];
+   const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 
    switch (reg->DstRegister.File) {
    case TGSI_FILE_OUTPUT:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 36e27ea52f..8422b91a30 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -212,24 +212,24 @@ iter_instruction(
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
       check_register_usage(
          ctx,
-         inst->FullDstRegisters[i].DstRegister.File,
-         inst->FullDstRegisters[i].DstRegister.Index,
+         inst->Dst[i].DstRegister.File,
+         inst->Dst[i].DstRegister.Index,
          "destination",
          FALSE );
    }
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       check_register_usage(
          ctx,
-         inst->FullSrcRegisters[i].SrcRegister.File,
-         inst->FullSrcRegisters[i].SrcRegister.Index,
+         inst->Src[i].SrcRegister.File,
+         inst->Src[i].SrcRegister.Index,
          "source",
-         (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect );
-      if (inst->FullSrcRegisters[i].SrcRegister.Indirect) {
+         (boolean)inst->Src[i].SrcRegister.Indirect );
+      if (inst->Src[i].SrcRegister.Indirect) {
          uint file;
          int index;
 
-         file = inst->FullSrcRegisters[i].SrcRegisterInd.File;
-         index = inst->FullSrcRegisters[i].SrcRegisterInd.Index;
+         file = inst->Src[i].SrcRegisterInd.File;
+         index = inst->Src[i].SrcRegisterInd.Index;
          check_register_usage(
             ctx,
             file,
@@ -245,8 +245,8 @@ iter_instruction(
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_BGNFOR:
    case TGSI_OPCODE_ENDFOR:
-      if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP ||
-          inst->FullDstRegisters[0].DstRegister.Index != 0) {
+      if (inst->Dst[0].DstRegister.File != TGSI_FILE_LOOP ||
+          inst->Dst[0].DstRegister.Index != 0) {
          report_error(ctx, "Destination register must be LOOP[0]");
       }
       break;
@@ -254,8 +254,8 @@ iter_instruction(
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_BGNFOR:
-      if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT &&
-          inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) {
+      if (inst->Src[0].SrcRegister.File != TGSI_FILE_CONSTANT &&
+          inst->Src[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) {
          report_error(ctx, "Source register file must be either CONST or IMM");
       }
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 69567130e3..be25b3dc5c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -96,7 +96,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                uint i;
                for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
                   const struct tgsi_full_src_register *src =
-                     &fullinst->FullSrcRegisters[i];
+                     &fullinst->Src[i];
                   if (src->SrcRegister.File == TGSI_FILE_INPUT) {
                      const int ind = src->SrcRegister.Index;
                      if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
@@ -205,9 +205,9 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
             struct tgsi_full_instruction *fullinst =
                &parse.FullToken.FullInstruction;
             const struct tgsi_full_src_register *src =
-               &fullinst->FullSrcRegisters[0];
+               &fullinst->Src[0];
             const struct tgsi_full_dst_register *dst =
-               &fullinst->FullDstRegisters[0];
+               &fullinst->Dst[0];
 
             /* Do a whole bunch of checks for a simple move */
             if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index a6cc3a5398..2d2ee321c9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -58,7 +58,7 @@
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -1331,7 +1331,7 @@ emit_fetch(
 }
 
 #define FETCH( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN )
+   emit_fetch( FUNC, XMM, &(INST).Src[INDEX], CHAN )
 
 /**
  * Register store.
@@ -1402,7 +1402,7 @@ emit_store(
 }
 
 #define STORE( FUNC, INST, XMM, INDEX, CHAN )\
-   emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+   emit_store( FUNC, XMM, &(INST).Dst[INDEX], &(INST), CHAN )
 
 
 static void PIPE_CDECL
@@ -1459,13 +1459,13 @@ emit_tex( struct x86_function *func,
           boolean lodbias,
           boolean projected)
 {
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].SrcRegister.Index;
    struct x86_reg args[2];
    unsigned count;
    unsigned i;
 
    assert(inst->Instruction.Texture);
-   switch (inst->InstructionTexture.Texture) {
+   switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
       count = 1;
       break;
@@ -1720,13 +1720,13 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
 {
    uint i;
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
+      const struct tgsi_full_src_register *reg = &inst->Src[i];
       if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
           reg->SrcRegister.Indirect)
          return TRUE;
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
+      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
       if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
           reg->DstRegister.Indirect)
          return TRUE;
@@ -2244,7 +2244,7 @@ emit_instruction(
 
    case TGSI_OPCODE_KIL:
       /* conditional kill */
-      emit_kil( func, &inst->FullSrcRegisters[0] );
+      emit_kil( func, &inst->Src[0] );
       break;
 
    case TGSI_OPCODE_PK2H:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index d25f590df7..e9b1a21fb4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -699,11 +699,11 @@ parse_instruction(
       }
 
       if (i < info->num_dst) {
-         if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] ))
+         if (!parse_dst_operand( ctx, &inst.Dst[i] ))
             return FALSE;
       }
       else if (i < info->num_dst + info->num_src) {
-         if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] ))
+         if (!parse_src_operand( ctx, &inst.Src[i - info->num_dst] ))
             return FALSE;
       }
       else {
@@ -713,7 +713,7 @@ parse_instruction(
             if (str_match_no_case( &ctx->cur, texture_names[j] )) {
                if (!is_digit_alpha_underscore( ctx->cur )) {
                   inst.Instruction.Texture = 1;
-                  inst.InstructionTexture.Texture = j;
+                  inst.Texture.Texture = j;
                   break;
                }
             }
@@ -740,7 +740,7 @@ parse_instruction(
          return FALSE;
       }
       inst.Instruction.Label = 1;
-      inst.InstructionLabel.Label = target;
+      inst.Label.Label = target;
    }
 
    advance = tgsi_build_full_instruction(
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cda6dc134a..34a02b5042 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -213,7 +213,7 @@ create_frag_shader(struct vl_compositor *c)
     */
    for (i = 0; i < 4; ++i) {
       inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i);
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c4ba69817f..93e79e7f37 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -237,10 +237,10 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
@@ -415,10 +415,10 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
@@ -620,10 +620,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
@@ -642,10 +642,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
    inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-   inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+   inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+   inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+   inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+   inst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
    /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index d052e2c797..82300b1da2 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -138,11 +138,11 @@ struct tgsi_full_instruction vl_inst2
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+   inst.Dst[0].DstRegister.File = dst_file;
+   inst.Dst[0].DstRegister.Index = dst_index;
    inst.Instruction.NumSrcRegs = 1;
-   inst.FullSrcRegisters[0].SrcRegister.File = src_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src_index;
+   inst.Src[0].SrcRegister.File = src_file;
+   inst.Src[0].SrcRegister.Index = src_index;
 
    return inst;
 }
@@ -162,13 +162,13 @@ struct tgsi_full_instruction vl_inst3
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+   inst.Dst[0].DstRegister.File = dst_file;
+   inst.Dst[0].DstRegister.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+   inst.Src[0].SrcRegister.File = src1_file;
+   inst.Src[0].SrcRegister.Index = src1_index;
+   inst.Src[1].SrcRegister.File = src2_file;
+   inst.Src[1].SrcRegister.Index = src2_index;
 
    return inst;
 }
@@ -188,15 +188,15 @@ struct tgsi_full_instruction vl_tex
 
    inst.Instruction.Opcode = TGSI_OPCODE_TEX;
    inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+   inst.Dst[0].DstRegister.File = dst_file;
+   inst.Dst[0].DstRegister.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
    inst.Instruction.Texture = 1;
-   inst.InstructionTexture.Texture = tex;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+   inst.Texture.Texture = tex;
+   inst.Src[0].SrcRegister.File = src1_file;
+   inst.Src[0].SrcRegister.Index = src1_index;
+   inst.Src[1].SrcRegister.File = src2_file;
+   inst.Src[1].SrcRegister.Index = src2_index;
 
    return inst;
 }
@@ -218,15 +218,15 @@ struct tgsi_full_instruction vl_inst4
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.FullDstRegisters[0].DstRegister.File = dst_file;
-   inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+   inst.Dst[0].DstRegister.File = dst_file;
+   inst.Dst[0].DstRegister.Index = dst_index;
    inst.Instruction.NumSrcRegs = 3;
-   inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
-   inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
-   inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
-   inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
-   inst.FullSrcRegisters[2].SrcRegister.File = src3_file;
-   inst.FullSrcRegisters[2].SrcRegister.Index = src3_index;
+   inst.Src[0].SrcRegister.File = src1_file;
+   inst.Src[0].SrcRegister.Index = src1_index;
+   inst.Src[1].SrcRegister.File = src2_file;
+   inst.Src[1].SrcRegister.Index = src2_index;
+   inst.Src[2].SrcRegister.File = src3_file;
+   inst.Src[2].SrcRegister.Index = src3_index;
 
    return inst;
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 19e3ab0844..b0afad349f 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen)
 
 #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
    for (ch = 0; ch < 4; ch++) \
-      if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch))
+      if (inst->Dst[0].DstRegister.WriteMask & (1 << ch))
 
 
 static boolean
@@ -552,7 +552,7 @@ emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    int ch = 0, src_reg, addr_reg;
 
-   src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+   src_reg = get_src_reg(gen, ch, &inst->Src[0]);
    addr_reg = get_address_reg(gen);
 
    /* convert float to int */
@@ -570,19 +570,19 @@ emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch, src_reg[4], dst_reg[4];
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      src_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      dst_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
-          is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
+      if (is_register_src(gen, ch, &inst->Src[0]) &&
+          is_memory_dst(gen, ch, &inst->Dst[0])) {
          /* special-case: register to memory store */
-         store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
+         store_dest_reg(gen, src_reg[ch], ch, &inst->Dst[0]);
       }
       else {
          spe_move(gen->f, dst_reg[ch], src_reg[ch]);
-         store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
+         store_dest_reg(gen, dst_reg[ch], ch, &inst->Dst[0]);
       }
    }
 
@@ -601,9 +601,9 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */
@@ -626,7 +626,7 @@ emit_binop(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    /* Free any intermediate temps we allocated */
@@ -645,16 +645,16 @@ emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+      s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
       spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
    }
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
    free_itemps(gen);
    return TRUE;
@@ -671,10 +671,10 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* setup/get src/dst/temp regs */
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+      s3_reg[ch] = get_src_reg(gen, ch, &inst->Src[2]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
       tmp_reg[ch] = get_itemp(gen);
    }
 
@@ -687,7 +687,7 @@ emit_LRP(struct codegen *gen, const struct tgsi_full_instruction *inst)
       spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
    }
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
    free_itemps(gen);
    return TRUE;
@@ -704,8 +704,8 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch, s1_reg[4], d_reg[4], tmp_reg[4];
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
       tmp_reg[ch] = get_itemp(gen);
    }
 
@@ -726,7 +726,7 @@ emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -747,8 +747,8 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
    spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    /* d = sign bit cleared in s1 */
@@ -757,7 +757,7 @@ emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -775,12 +775,12 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int s2x_reg, s2y_reg, s2z_reg;
    int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
 
-   s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
-   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
-   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
-   s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+   s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   s2x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
+   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   s2y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
+   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+   s2z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
 
    /* t0 = x0 * x1 */
    spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
@@ -795,9 +795,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
    spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
       spe_move(gen->f, d_reg, t0_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -815,14 +815,14 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
    int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
 
-   s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
-   s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
-   s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
-   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
-   s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
-   s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+   s0x_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   s1x_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
+   s0y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   s1y_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
+   s0z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+   s1z_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
+   s0w_reg = get_src_reg(gen, CHAN_W, &inst->Src[0]);
+   s1w_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
 
    /* t0 = x0 * x1 */
    spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
@@ -840,9 +840,9 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
    spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
       spe_move(gen->f, d_reg, t0_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -857,31 +857,31 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    /* XXX rewrite this function to look more like DP3/DP4 */
    int ch;
-   int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+   int s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   int s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
    int tmp_reg = get_itemp(gen);
 
    /* t = x0 * x1 */
    spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
 
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
    /* t = y0 * y1 + t */
    spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
    /* t = z0 * z1 + t */
    spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
+   s2_reg = get_src_reg(gen, CHAN_W, &inst->Src[1]);
    /* t = w1 + t */
    spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
       spe_move(gen->f, d_reg, tmp_reg);
-      store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, tmp_reg, ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -898,9 +898,9 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int src_reg[3];
    int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
 
-   src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
+   src_reg[0] = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
 
    /* t0 = x * x */
    spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
@@ -919,10 +919,10 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
    spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
       /* dst = src[ch] * t1 */
       spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
-      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -936,48 +936,48 @@ emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
 static boolean
 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
-   int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
-   int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+   int s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+   int s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
    int tmp_reg = get_itemp(gen);
 
    /* t = z0 * y1 */
    spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
 
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
    /* t = y0 * z1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
-      store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
+   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_X)) {
+      store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
    }
 
-   s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_Z, &inst->Src[1]);
    /* t = x0 * z1 */
    spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
 
-   s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_Z, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
    /* t = z0 * x1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
-      store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
+   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
+      store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
    }
 
-   s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_Y, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_X, &inst->Src[1]);
    /* t = y0 * x1 */
    spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
 
-   s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
-   s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
+   s1_reg = get_src_reg(gen, CHAN_X, &inst->Src[0]);
+   s2_reg = get_src_reg(gen, CHAN_Y, &inst->Src[1]);
    /* t = x0 * y1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
-      store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
+   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
+      store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1000,9 +1000,9 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
    one_reg = get_const_one_reg(gen);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      s2_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
@@ -1043,7 +1043,7 @@ emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1060,10 +1060,10 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch;
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int s1_reg = get_src_reg(gen, ch, &inst->Src[0]);
+      int s2_reg = get_src_reg(gen, ch, &inst->Src[1]);
+      int s3_reg = get_src_reg(gen, ch, &inst->Src[2]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
       int zero_reg = get_itemp(gen);
    
       spe_zero(gen->f, zero_reg);
@@ -1072,7 +1072,7 @@ emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
       spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
       spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
 
-      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
       free_itemps(gen);
    }
 
@@ -1090,8 +1090,8 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch, s1_reg[4], d_reg[4];
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    /* Convert float to int */
@@ -1105,7 +1105,7 @@ emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1129,8 +1129,8 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
    one_reg = get_const_one_reg(gen);
    
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
       tmp_reg[ch] = get_itemp(gen);
    }
 
@@ -1156,7 +1156,7 @@ emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1177,8 +1177,8 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
    one_reg = get_const_one_reg(gen);
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
       tmp_reg[ch] = get_itemp(gen);
    }
 
@@ -1210,7 +1210,7 @@ emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* store result */
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1272,7 +1272,7 @@ emit_function_call(struct codegen *gen,
 
    if (scalar) {
       for (a = 0; a < num_args; a++) {
-         s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
+         s_regs[a] = get_src_reg(gen, CHAN_X, &inst->Src[a]);
       }
       /* we'll call the function, put the return value in this register,
        * then replicate it across all write-enabled components in d_reg.
@@ -1287,11 +1287,11 @@ emit_function_call(struct codegen *gen,
 
       if (!scalar) {
          for (a = 0; a < num_args; a++) {
-            s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
+            s_regs[a] = get_src_reg(gen, ch, &inst->Src[a]);
          }
       }
 
-      d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
 
       if (!scalar || !func_called) {
          /* for a scalar function, we'll really only call the function once */
@@ -1336,7 +1336,7 @@ emit_function_call(struct codegen *gen,
          spe_move(gen->f, d_reg, retval_reg);
       }
 
-      store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg, ch, &inst->Dst[0]);
       free_itemps(gen);
    }
 
@@ -1352,7 +1352,7 @@ static boolean
 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    const uint target = inst->InstructionExtTexture.Texture;
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].SrcRegister.Index;
    uint addr;
    int ch;
    int coord_regs[4], d_regs[4];
@@ -1373,14 +1373,14 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
       return FALSE;
    }
 
-   assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+   assert(inst->Src[1].SrcRegister.File == TGSI_FILE_SAMPLER);
 
    spe_comment(gen->f, -4, "CALL tex:");
 
    /* get src/dst reg info */
    for (ch = 0; ch < 4; ch++) {
-      coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      coord_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      d_regs[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
    }
 
    {
@@ -1425,7 +1425,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_regs[ch], ch, &inst->Dst[0]);
       free_itemps(gen);
    }
 
@@ -1452,7 +1452,7 @@ emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* get src regs */
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
+      s_regs[ch] = get_src_reg(gen, ch, &inst->Src[0]);
    }
 
    /* test if any src regs are < 0 */
@@ -1500,9 +1500,9 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
    int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
-      d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      s0_reg[ch] = get_src_reg(gen, ch, &inst->Src[0]);
+      s1_reg[ch] = get_src_reg(gen, ch, &inst->Src[1]);
+      d_reg[ch] = get_dst_reg(gen, ch, &inst->Dst[0]);
       tmp_reg[ch] = get_itemp(gen);         
    }
 
@@ -1518,7 +1518,7 @@ emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
    }
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
+      store_dest_reg(gen, d_reg[ch], ch, &inst->Dst[0]);
    }
 
    free_itemps(gen);
@@ -1575,7 +1575,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
 
    /* update conditional execution mask with the predicate register */
    int tmp_reg = get_itemp(gen);
-   int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
+   int s1_reg = get_src_reg(gen, channel, &inst->Src[0]);
 
    /* tmp = (s1_reg == 0) */
    spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
@@ -1699,8 +1699,8 @@ emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
    int ch;
 
    FOR_EACH_ENABLED_CHANNEL(inst, ch) {
-      int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
-      int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
+      int s_reg = get_src_reg(gen, ch, &inst->Src[0]);
+      int d_reg = get_dst_reg(gen, ch, &inst->Dst[0]);
 
       int t1_reg = get_itemp(gen);
       int t2_reg = get_itemp(gen);
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 4c32b2d06d..8d58c534be 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -108,10 +108,10 @@
    for (CHAN = 0; CHAN < 4; CHAN++)
 
 #define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
    FOR_EACH_CHANNEL( CHAN )\
@@ -583,10 +583,10 @@ store_dest(
 }
 
 #define FETCH(VAL,INDEX,CHAN)\
-    fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+    fetch_source (mach, VAL, &inst->Src[INDEX], CHAN)
 
 #define STORE(VAL,INDEX,CHAN)\
-    store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+    store_dest (mach, VAL, &inst->Dst[INDEX], inst, CHAN )
 
 
 /**
@@ -612,7 +612,7 @@ exec_kil(struct spu_exec_machine *mach,
 
       /* unswizzle channel */
       swizzle = tgsi_util_get_full_src_register_swizzle (
-                        &inst->FullSrcRegisters[0],
+                        &inst->Src[0],
                         chan_index);
 
       /* check if the component has not been already tested */
@@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach,
          const struct tgsi_full_instruction *inst,
          boolean biasLod, boolean projected)
 {
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].SrcRegister.Index;
    union spu_exec_channel r[8];
    uint chan_index;
    float lodBias;
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index f2554998a9..9e626c85c0 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -276,7 +276,7 @@ static uint
 get_result_flags(const struct tgsi_full_instruction *inst)
 {
    const uint writeMask
-      = inst->FullDstRegisters[0].DstRegister.WriteMask;
+      = inst->Dst[0].DstRegister.WriteMask;
    uint flags = 0x0;
 
    if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
@@ -338,14 +338,14 @@ emit_tex(struct i915_fp_compile *p,
          const struct tgsi_full_instruction *inst,
          uint opcode)
 {
-   uint texture = inst->InstructionTexture.Texture;
-   uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   uint texture = inst->Texture.Texture;
+   uint unit = inst->Src[1].SrcRegister.Index;
    uint tex = translate_tex_src_target( p, texture );
    uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
-   uint coord = src_vector( p, &inst->FullSrcRegisters[0]);
+   uint coord = src_vector( p, &inst->Src[0]);
 
    i915_emit_texld( p,
-                    get_result_vector( p, &inst->FullDstRegisters[0] ),
+                    get_result_vector( p, &inst->Dst[0] ),
                     get_result_flags( inst ),
                     sampler,
                     coord,
@@ -367,13 +367,13 @@ emit_simple_arith(struct i915_fp_compile *p,
 
    assert(numArgs <= 3);
 
-   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] );
-   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] );
-   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] );
+   arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0] );
+   arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1] );
+   arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2] );
 
    i915_emit_arith( p,
                     opcode,
-                    get_result_vector( p, &inst->FullDstRegisters[0]),
+                    get_result_vector( p, &inst->Dst[0]),
                     get_result_flags( inst ), 0,
                     arg1,
                     arg2,
@@ -393,8 +393,8 @@ emit_simple_arith_swap2(struct i915_fp_compile *p,
 
    /* transpose first two registers */
    inst2 = *inst;
-   inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1];
-   inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0];
+   inst2.Src[0] = inst->Src[1];
+   inst2.Src[1] = inst->Src[0];
 
    emit_simple_arith(p, &inst2, opcode, numArgs);
 }
@@ -423,10 +423,10 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_ABS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       i915_emit_arith(p,
                       A0_MAX,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       src0, negate(src0, 1, 1, 1, 1), 0);
       break;
@@ -436,17 +436,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_CMP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
-      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
+      src2 = src_vector(p, &inst->Src[2]);
       i915_emit_arith(p, A0_CMP, 
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 
                       0, src0, src2, src1);   /* NOTE: order of src2, src1 */
       break;
 
    case TGSI_OPCODE_COS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -489,7 +489,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(tmp, ONE, Z, Y, X),
                       i915_emit_const4fv(p, cos_constants), 0);
@@ -504,19 +504,19 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_DPH:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, Y, Z, ONE), src1, 0);
       break;
 
    case TGSI_OPCODE_DST:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
 
       /* result[0] = 1    * 1;
        * result[1] = a[1] * b[1];
@@ -525,7 +525,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
        */
       i915_emit_arith(p,
                       A0_MUL,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, ONE, Y, Z, ONE),
                       swizzle(src1, ONE, Y, ONE, W), 0);
@@ -536,11 +536,11 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_EX2:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
 
       i915_emit_arith(p,
                       A0_EXP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
@@ -555,7 +555,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
    case TGSI_OPCODE_KIL:
       /* kill if src[0].x < 0 || src[0].y < 0 ... */
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       tmp = i915_get_utemp(p);
 
       i915_emit_texld(p,
@@ -571,17 +571,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_LG2:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
 
       i915_emit_arith(p,
                       A0_LOG,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_LIT:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       tmp = i915_get_utemp(p);
 
       /* tmp = max( a.xyzw, a.00zw )
@@ -605,7 +605,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
 
       i915_emit_arith(p, A0_CMP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                       swizzle(tmp, ONE, X, ZERO, ONE),
@@ -614,9 +614,9 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_LRP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
-      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
+      src2 = src_vector(p, &inst->Src[2]);
       flags = get_result_flags(inst);
       tmp = i915_get_utemp(p);
 
@@ -631,7 +631,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
 
       i915_emit_arith(p, A0_MAD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
       break;
 
@@ -644,8 +644,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_MIN:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
       tmp = i915_get_utemp(p);
       flags = get_result_flags(inst);
 
@@ -657,7 +657,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_MOV,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
       break;
 
@@ -670,8 +670,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_POW:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
       tmp = i915_get_utemp(p);
       flags = get_result_flags(inst);
 
@@ -686,7 +686,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_EXP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
       break;
       
@@ -695,27 +695,27 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
       
    case TGSI_OPCODE_RCP:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
 
       i915_emit_arith(p,
                       A0_RCP,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                          get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_RSQ:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
 
       i915_emit_arith(p,
                       A0_RSQ,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, X, X, X, X), 0, 0);
       break;
 
    case TGSI_OPCODE_SCS:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       tmp = i915_get_utemp(p);
 
       /* 
@@ -738,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(tmp, X, Y, X, Y),
                       swizzle(tmp, X, X, ONE, ONE), 0);
 
-      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+      writemask = inst->Dst[0].DstRegister.WriteMask;
 
       if (writemask & TGSI_WRITEMASK_Y) {
          uint tmp1;
@@ -756,7 +756,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
          i915_emit_arith(p,
                          A0_DP4,
-                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         get_result_vector(p, &inst->Dst[0]),
                          A0_DEST_CHANNEL_Y, 0,
                          swizzle(tmp1, W, Z, Y, X),
                          i915_emit_const4fv(p, sin_constants), 0);
@@ -771,7 +771,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
          i915_emit_arith(p,
                          A0_DP4,
-                         get_result_vector(p, &inst->FullDstRegisters[0]),
+                         get_result_vector(p, &inst->Dst[0]),
                          A0_DEST_CHANNEL_X, 0,
                          swizzle(tmp, ONE, Z, Y, X),
                          i915_emit_const4fv(p, cos_constants), 0);
@@ -788,7 +788,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_SIN:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
+      src0 = src_vector(p, &inst->Src[0]);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -831,7 +831,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_DP4,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(tmp, W, Z, Y, X),
                       i915_emit_const4fv(p, sin_constants), 0);
@@ -847,12 +847,12 @@ i915_translate_instruction(struct i915_fp_compile *p,
       break;
 
    case TGSI_OPCODE_SUB:
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
 
       i915_emit_arith(p,
                       A0_ADD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       src0, negate(src1, 1, 1, 1, 1), 0);
       break;
@@ -876,8 +876,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
        *      result.z = src0.x * src1.y - src0.y * src1.x;
        *      result.w = undef;
        */
-      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
-      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
+      src0 = src_vector(p, &inst->Src[0]);
+      src1 = src_vector(p, &inst->Src[1]);
       tmp = i915_get_utemp(p);
 
       i915_emit_arith(p,
@@ -888,7 +888,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
 
       i915_emit_arith(p,
                       A0_MAD,
-                      get_result_vector(p, &inst->FullDstRegisters[0]),
+                      get_result_vector(p, &inst->Dst[0]),
                       get_result_flags(inst), 0,
                       swizzle(src0, Y, Z, X, ONE),
                       swizzle(src1, Z, X, Y, ONE),
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index b2234ef679..893e665e69 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -64,7 +64,7 @@
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST)->FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST)->Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -157,7 +157,7 @@ emit_fetch(
    unsigned index,
    const unsigned chan_index )
 {
-   const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[index];
+   const struct tgsi_full_src_register *reg = &inst->Src[index];
    unsigned swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    LLVMValueRef res;
 
@@ -267,7 +267,7 @@ emit_store(
    unsigned chan_index,
    LLVMValueRef value)
 {
-   const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[index];
+   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 
    switch( inst->Instruction.Saturate ) {
    case TGSI_SAT_NONE:
@@ -319,14 +319,14 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           boolean projected,
           LLVMValueRef *texel)
 {
-   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].SrcRegister.Index;
    LLVMValueRef lodbias;
    LLVMValueRef oow;
    LLVMValueRef coords[3];
    unsigned num_coords;
    unsigned i;
 
-   switch (inst->InstructionTexture.Texture) {
+   switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
       num_coords = 1;
       break;
@@ -375,7 +375,7 @@ emit_kil(
    struct lp_build_tgsi_soa_context *bld,
    const struct tgsi_full_instruction *inst )
 {
-   const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[0];
+   const struct tgsi_full_src_register *reg = &inst->Src[0];
    LLVMValueRef terms[NUM_CHANNELS];
    LLVMValueRef mask;
    unsigned chan_index;
@@ -423,13 +423,13 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
 {
    uint i;
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
+      const struct tgsi_full_src_register *reg = &inst->Src[i];
       if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
           reg->SrcRegister.Indirect)
          return TRUE;
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
-      const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
+      const struct tgsi_full_dst_register *reg = &inst->Dst[i];
       if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
           reg->DstRegister.Indirect)
          return TRUE;
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index cd76910744..e82a23d475 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -334,7 +334,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
@@ -343,7 +343,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		switch (fsrc->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
 			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
@@ -378,8 +378,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 		}
 	}
 
-	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
-	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	dst  = tgsi_dst(vpc, &finst->Dst[0]);
+	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index acf216bb61..dfffeb3263 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -363,7 +363,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(fpc, fsrc);
 		}
@@ -372,7 +372,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 
 		switch (fsrc->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
@@ -423,8 +423,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		}
 	}
 
-	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
-	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	dst  = tgsi_dst(fpc, &finst->Dst[0]);
+	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
 	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
 	switch (finst->Instruction.Opcode) {
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index e8fba8ab16..41bd45ad29 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -334,7 +334,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
@@ -343,7 +343,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		switch (fsrc->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
 			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
@@ -378,8 +378,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 		}
 	}
 
-	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
-	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	dst  = tgsi_dst(vpc, &finst->Dst[0]);
+	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index ca6a957fc1..6addc45247 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -364,7 +364,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(fpc, fsrc);
 		}
@@ -373,7 +373,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 
 		switch (fsrc->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
@@ -433,8 +433,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		}
 	}
 
-	dst  = tgsi_dst(fpc, &finst->FullDstRegisters[0]);
-	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	dst  = tgsi_dst(fpc, &finst->Dst[0]);
+	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
 	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
 	switch (finst->Instruction.Opcode) {
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index ed0f1d857d..0cdc511166 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -405,7 +405,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
@@ -414,7 +414,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 	for (i = 0; i < finst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fsrc;
 
-		fsrc = &finst->FullSrcRegisters[i];
+		fsrc = &finst->Src[i];
 
 		switch (fsrc->SrcRegister.File) {
 		case TGSI_FILE_INPUT:
@@ -469,8 +469,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 		}
 	}
 
-	dst  = tgsi_dst(vpc, &finst->FullDstRegisters[0]);
-	mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask);
+	dst  = tgsi_dst(vpc, &finst->Dst[0]);
+	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
@@ -681,7 +681,7 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
 			const struct tgsi_full_dst_register *fdst;
 
 			finst = &p.FullToken.FullInstruction;
-			fdst = &finst->FullDstRegisters[0];
+			fdst = &finst->Dst[0];
 
 			if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
 				if (fdst->DstRegister.Index > high_addr)
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 00518af8c0..9fbf918601 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1535,10 +1535,10 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
 		if (s == i)
 			continue;
-		if ((insn->FullSrcRegisters[s].SrcRegister.Index ==
-		     insn->FullSrcRegisters[i].SrcRegister.Index) &&
-		    (insn->FullSrcRegisters[s].SrcRegister.File ==
-		     insn->FullSrcRegisters[i].SrcRegister.File))
+		if ((insn->Src[s].SrcRegister.Index ==
+		     insn->Src[i].SrcRegister.Index) &&
+		    (insn->Src[s].SrcRegister.File ==
+		     insn->Src[i].SrcRegister.File))
 			return FALSE;
 	}
 
@@ -1549,7 +1549,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 static unsigned
 nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 {
-	unsigned x, mask = insn->FullDstRegisters[0].DstRegister.WriteMask;
+	unsigned x, mask = insn->Dst[0].DstRegister.WriteMask;
 
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_COS:
@@ -1578,7 +1578,7 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 		const struct tgsi_instruction_texture *tex;
 
 		assert(insn->Instruction.Texture);
-		tex = &insn->InstructionTexture;
+		tex = &insn->Texture;
 
 		mask = 0x7;
 		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
@@ -1850,21 +1850,21 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	unsigned mask, sat, unit;
 	int i, c;
 
-	mask = inst->FullDstRegisters[0].DstRegister.WriteMask;
+	mask = inst->Dst[0].DstRegister.WriteMask;
 	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
 
 	memset(src, 0, sizeof(src));
 
 	for (c = 0; c < 4; c++) {
 		if ((mask & (1 << c)) && !pc->r_dst[c])
-			dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]);
+			dst[c] = tgsi_dst(pc, c, &inst->Dst[0]);
 		else
 			dst[c] = pc->r_dst[c];
 		rdst[c] = dst[c];
 	}
 
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i];
+		const struct tgsi_full_src_register *fs = &inst->Src[i];
 		unsigned src_mask;
 		boolean neg_supp;
 
@@ -2181,11 +2181,11 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		break;
 	case TGSI_OPCODE_TEX:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->InstructionTexture.Texture, FALSE);
+			 inst->Texture.Texture, FALSE);
 		break;
 	case TGSI_OPCODE_TXP:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->InstructionTexture.Texture, TRUE);
+			 inst->Texture.Texture, TRUE);
 		break;
 	case TGSI_OPCODE_TRUNC:
 		for (c = 0; c < 4; c++) {
@@ -2264,7 +2264,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 	const struct tgsi_dst_register *dst;
 	unsigned i, c, k, mask;
 
-	dst = &insn->FullDstRegisters[0].DstRegister;
+	dst = &insn->Dst[0].DstRegister;
 	mask = dst->WriteMask;
 
         if (dst->File == TGSI_FILE_TEMPORARY)
@@ -2282,7 +2282,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 	}
 
 	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
-		src = &insn->FullSrcRegisters[i];
+		src = &insn->Src[i];
 
 		if (src->SrcRegister.File == TGSI_FILE_TEMPORARY)
 			reg = pc->temp;
@@ -2379,7 +2379,7 @@ static unsigned
 nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 		       unsigned rdep[4])
 {
-	const struct tgsi_full_dst_register *fd = &insn->FullDstRegisters[0];
+	const struct tgsi_full_dst_register *fd = &insn->Dst[0];
 	const struct tgsi_full_src_register *fs;
 	unsigned i, deqs = 0;
 
@@ -2390,7 +2390,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 		unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
 		boolean neg_supp = negate_supported(insn, i);
 
-		fs = &insn->FullSrcRegisters[i];
+		fs = &insn->Src[i];
 		if (fs->SrcRegister.File != fd->DstRegister.File ||
 		    fs->SrcRegister.Index != fd->DstRegister.Index)
 			continue;
@@ -2427,7 +2427,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 	const struct tgsi_full_dst_register *fd;
 	unsigned i, deqs, rdep[4], m[4];
 
-	fd = &tok->FullInstruction.FullDstRegisters[0];
+	fd = &tok->FullInstruction.Dst[0];
 	deqs = nv50_tgsi_scan_swizzle(&insn, rdep);
 
 	if (is_scalar_op(insn.Instruction.Opcode)) {
@@ -2446,10 +2446,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 	for (i = 0; i < 4; ++i) {
 		assert(pc->r_dst[m[i]] == NULL);
 
-		insn.FullDstRegisters[0].DstRegister.WriteMask =
+		insn.Dst[0].DstRegister.WriteMask =
 			fd->DstRegister.WriteMask & (1 << m[i]);
 
-		if (!insn.FullDstRegisters[0].DstRegister.WriteMask)
+		if (!insn.Dst[0].DstRegister.WriteMask)
 			continue;
 
 		if (deqs & (1 << i))
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 25a634e5a2..82466e245a 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -258,18 +258,18 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
     dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate);
 
     if (src->Instruction.NumDstRegs)
-        transform_dstreg(ttr, &dst->U.I.DstReg, &src->FullDstRegisters[0]);
+        transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]);
 
     for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
-        if (src->FullSrcRegisters[i].SrcRegister.File == TGSI_FILE_SAMPLER)
-            dst->U.I.TexSrcUnit = src->FullSrcRegisters[i].SrcRegister.Index;
+        if (src->Src[i].SrcRegister.File == TGSI_FILE_SAMPLER)
+            dst->U.I.TexSrcUnit = src->Src[i].SrcRegister.Index;
         else
-            transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->FullSrcRegisters[i]);
+            transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]);
     }
 
     /* Texturing. */
     if (src->Instruction.Texture)
-       transform_texture(dst, src->InstructionTexture);
+       transform_texture(dst, src->Texture);
 }
 
 static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 3ef6cb1074..39fd7a6025 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -96,7 +96,7 @@ translate_dst_register( struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn,
                         unsigned idx )
 {
-   const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx];
+   const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
    SVGA3dShaderDestToken dest;
 
    switch (reg->DstRegister.File) {
@@ -629,7 +629,7 @@ static boolean emit_fake_arl(struct svga_shader_emitter *emit,
                              const struct tgsi_full_instruction *insn)
 {
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register src1 = get_fake_arl_const( emit );
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    SVGA3dShaderDestToken tmp = get_temp( emit );
@@ -653,7 +653,7 @@ static boolean emit_if(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
 {
    const struct src_register src = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register zero = get_zero_immediate( emit );
    SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
 
@@ -690,7 +690,7 @@ static boolean emit_floor(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* FRC  TMP, SRC */
@@ -716,11 +716,11 @@ static boolean emit_cmp(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    const struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    const struct src_register src2 = translate_src_register(
-      emit, &insn->FullSrcRegisters[2] );
+      emit, &insn->Src[2] );
 
    /* CMP  DST, SRC0, SRC2, SRC1 */
    return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
@@ -740,9 +740,9 @@ static boolean emit_div(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    const struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    SVGA3dShaderDestToken temp = get_temp( emit );
    int i;
 
@@ -782,9 +782,9 @@ static boolean emit_dp2(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    const struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    SVGA3dShaderDestToken temp = get_temp( emit );
    struct src_register temp_src0, temp_src1;
 
@@ -815,9 +815,9 @@ static boolean emit_dph(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* DP3  TMP, SRC1, SRC2 */
@@ -846,7 +846,7 @@ static boolean emit_nrm(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* DP3  TMP, SRC, SRC */
@@ -889,7 +889,7 @@ static boolean emit_sincos(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* SCS TMP SRC */
@@ -912,7 +912,7 @@ static boolean emit_sin(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* SCS TMP SRC */
@@ -937,7 +937,7 @@ static boolean emit_cos(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    SVGA3dShaderDestToken temp = get_temp( emit );
 
    /* SCS TMP SRC */
@@ -962,9 +962,9 @@ static boolean emit_sub(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
 
    src1 = negate(src1);
 
@@ -980,7 +980,7 @@ static boolean emit_kil(struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn )
 {
    SVGA3dShaderInstToken inst;
-   const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0];
+   const struct tgsi_full_src_register *reg = &insn->Src[0];
    struct src_register src0;
 
    inst = inst_token( SVGA3DOP_TEXKILL );
@@ -1154,9 +1154,9 @@ static boolean emit_select_op(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
       
    return emit_select( emit, compare, dst, src0, src1 );
 }
@@ -1189,8 +1189,8 @@ static boolean emit_tex2(struct svga_shader_emitter *emit,
       return FALSE;
    }
 
-   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
-   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+   src0 = translate_src_register( emit, &insn->Src[0] );
+   src1 = translate_src_register( emit, &insn->Src[1] );
 
    if (emit->key.fkey.tex[src1.base.num].unnormalized) {
       struct src_register wh = get_tex_dimensions( emit, src1.base.num );
@@ -1231,9 +1231,9 @@ static boolean emit_tex3(struct svga_shader_emitter *emit,
       break;
    }
 
-   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
-   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
-   src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] );
+   src0 = translate_src_register( emit, &insn->Src[0] );
+   src1 = translate_src_register( emit, &insn->Src[1] );
+   src2 = translate_src_register( emit, &insn->Src[2] );
 
    return submit_op3( emit, inst, dst, src0, src1, src2 );
 }
@@ -1245,9 +1245,9 @@ static boolean emit_tex(struct svga_shader_emitter *emit,
    SVGA3dShaderDestToken dst = 
       translate_dst_register( emit, insn, 0 );
    struct src_register src0 =
-      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+      translate_src_register( emit, &insn->Src[0] );
    struct src_register src1 =
-      translate_src_register( emit, &insn->FullSrcRegisters[1] );
+      translate_src_register( emit, &insn->Src[1] );
 
    SVGA3dShaderDestToken tex_result;
 
@@ -1359,7 +1359,7 @@ static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
 
    inst = inst_token( opcode );
    dst = translate_dst_register( emit, insn, 0 );
-   src = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src = translate_src_register( emit, &insn->Src[0] );
    src = scalar( src, TGSI_SWIZZLE_X );
 
    return submit_op1( emit, inst, dst, src );
@@ -1370,7 +1370,7 @@ static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
                                        unsigned opcode,
                                        const struct tgsi_full_instruction *insn )
 {
-   const struct tgsi_full_src_register *src = insn->FullSrcRegisters;
+   const struct tgsi_full_src_register *src = insn->Src;
    SVGA3dShaderInstToken inst;
    SVGA3dShaderDestToken dst;
 
@@ -1428,13 +1428,13 @@ static boolean emit_pow(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    boolean need_tmp = FALSE;
    
    /* POW can only output to a temporary */
-   if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY)
+   if (insn->Dst[0].DstRegister.File != TGSI_FILE_TEMPORARY)
       need_tmp = TRUE;
    
    /* POW src1 must not be the same register as dst */
@@ -1463,9 +1463,9 @@ static boolean emit_xpd(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    const struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    boolean need_dst_tmp = FALSE;
 
    /* XPD can only output to a temporary */
@@ -1517,11 +1517,11 @@ static boolean emit_lrp(struct svga_shader_emitter *emit,
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    SVGA3dShaderDestToken tmp;
    const struct src_register src0 = translate_src_register(
-      emit, &insn->FullSrcRegisters[0] );
+      emit, &insn->Src[0] );
    const struct src_register src1 = translate_src_register(
-      emit, &insn->FullSrcRegisters[1] );
+      emit, &insn->Src[1] );
    const struct src_register src2 = translate_src_register(
-      emit, &insn->FullSrcRegisters[2] );
+      emit, &insn->Src[2] );
    boolean need_dst_tmp = FALSE;
 
    /* The dst reg must not be the same as src0 or src2 */
@@ -1568,9 +1568,9 @@ static boolean emit_dst_insn(struct svga_shader_emitter *emit,
       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
       SVGA3dShaderDestToken tmp;
       const struct src_register src0 = translate_src_register(
-         emit, &insn->FullSrcRegisters[0] );
+         emit, &insn->Src[0] );
       const struct src_register src1 = translate_src_register(
-         emit, &insn->FullSrcRegisters[1] );
+         emit, &insn->Src[1] );
       struct src_register zero = get_zero_immediate( emit );
       boolean need_tmp = FALSE;
 
@@ -1633,7 +1633,7 @@ static boolean emit_exp(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 =
-      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+      translate_src_register( emit, &insn->Src[0] );
    struct src_register zero = get_zero_immediate( emit );
    SVGA3dShaderDestToken fraction;
 
@@ -1723,7 +1723,7 @@ static boolean emit_lit(struct svga_shader_emitter *emit,
       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
       SVGA3dShaderDestToken tmp = get_temp( emit );
       const struct src_register src0 = translate_src_register(
-         emit, &insn->FullSrcRegisters[0] );
+         emit, &insn->Src[0] );
       struct src_register zero = get_zero_immediate( emit );
 
       /* tmp = pow(src.y, src.w)
@@ -1806,7 +1806,7 @@ static boolean emit_ex2( struct svga_shader_emitter *emit,
 
    inst = inst_token( SVGA3DOP_EXP );
    dst = translate_dst_register( emit, insn, 0 );
-   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src0 = translate_src_register( emit, &insn->Src[0] );
    src0 = scalar( src0, TGSI_SWIZZLE_X );
 
    if (dst.mask != TGSI_WRITEMASK_XYZW) {
@@ -1829,7 +1829,7 @@ static boolean emit_log(struct svga_shader_emitter *emit,
 {
    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
    struct src_register src0 =
-      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+      translate_src_register( emit, &insn->Src[0] );
    struct src_register zero = get_zero_immediate( emit );
    SVGA3dShaderDestToken abs_tmp;
    struct src_register abs_src0;
@@ -1953,7 +1953,7 @@ static boolean emit_bgnsub( struct svga_shader_emitter *emit,
 static boolean emit_call( struct svga_shader_emitter *emit,
                            const struct tgsi_full_instruction *insn )
 {
-   unsigned position = insn->InstructionLabel.Label;
+   unsigned position = insn->Label.Label;
    unsigned i;
    
    for (i = 0; i < emit->nr_labels; i++) {
@@ -2543,25 +2543,25 @@ pre_parse_instruction( struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn,
                        int current_arl)
 {
-   if (insn->FullSrcRegisters[0].SrcRegister.Indirect &&
-       insn->FullSrcRegisters[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
-      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0];
+   if (insn->Src[0].SrcRegister.Indirect &&
+       insn->Src[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->Src[0];
       if (reg->SrcRegister.Index < 0) {
          pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
       }
    }
 
-   if (insn->FullSrcRegisters[1].SrcRegister.Indirect &&
-       insn->FullSrcRegisters[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
-      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[1];
+   if (insn->Src[1].SrcRegister.Indirect &&
+       insn->Src[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->Src[1];
       if (reg->SrcRegister.Index < 0) {
          pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
       }
    }
 
-   if (insn->FullSrcRegisters[2].SrcRegister.Indirect &&
-       insn->FullSrcRegisters[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
-      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[2];
+   if (insn->Src[2].SrcRegister.Indirect &&
+       insn->Src[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->Src[2];
       if (reg->SrcRegister.Index < 0) {
          pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
       }
-- 
cgit v1.2.3


From fe2b31e4a896167a33d267822b36eb2de0ceecba Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 15:04:18 +0000
Subject: tgsi: rename fields of tgsi_full_declaration to reduce verbosity

DeclarationRange -> Range
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c   | 30 ++++++++++++-------------
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c  | 22 +++++++++---------
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 24 ++++++++++----------
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp    |  6 ++---
 src/gallium/auxiliary/tgsi/tgsi_build.c         |  6 ++---
 src/gallium/auxiliary/tgsi/tgsi_dump.c          |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c        |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_exec.c          |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_parse.c         |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_parse.h         |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_ppc.c           |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_sanity.c        |  2 +-
 src/gallium/auxiliary/tgsi/tgsi_scan.c          |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_sse2.c          |  4 ++--
 src/gallium/auxiliary/tgsi/tgsi_text.c          |  4 ++--
 src/gallium/auxiliary/vl/vl_shader_build.c      | 24 ++++++++++----------
 src/gallium/drivers/cell/ppu/cell_gen_fp.c      |  4 ++--
 src/gallium/drivers/cell/spu/spu_exec.c         |  4 ++--
 src/gallium/drivers/i915/i915_fpc_translate.c   |  8 +++----
 src/gallium/drivers/llvmpipe/lp_bld_interp.c    |  4 ++--
 src/gallium/drivers/nv20/nv20_vertprog.c        |  2 +-
 src/gallium/drivers/nv30/nv30_fragprog.c        | 10 ++++-----
 src/gallium/drivers/nv30/nv30_vertprog.c        |  2 +-
 src/gallium/drivers/nv40/nv40_fragprog.c        |  8 +++----
 src/gallium/drivers/nv40/nv40_vertprog.c        | 10 ++++-----
 src/gallium/drivers/nv50/nv50_program.c         |  4 ++--
 src/gallium/drivers/r300/r300_vs.c              |  8 +++----
 src/gallium/drivers/svga/svga_tgsi_decl_sm20.c  |  4 ++--
 src/gallium/drivers/svga/svga_tgsi_decl_sm30.c  |  4 ++--
 29 files changed, 109 insertions(+), 109 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 58d867faeb..f1d1715237 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -141,18 +141,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
        decl->Semantic.Index == 0) {
-      aactx->colorOutput = decl->DeclarationRange.First;
+      aactx->colorOutput = decl->Range.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
       uint i;
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last; i++) {
+      for (i = decl->Range.First;
+           i <= decl->Range.Last; i++) {
          aactx->samplersUsed |= 1 << i;
       }
    }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
-      if ((int) decl->DeclarationRange.Last > aactx->maxInput)
-         aactx->maxInput = decl->DeclarationRange.Last;
+      if ((int) decl->Range.Last > aactx->maxInput)
+         aactx->maxInput = decl->Range.Last;
       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
            (int) decl->Semantic.Index > aactx->maxGeneric) {
          aactx->maxGeneric = decl->Semantic.Index;
@@ -160,8 +160,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
       uint i;
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last; i++) {
+      for (i = decl->Range.First;
+           i <= decl->Range.Last; i++) {
          aactx->tempsUsed |= (1 << i);
       }
    }
@@ -230,28 +230,28 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       decl.Declaration.Semantic = 1;
       decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
       decl.Semantic.Index = aactx->maxGeneric + 1;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = aactx->maxInput + 1;
+      decl.Range.First = 
+      decl.Range.Last = aactx->maxInput + 1;
       ctx->emit_declaration(ctx, &decl);
 
       /* declare new sampler */
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_SAMPLER;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = aactx->freeSampler;
+      decl.Range.First = 
+      decl.Range.Last = aactx->freeSampler;
       ctx->emit_declaration(ctx, &decl);
 
       /* declare new temp regs */
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = aactx->texTemp;
+      decl.Range.First = 
+      decl.Range.Last = aactx->texTemp;
       ctx->emit_declaration(ctx, &decl);
 
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = aactx->colorTemp;
+      decl.Range.First = 
+      decl.Range.Last = aactx->colorTemp;
       ctx->emit_declaration(ctx, &decl);
 
       aactx->firstInstruction = FALSE;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 09fc55cb5e..e9e2402c23 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -133,11 +133,11 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
        decl->Semantic.Index == 0) {
-      aactx->colorOutput = decl->DeclarationRange.First;
+      aactx->colorOutput = decl->Range.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
-      if ((int) decl->DeclarationRange.Last > aactx->maxInput)
-         aactx->maxInput = decl->DeclarationRange.Last;
+      if ((int) decl->Range.Last > aactx->maxInput)
+         aactx->maxInput = decl->Range.Last;
       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
            (int) decl->Semantic.Index > aactx->maxGeneric) {
          aactx->maxGeneric = decl->Semantic.Index;
@@ -145,8 +145,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx,
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
       uint i;
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last; i++) {
+      for (i = decl->Range.First;
+           i <= decl->Range.Last; i++) {
          aactx->tempsUsed |= (1 << i);
       }
    }
@@ -200,21 +200,21 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       decl.Declaration.Semantic = 1;
       decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
       decl.Semantic.Index = aactx->maxGeneric + 1;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = texInput;
+      decl.Range.First = 
+      decl.Range.Last = texInput;
       ctx->emit_declaration(ctx, &decl);
 
       /* declare new temp regs */
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = tmp0;
+      decl.Range.First = 
+      decl.Range.Last = tmp0;
       ctx->emit_declaration(ctx, &decl);
 
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = aactx->colorTemp;
+      decl.Range.First = 
+      decl.Range.Last = aactx->colorTemp;
       ctx->emit_declaration(ctx, &decl);
 
       aactx->firstInstruction = FALSE;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index fe0d511218..218dcb9d12 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -133,20 +133,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx,
 
    if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
       uint i;
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last; i++) {
+      for (i = decl->Range.First;
+           i <= decl->Range.Last; i++) {
          pctx->samplersUsed |= 1 << i;
       }
    }
    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
-      pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last);
+      pctx->maxInput = MAX2(pctx->maxInput, (int) decl->Range.Last);
       if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
-         pctx->wincoordInput = (int) decl->DeclarationRange.First;
+         pctx->wincoordInput = (int) decl->Range.First;
    }
    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
       uint i;
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last; i++) {
+      for (i = decl->Range.First;
+           i <= decl->Range.Last; i++) {
          pctx->tempsUsed |= (1 << i);
       }
    }
@@ -228,23 +228,23 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
          decl.Declaration.Semantic = 1;
          decl.Semantic.Name = TGSI_SEMANTIC_POSITION;
          decl.Semantic.Index = 0;
-         decl.DeclarationRange.First = 
-            decl.DeclarationRange.Last = wincoordInput;
+         decl.Range.First = 
+            decl.Range.Last = wincoordInput;
          ctx->emit_declaration(ctx, &decl);
       }
 
       /* declare new sampler */
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_SAMPLER;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = pctx->freeSampler;
+      decl.Range.First = 
+      decl.Range.Last = pctx->freeSampler;
       ctx->emit_declaration(ctx, &decl);
 
       /* declare new temp regs */
       decl = tgsi_default_full_declaration();
       decl.Declaration.File = TGSI_FILE_TEMPORARY;
-      decl.DeclarationRange.First = 
-      decl.DeclarationRange.Last = pctx->texTemp;
+      decl.Range.First = 
+      decl.Range.Last = pctx->texTemp;
       ctx->emit_declaration(ctx, &decl);
 
       /* emit immediate = {1/32, 1/32, 1, 1}
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index fbf4d2636d..3edff0e5b2 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -94,8 +94,8 @@ translate_declaration(struct gallivm_ir *prog,
       unsigned first, last, mask;
       uint interp_method;
 
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
+      first = decl->Range.First;
+      last = decl->Range.Last;
       mask = decl->Declaration.UsageMask;
 
       /* Do not touch WPOS.xy */
@@ -149,7 +149,7 @@ translate_declarationir(struct gallivm_ir *,
                       struct tgsi_full_declaration *)
 {
    if (decl->Declaration.File == TGSI_FILE_ADDRESS) {
-      int idx = decl->DeclarationRange.First;
+      int idx = decl->Range.First;
       storage->addAddress(idx);
    }
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 7ec832aad9..094d8d52d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -172,7 +172,7 @@ tgsi_default_full_declaration( void )
    struct tgsi_full_declaration  full_declaration;
 
    full_declaration.Declaration  = tgsi_default_declaration();
-   full_declaration.DeclarationRange = tgsi_default_declaration_range();
+   full_declaration.Range = tgsi_default_declaration_range();
    full_declaration.Semantic = tgsi_default_declaration_semantic();
 
    return full_declaration;
@@ -209,8 +209,8 @@ tgsi_build_full_declaration(
    size++;
 
    *dr = tgsi_build_declaration_range(
-      full_decl->DeclarationRange.First,
-      full_decl->DeclarationRange.Last,
+      full_decl->Range.First,
+      full_decl->Range.Last,
       declaration,
       header );
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 4ff7f4b11e..7791f9f4fc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -224,8 +224,8 @@ iter_declaration(
    _dump_register(
       ctx,
       decl->Declaration.File,
-      decl->DeclarationRange.First,
-      decl->DeclarationRange.Last );
+      decl->Range.First,
+      decl->Range.Last );
    _dump_writemask(
       ctx,
       decl->Declaration.UsageMask );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 194b2473bc..5593942154 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -223,9 +223,9 @@ dump_declaration_verbose(
 
    EOL();
    TXT( "\nFirst: " );
-   UID( decl->DeclarationRange.First );
+   UID( decl->Range.First );
    TXT( "\nLast : " );
-   UID( decl->DeclarationRange.Last );
+   UID( decl->Range.Last );
 
    if( decl->Declaration.Semantic ) {
       EOL();
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a9bfb0d6df..3f8d59e46a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1895,8 +1895,8 @@ exec_declaration(struct tgsi_exec_machine *mach,
       if (decl->Declaration.File == TGSI_FILE_INPUT) {
          uint first, last, mask;
 
-         first = decl->DeclarationRange.First;
-         last = decl->DeclarationRange.Last;
+         first = decl->Range.First;
+         last = decl->Range.Last;
          mask = decl->Declaration.UsageMask;
 
          if (decl->Semantic.Name == TGSI_SEMANTIC_POSITION) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index ff593fdc32..7946fdd732 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -112,7 +112,7 @@ tgsi_parse_token(
       memset(decl, 0, sizeof *decl);
       copy_token(&decl->Declaration, &token);
 
-      next_token( ctx, &decl->DeclarationRange );
+      next_token( ctx, &decl->Range );
 
       if( decl->Declaration.Semantic ) {
          next_token( ctx, &decl->Semantic );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 2f8f4d488b..1965c5181d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -62,7 +62,7 @@ struct tgsi_full_src_register
 struct tgsi_full_declaration
 {
    struct tgsi_declaration Declaration;
-   struct tgsi_declaration_range DeclarationRange;
+   struct tgsi_declaration_range Range;
    struct tgsi_declaration_semantic Semantic;
 };
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 8397f432f9..ec5f235143 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -1178,8 +1178,8 @@ emit_declaration(
       unsigned first, last, mask;
       unsigned i, j;
 
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
+      first = decl->Range.First;
+      last = decl->Range.Last;
       mask = decl->Declaration.UsageMask;
 
       for( i = first; i <= last; i++ ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 8422b91a30..005894e604 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -286,7 +286,7 @@ iter_declaration(
    file = decl->Declaration.File;
    if (!check_file_name( ctx, file ))
       return TRUE;
-   for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) {
+   for (i = decl->Range.First; i <= decl->Range.Last; i++) {
       if (is_register_declared( ctx, file, i ))
          report_error( ctx, "%s[%u]: The same register declared more than once", file_names[file], i );
       ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG));
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index be25b3dc5c..6ca25c36ec 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -119,8 +119,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                = &parse.FullToken.FullDeclaration;
             const uint file = fulldecl->Declaration.File;
             uint reg;
-            for (reg = fulldecl->DeclarationRange.First;
-                 reg <= fulldecl->DeclarationRange.Last;
+            for (reg = fulldecl->Range.First;
+                 reg <= fulldecl->Range.Last;
                  reg++) {
 
                /* only first 32 regs will appear in this bitfield */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 2d2ee321c9..c23b0cc343 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2637,8 +2637,8 @@ emit_declaration(
       unsigned first, last, mask;
       unsigned i, j;
 
-      first = decl->DeclarationRange.First;
-      last = decl->DeclarationRange.Last;
+      first = decl->Range.First;
+      last = decl->Range.Last;
       mask = decl->Declaration.UsageMask;
 
       for( i = first; i <= last; i++ ) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index e9b1a21fb4..295ded9664 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -799,8 +799,8 @@ static boolean parse_declaration( struct translate_ctx *ctx )
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = file;
    decl.Declaration.UsageMask = writemask;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    cur = ctx->cur;
    eat_opt_white( &cur );
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index 82300b1da2..548dfca05a 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -38,8 +38,8 @@ struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index
    decl.Declaration.Semantic = 1;
    decl.Semantic.Name = name;
    decl.Semantic.Index = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
@@ -67,8 +67,8 @@ struct tgsi_full_declaration vl_decl_interpolated_input
    decl.Semantic.Name = name;
    decl.Semantic.Index = index;
    decl.Declaration.Interpolate = interpolation;;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
@@ -81,8 +81,8 @@ struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int i
    decl.Declaration.Semantic = 1;
    decl.Semantic.Name = name;
    decl.Semantic.Index = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
@@ -95,8 +95,8 @@ struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int inde
    decl.Declaration.Semantic = 1;
    decl.Semantic.Name = name;
    decl.Semantic.Index = index;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
@@ -107,8 +107,8 @@ struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last
 
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = TGSI_FILE_TEMPORARY;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
@@ -119,8 +119,8 @@ struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int l
 
    decl = tgsi_default_full_declaration();
    decl.Declaration.File = TGSI_FILE_SAMPLER;
-   decl.DeclarationRange.First = first;
-   decl.DeclarationRange.Last = last;
+   decl.Range.First = first;
+   decl.Range.Last = last;
 
    return decl;
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index b0afad349f..aeabe002d0 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -1909,8 +1909,8 @@ emit_declaration(struct cell_context *cell,
 
    switch (decl->Declaration.File) {
    case TGSI_FILE_TEMPORARY:
-      for (i = decl->DeclarationRange.First;
-           i <= decl->DeclarationRange.Last;
+      for (i = decl->Range.First;
+           i <= decl->Range.Last;
            i++) {
          assert(i < MAX_TEMPS);
          for (ch = 0; ch < 4; ch++) {
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 8d58c534be..ee5e3432d5 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -833,8 +833,8 @@ exec_declaration(struct spu_exec_machine *mach,
          unsigned first, last, mask;
          interpolation_func interp;
 
-         first = decl->DeclarationRange.First;
-         last = decl->DeclarationRange.Last;
+         first = decl->Range.First;
+         last = decl->Range.Last;
          mask = decl->Declaration.UsageMask;
 
          switch( decl->Declaration.Interpolate ) {
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 9e626c85c0..1a4a7bbe62 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -928,8 +928,8 @@ i915_translate_instructions(struct i915_fp_compile *p,
          if (parse.FullToken.FullDeclaration.Declaration.File
                   == TGSI_FILE_CONSTANT) {
             uint i;
-            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
-                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+            for (i = parse.FullToken.FullDeclaration.Range.First;
+                 i <= parse.FullToken.FullDeclaration.Range.Last;
                  i++) {
                assert(ifs->constant_flags[i] == 0x0);
                ifs->constant_flags[i] = I915_CONSTFLAG_USER;
@@ -939,8 +939,8 @@ i915_translate_instructions(struct i915_fp_compile *p,
          else if (parse.FullToken.FullDeclaration.Declaration.File
                   == TGSI_FILE_TEMPORARY) {
             uint i;
-            for (i = parse.FullToken.FullDeclaration.DeclarationRange.First;
-                 i <= parse.FullToken.FullDeclaration.DeclarationRange.Last;
+            for (i = parse.FullToken.FullDeclaration.Range.First;
+                 i <= parse.FullToken.FullDeclaration.Range.Last;
                  i++) {
                assert(i < I915_MAX_TEMPORARY);
                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 818c0e943e..49dab8ab61 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -303,8 +303,8 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
             unsigned first, last, mask;
             unsigned attrib;
 
-            first = decl->DeclarationRange.First;
-            last = decl->DeclarationRange.Last;
+            first = decl->Range.First;
+            last = decl->Range.Last;
             mask = decl->Declaration.UsageMask;
 
             for( attrib = first; attrib <= last; ++attrib ) {
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index e82a23d475..abffbe33a8 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -535,7 +535,7 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
 		return FALSE;
 	}
 
-	vpc->output_map[fdec->DeclarationRange.First] = hw;
+	vpc->output_map[fdec->Range.First] = hw;
 	return TRUE;
 }
 
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index dfffeb3263..20f7d4152c 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -604,7 +604,7 @@ nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc,
 		return FALSE;
 	}
 
-	fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+	fpc->attrib_map[fdec->Range.First] = hw;
 	return TRUE;
 }
 
@@ -614,10 +614,10 @@ nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc,
 {
 	switch (fdec->Semantic.Name) {
 	case TGSI_SEMANTIC_POSITION:
-		fpc->depth_id = fdec->DeclarationRange.First;
+		fpc->depth_id = fdec->Range.First;
 		break;
 	case TGSI_SEMANTIC_COLOR:
-		fpc->colour_id = fdec->DeclarationRange.First;
+		fpc->colour_id = fdec->Range.First;
 		break;
 	default:
 		NOUVEAU_ERR("bad output semantic\n");
@@ -653,9 +653,9 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
 					goto out_err;
 				break;
 			/*case TGSI_FILE_TEMPORARY:
-				if (fdec->DeclarationRange.Last > high_temp) {
+				if (fdec->Range.Last > high_temp) {
 					high_temp =
-						fdec->DeclarationRange.Last;
+						fdec->Range.Last;
 				}
 				break;*/
 			default:
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 41bd45ad29..99fde93245 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -535,7 +535,7 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
 		return FALSE;
 	}
 
-	vpc->output_map[fdec->DeclarationRange.First] = hw;
+	vpc->output_map[fdec->Range.First] = hw;
 	return TRUE;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 6addc45247..8e8cba1a0c 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -676,7 +676,7 @@ nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc,
 		return FALSE;
 	}
 
-	fpc->attrib_map[fdec->DeclarationRange.First] = hw;
+	fpc->attrib_map[fdec->Range.First] = hw;
 	return TRUE;
 }
 
@@ -684,7 +684,7 @@ static boolean
 nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc,
 				const struct tgsi_full_declaration *fdec)
 {
-	unsigned idx = fdec->DeclarationRange.First;
+	unsigned idx = fdec->Range.First;
 	unsigned hw;
 
 	switch (fdec->Semantic.Name) {
@@ -738,9 +738,9 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
 					goto out_err;
 				break;
 			case TGSI_FILE_TEMPORARY:
-				if (fdec->DeclarationRange.Last > high_temp) {
+				if (fdec->Range.Last > high_temp) {
 					high_temp =
-						fdec->DeclarationRange.Last;
+						fdec->Range.Last;
 				}
 				break;
 			default:
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 0cdc511166..913e050389 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -577,7 +577,7 @@ static boolean
 nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 				const struct tgsi_full_declaration *fdec)
 {
-	unsigned idx = fdec->DeclarationRange.First;
+	unsigned idx = fdec->Range.First;
 	int hw;
 
 	switch (fdec->Semantic.Name) {
@@ -652,16 +652,16 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
 			fdec = &p.FullToken.FullDeclaration;
 			switch (fdec->Declaration.File) {
 			case TGSI_FILE_TEMPORARY:
-				if (fdec->DeclarationRange.Last > high_temp) {
+				if (fdec->Range.Last > high_temp) {
 					high_temp =
-						fdec->DeclarationRange.Last;
+						fdec->Range.Last;
 				}
 				break;
 #if 0 /* this would be nice.. except gallium doesn't track it */
 			case TGSI_FILE_ADDRESS:
-				if (fdec->DeclarationRange.Last > high_addr) {
+				if (fdec->Range.Last > high_addr) {
 					high_addr =
-						fdec->DeclarationRange.Last;
+						fdec->Range.Last;
 				}
 				break;
 #endif
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 9fbf918601..57747a1840 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2547,8 +2547,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			unsigned si, last, first, mode;
 
 			d = &tp.FullToken.FullDeclaration;
-			first = d->DeclarationRange.First;
-			last = d->DeclarationRange.Last;
+			first = d->Range.First;
+			last = d->Range.Last;
 
 			switch (d->Declaration.File) {
 			case TGSI_FILE_TEMPORARY:
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 939b13e4b3..096707dda4 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -79,19 +79,19 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
 
         switch (decl->Semantic.Name) {
             case TGSI_SEMANTIC_POSITION:
-                c->code->outputs[decl->DeclarationRange.First] = 0;
+                c->code->outputs[decl->Range.First] = 0;
                 break;
             case TGSI_SEMANTIC_PSIZE:
-                c->code->outputs[decl->DeclarationRange.First] = 1;
+                c->code->outputs[decl->Range.First] = 1;
                 break;
             case TGSI_SEMANTIC_COLOR:
-                c->code->outputs[decl->DeclarationRange.First] = 1 +
+                c->code->outputs[decl->Range.First] = 1 +
                     (pointsize ? 1 : 0) +
                     colors++;
                 break;
             case TGSI_SEMANTIC_FOG:
             case TGSI_SEMANTIC_GENERIC:
-                c->code->outputs[decl->DeclarationRange.First] = 1 +
+                c->code->outputs[decl->Range.First] = 1 +
                     (pointsize ? 1 : 0) +
                     out_colors +
                     generic++;
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
index 6f4822a89d..23b3ace7f3 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
@@ -230,8 +230,8 @@ static boolean ps20_sampler( struct svga_shader_emitter *emit,
 boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
                              const struct tgsi_full_declaration *decl )
 {
-   unsigned first = decl->DeclarationRange.First;
-   unsigned last = decl->DeclarationRange.Last;
+   unsigned first = decl->Range.First;
+   unsigned last = decl->Range.Last;
    unsigned semantic = 0;
    unsigned semantic_idx = 0;
    unsigned idx;
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
index 65aa23ce3e..d1c7336dec 100644
--- a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -335,8 +335,8 @@ static boolean ps30_sampler( struct svga_shader_emitter *emit,
 boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
                              const struct tgsi_full_declaration *decl )
 {
-   unsigned first = decl->DeclarationRange.First;
-   unsigned last = decl->DeclarationRange.Last;
+   unsigned first = decl->Range.First;
+   unsigned last = decl->Range.Last;
    unsigned semantic = 0;
    unsigned semantic_idx = 0;
    unsigned idx;
-- 
cgit v1.2.3


From 5b0824dfe5eaf59fa87134e7482b3d147b262901 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 15:08:55 +0000
Subject: tgsi: rename fields of tgsi_full_dst_register to reduce verbosity

DstRegister -> Register
DstRegisterInd -> Indirect
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c    | 24 +++----
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c   | 86 ++++++++++++------------
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c  |  8 +--
 src/gallium/auxiliary/draw/draw_vs_aos.c         | 50 +++++++-------
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp     | 16 ++---
 src/gallium/auxiliary/tgsi/tgsi_build.c          | 34 +++++-----
 src/gallium/auxiliary/tgsi/tgsi_dump.c           | 20 +++---
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c         | 38 +++++------
 src/gallium/auxiliary/tgsi/tgsi_exec.c           | 36 +++++-----
 src/gallium/auxiliary/tgsi/tgsi_parse.c          | 12 ++--
 src/gallium/auxiliary/tgsi/tgsi_parse.h          |  4 +-
 src/gallium/auxiliary/tgsi/tgsi_ppc.c            | 18 ++---
 src/gallium/auxiliary/tgsi/tgsi_sanity.c         |  8 +--
 src/gallium/auxiliary/tgsi/tgsi_scan.c           |  6 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c           | 14 ++--
 src/gallium/auxiliary/tgsi/tgsi_text.c           |  6 +-
 src/gallium/auxiliary/vl/vl_compositor.c         |  2 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  6 +-
 src/gallium/auxiliary/vl/vl_shader_build.c       | 16 ++---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c       | 20 +++---
 src/gallium/drivers/cell/spu/spu_exec.c          | 12 ++--
 src/gallium/drivers/i915/i915_fpc_translate.c    | 10 +--
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c   | 12 ++--
 src/gallium/drivers/nv20/nv20_vertprog.c         |  8 +--
 src/gallium/drivers/nv30/nv30_fragprog.c         | 10 +--
 src/gallium/drivers/nv30/nv30_vertprog.c         |  8 +--
 src/gallium/drivers/nv40/nv40_fragprog.c         | 10 +--
 src/gallium/drivers/nv40/nv40_vertprog.c         | 16 ++---
 src/gallium/drivers/nv50/nv50_program.c          | 36 +++++-----
 src/gallium/drivers/r300/r300_tgsi_to_rc.c       |  8 +--
 src/gallium/drivers/svga/svga_tgsi_insn.c        | 12 ++--
 31 files changed, 283 insertions(+), 283 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index f1d1715237..fe200983ca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -265,8 +265,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = aactx->texTemp;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = aactx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
       newInst.Texture.Texture = TGSI_TEXTURE_2D;
@@ -281,9 +281,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].Register.Index = aactx->colorOutput;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
@@ -293,9 +293,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].Register.Index = aactx->colorOutput;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
@@ -318,10 +318,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
 
       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
          struct tgsi_full_dst_register *dst = &inst->Dst[i];
-         if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
-             dst->DstRegister.Index == aactx->colorOutput) {
-            dst->DstRegister.File = TGSI_FILE_TEMPORARY;
-            dst->DstRegister.Index = aactx->colorTemp;
+         if (dst->Register.File == TGSI_FILE_OUTPUT &&
+             dst->Register.Index == aactx->colorOutput) {
+            dst->Register.File = TGSI_FILE_TEMPORARY;
+            dst->Register.Index = aactx->colorTemp;
          }
       }
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index e9e2402c23..39e1406e96 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -234,9 +234,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
       newInst.Src[0].SrcRegister.Index = texInput;
@@ -248,9 +248,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -265,9 +265,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -277,9 +277,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -290,9 +290,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -323,9 +323,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
       newInst.Src[0].SrcRegister.Index = texInput;
@@ -339,9 +339,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 1;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -352,9 +352,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
       newInst.Src[0].SrcRegister.Index = texInput;
@@ -368,9 +368,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -384,9 +384,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -405,9 +405,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = tmp0;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = tmp0;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 3;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = tmp0;
@@ -439,9 +439,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ;
+      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].Register.Index = aactx->colorOutput;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
@@ -451,9 +451,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_OUTPUT;
-      newInst.Dst[0].DstRegister.Index = aactx->colorOutput;
-      newInst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_W;
+      newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+      newInst.Dst[0].Register.Index = aactx->colorOutput;
+      newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
       newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
@@ -469,10 +469,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
 
       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
          struct tgsi_full_dst_register *dst = &inst->Dst[i];
-         if (dst->DstRegister.File == TGSI_FILE_OUTPUT &&
-             dst->DstRegister.Index == aactx->colorOutput) {
-            dst->DstRegister.File = TGSI_FILE_TEMPORARY;
-            dst->DstRegister.Index = aactx->colorTemp;
+         if (dst->Register.File == TGSI_FILE_OUTPUT &&
+             dst->Register.Index == aactx->colorOutput) {
+            dst->Register.File = TGSI_FILE_TEMPORARY;
+            dst->Register.Index = aactx->colorTemp;
          }
       }
    }
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 218dcb9d12..99165b1006 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,8 +280,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = pctx->texTemp;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
       newInst.Src[0].SrcRegister.Index = wincoordInput;
@@ -293,8 +293,8 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst = tgsi_default_full_instruction();
       newInst.Instruction.Opcode = TGSI_OPCODE_TEX;
       newInst.Instruction.NumDstRegs = 1;
-      newInst.Dst[0].DstRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Dst[0].DstRegister.Index = pctx->texTemp;
+      newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Dst[0].Register.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
       newInst.Texture.Texture = TGSI_TEXTURE_2D;
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index a9c8715bc8..8c93642954 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -361,8 +361,8 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
 static struct x86_reg get_dst_ptr( struct aos_compilation *cp, 
                                    const struct tgsi_full_dst_register *dst )
 {
-   unsigned file = dst->DstRegister.File;
-   unsigned idx = dst->DstRegister.Index;
+   unsigned file = dst->Register.File;
+   unsigned idx = dst->Register.Index;
    unsigned i;
    
 
@@ -669,15 +669,15 @@ static void store_dest( struct aos_compilation *cp,
 {
    struct x86_reg dst;
 
-   switch (reg->DstRegister.WriteMask) {
+   switch (reg->Register.WriteMask) {
    case 0:
       return;
    
    case TGSI_WRITEMASK_XYZW:
       aos_adopt_xmm_reg(cp, 
                         get_xmm_writable(cp, result), 
-                        reg->DstRegister.File,
-                        reg->DstRegister.Index,
+                        reg->Register.File,
+                        reg->Register.Index,
                         TRUE);
       return;
    default: 
@@ -685,10 +685,10 @@ static void store_dest( struct aos_compilation *cp,
    }
 
    dst = aos_get_shader_reg_xmm(cp, 
-                                reg->DstRegister.File,
-                                reg->DstRegister.Index);
+                                reg->Register.File,
+                                reg->Register.Index);
 
-   switch (reg->DstRegister.WriteMask) {
+   switch (reg->Register.WriteMask) {
    case TGSI_WRITEMASK_X:
       sse_movss(cp->func, dst, get_xmm(cp, result));
       break;
@@ -710,14 +710,14 @@ static void store_dest( struct aos_compilation *cp,
       break;
 
    default:
-      mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+      mask_write(cp, dst, result, reg->Register.WriteMask);
       break;
    }
 
    aos_adopt_xmm_reg(cp, 
                      dst, 
-                     reg->DstRegister.File,
-                     reg->DstRegister.Index,
+                     reg->Register.File,
+                     reg->Register.Index,
                      TRUE);
 
 }
@@ -737,7 +737,7 @@ static void store_scalar_dest( struct aos_compilation *cp,
                                const struct tgsi_full_dst_register *reg,
                                struct x86_reg result )
 {
-   unsigned writemask = reg->DstRegister.WriteMask;
+   unsigned writemask = reg->Register.WriteMask;
    struct x86_reg dst;
 
    if (writemask != TGSI_WRITEMASK_X &&
@@ -754,12 +754,12 @@ static void store_scalar_dest( struct aos_compilation *cp,
 
    result = get_xmm(cp, result);
    dst = aos_get_shader_reg_xmm(cp, 
-                                reg->DstRegister.File,
-                                reg->DstRegister.Index);
+                                reg->Register.File,
+                                reg->Register.Index);
 
 
-   switch (reg->DstRegister.WriteMask) {
+   switch (reg->Register.WriteMask) {
    case TGSI_WRITEMASK_X:
       sse_movss(cp->func, dst, result);
       break;
@@ -782,8 +782,8 @@ static void store_scalar_dest( struct aos_compilation *cp,
 
    aos_adopt_xmm_reg(cp, 
                      dst, 
-                     reg->DstRegister.File,
-                     reg->DstRegister.Index,
+                     reg->Register.File,
+                     reg->Register.Index,
                      TRUE);
 }
    
@@ -819,7 +819,7 @@ static void x87_fstp_dest4( struct aos_compilation *cp,
                             const struct tgsi_full_dst_register *dst )
 {
    struct x86_reg ptr = get_dst_ptr(cp, dst); 
-   unsigned writemask = dst->DstRegister.WriteMask;
+   unsigned writemask = dst->Register.WriteMask;
 
    x87_fst_or_nop(cp->func, writemask, 0, ptr);
    x87_fst_or_nop(cp->func, writemask, 1, ptr);
@@ -1100,7 +1100,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
    struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
-   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].Register.WriteMask;
    int i;
 
    set_fpu_round_neg_inf( cp );
@@ -1127,7 +1127,7 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) 
 {
    struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
-   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].Register.WriteMask;
    int i;
 
    set_fpu_round_nearest( cp );
@@ -1156,7 +1156,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
    struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
    struct x86_reg st0 = x86_make_reg(file_x87, 0);
    struct x86_reg st1 = x86_make_reg(file_x87, 1);
-   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].Register.WriteMask;
    int i;
 
    set_fpu_round_neg_inf( cp );
@@ -1190,7 +1190,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
-   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].Register.WriteMask;
    unsigned lit_count = cp->lit_count++;
    struct x86_reg result, arg0;
    unsigned i;
@@ -1270,7 +1270,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst
 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op )
 {
    struct x86_reg dst = get_dst_ptr(cp, &op->Dst[0]); 
-   unsigned writemask = op->Dst[0].DstRegister.WriteMask;
+   unsigned writemask = op->Dst[0].Register.WriteMask;
 
    if (writemask & TGSI_WRITEMASK_YZ) {
       struct x86_reg st1 = x86_make_reg(file_x87, 1);
@@ -1897,10 +1897,10 @@ static void find_last_write_outputs( struct aos_compilation *cp )
          continue;
 
       for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) {
-         if (parse.FullToken.FullInstruction.Dst[i].DstRegister.File ==
+         if (parse.FullToken.FullInstruction.Dst[i].Register.File ==
              TGSI_FILE_OUTPUT) 
          {
-            unsigned idx = parse.FullToken.FullInstruction.Dst[i].DstRegister.Index;
+            unsigned idx = parse.FullToken.FullInstruction.Dst[i].Register.Index;
             cp->output_last_write[idx] = this_instruction;
          }
       }
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 3edff0e5b2..135d307ce1 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -658,12 +658,12 @@ translate_instruction(llvm::Module *module,
    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
       struct tgsi_full_dst_register *dst = &inst->Dst[i];
 
-      if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
-         storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
-      } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
-         storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
-      } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
-         storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      if (dst->Register.File == TGSI_FILE_OUTPUT) {
+         storage->setOutputElement(dst->Register.Index, out, dst->Register.WriteMask);
+      } else if (dst->Register.File == TGSI_FILE_TEMPORARY) {
+         storage->setTempElement(dst->Register.Index, out, dst->Register.WriteMask);
+      } else if (dst->Register.File == TGSI_FILE_ADDRESS) {
+         storage->setAddrElement(dst->Register.Index, out, dst->Register.WriteMask);
       } else {
          fprintf(stderr, "ERROR: unsupported LLVM destination!");
          assert(!"wrong destination");
@@ -994,8 +994,8 @@ translate_instructionir(llvm::Module *module,
    /* store results  */
    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
       struct tgsi_full_dst_register *dst = &inst->Dst[i];
-      storage->store((enum tgsi_file_type)dst->DstRegister.File,
-                     dst->DstRegister.Index, out, dst->DstRegister.WriteMask,
+      storage->store((enum tgsi_file_type)dst->Register.File,
+                     dst->Register.Index, out, dst->Register.WriteMask,
 		     instr->getIRBuilder() );
    }
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 094d8d52d8..91fb4f68e5 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -580,15 +580,15 @@ tgsi_build_full_instruction(
       size++;
 
       *dst_register = tgsi_build_dst_register(
-         reg->DstRegister.File,
-         reg->DstRegister.WriteMask,
-         reg->DstRegister.Indirect,
-         reg->DstRegister.Index,
+         reg->Register.File,
+         reg->Register.WriteMask,
+         reg->Register.Indirect,
+         reg->Register.Index,
          instruction,
          header );
       prev_token = (struct tgsi_token  *) dst_register;
 
-      if( reg->DstRegister.Indirect ) {
+      if( reg->Register.Indirect ) {
          struct tgsi_src_register *ind;
 
          if( maxsize <= size )
@@ -597,16 +597,16 @@ tgsi_build_full_instruction(
          size++;
 
          *ind = tgsi_build_src_register(
-            reg->DstRegisterInd.File,
-            reg->DstRegisterInd.SwizzleX,
-            reg->DstRegisterInd.SwizzleY,
-            reg->DstRegisterInd.SwizzleZ,
-            reg->DstRegisterInd.SwizzleW,
-            reg->DstRegisterInd.Negate,
-            reg->DstRegisterInd.Absolute,
-            reg->DstRegisterInd.Indirect,
-            reg->DstRegisterInd.Dimension,
-            reg->DstRegisterInd.Index,
+            reg->Indirect.File,
+            reg->Indirect.SwizzleX,
+            reg->Indirect.SwizzleY,
+            reg->Indirect.SwizzleZ,
+            reg->Indirect.SwizzleW,
+            reg->Indirect.Negate,
+            reg->Indirect.Absolute,
+            reg->Indirect.Indirect,
+            reg->Indirect.Dimension,
+            reg->Indirect.Index,
             instruction,
             header );
       }
@@ -980,8 +980,8 @@ tgsi_default_full_dst_register( void )
 {
    struct tgsi_full_dst_register full_dst_register;
 
-   full_dst_register.DstRegister = tgsi_default_dst_register();
-   full_dst_register.DstRegisterInd = tgsi_default_src_register();
+   full_dst_register.Register = tgsi_default_dst_register();
+   full_dst_register.Indirect = tgsi_default_src_register();
 
    return full_dst_register;
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 7791f9f4fc..6141865f03 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -358,23 +358,23 @@ iter_instruction(
          CHR( ',' );
       CHR( ' ' );
 
-      if (dst->DstRegister.Indirect) {
+      if (dst->Register.Indirect) {
          _dump_register_ind(
             ctx,
-            dst->DstRegister.File,
-            dst->DstRegister.Index,
-            dst->DstRegisterInd.File,
-            dst->DstRegisterInd.Index,
-            dst->DstRegisterInd.SwizzleX );
+            dst->Register.File,
+            dst->Register.Index,
+            dst->Indirect.File,
+            dst->Indirect.Index,
+            dst->Indirect.SwizzleX );
       }
       else {
          _dump_register(
             ctx,
-            dst->DstRegister.File,
-            dst->DstRegister.Index,
-            dst->DstRegister.Index );
+            dst->Register.File,
+            dst->Register.Index,
+            dst->Register.Index );
       }
-      _dump_writemask( ctx, dst->DstRegister.WriteMask );
+      _dump_writemask( ctx, dst->Register.WriteMask );
 
       first_reg = FALSE;
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 5593942154..5fae5a225f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -339,48 +339,48 @@ dump_instruction_verbose(
 
       EOL();
       TXT( "\nFile     : " );
-      ENM( dst->DstRegister.File, TGSI_FILES );
-      if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) {
+      ENM( dst->Register.File, TGSI_FILES );
+      if( deflt || fd->Register.WriteMask != dst->Register.WriteMask ) {
          TXT( "\nWriteMask: " );
-         ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS );
+         ENM( dst->Register.WriteMask, TGSI_WRITEMASKS );
       }
       if( ignored ) {
-         if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) {
+         if( deflt || fd->Register.Indirect != dst->Register.Indirect ) {
             TXT( "\nIndirect : " );
-            UID( dst->DstRegister.Indirect );
+            UID( dst->Register.Indirect );
          }
-         if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) {
+         if( deflt || fd->Register.Dimension != dst->Register.Dimension ) {
             TXT( "\nDimension: " );
-            UID( dst->DstRegister.Dimension );
+            UID( dst->Register.Dimension );
          }
       }
-      if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) {
+      if( deflt || fd->Register.Index != dst->Register.Index ) {
          TXT( "\nIndex    : " );
-         SID( dst->DstRegister.Index );
+         SID( dst->Register.Index );
       }
       if( ignored ) {
          TXT( "\nPadding  : " );
-         UIX( dst->DstRegister.Padding );
-         if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) {
+         UIX( dst->Register.Padding );
+         if( deflt || fd->Register.Extended != dst->Register.Extended ) {
             TXT( "\nExtended : " );
-            UID( dst->DstRegister.Extended );
+            UID( dst->Register.Extended );
          }
       }
 
-      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) {
+      if( deflt || tgsi_compare_dst_register_ext_modulate( dst->RegisterExtModulate, fd->RegisterExtModulate ) ) {
          EOL();
          TXT( "\nType    : " );
-         ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
-         if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) {
+         ENM( dst->RegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS );
+         if( deflt || fd->RegisterExtModulate.Modulate != dst->RegisterExtModulate.Modulate ) {
             TXT( "\nModulate: " );
-            ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES );
+            ENM( dst->RegisterExtModulate.Modulate, TGSI_MODULATES );
          }
          if( ignored ) {
             TXT( "\nPadding : " );
-            UIX( dst->DstRegisterExtModulate.Padding );
-            if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) {
+            UIX( dst->RegisterExtModulate.Padding );
+            if( deflt || fd->RegisterExtModulate.Extended != dst->RegisterExtModulate.Extended ) {
                TXT( "\nExtended: " );
-               UID( dst->DstRegisterExtModulate.Extended );
+               UID( dst->RegisterExtModulate.Extended );
             }
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 3f8d59e46a..a6bd1a784f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -107,10 +107,10 @@
 #define TEMP_P0            TGSI_EXEC_TEMP_P0
 
 #define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
 
 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\
@@ -188,7 +188,7 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
 {
    uint i, chan;
 
-   uint writemask = inst->Dst[0].DstRegister.WriteMask;
+   uint writemask = inst->Dst[0].Register.WriteMask;
    if (writemask == TGSI_WRITEMASK_X ||
        writemask == TGSI_WRITEMASK_Y ||
        writemask == TGSI_WRITEMASK_Z ||
@@ -201,9 +201,9 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
    /* loop over src regs */
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       if ((inst->Src[i].SrcRegister.File ==
-           inst->Dst[0].DstRegister.File) &&
+           inst->Dst[0].Register.File) &&
           (inst->Src[i].SrcRegister.Index ==
-           inst->Dst[0].DstRegister.Index)) {
+           inst->Dst[0].Register.Index)) {
          /* loop over dest channels */
          uint channelsWritten = 0x0;
          FOR_EACH_ENABLED_CHANNEL(*inst, chan) {
@@ -1424,11 +1424,11 @@ store_dest(
     *
     *    file[ind[2].x+1],
     *    where:
-    *       ind = DstRegisterInd.File
-    *       [2] = DstRegisterInd.Index
-    *       .x = DstRegisterInd.SwizzleX
+    *       ind = Indirect.File
+    *       [2] = Indirect.Index
+    *       .x = Indirect.SwizzleX
     */
-   if (reg->DstRegister.Indirect) {
+   if (reg->Register.Indirect) {
       union tgsi_exec_channel index;
       union tgsi_exec_channel indir_index;
       uint swizzle;
@@ -1437,15 +1437,15 @@ store_dest(
       index.i[0] =
       index.i[1] =
       index.i[2] =
-      index.i[3] = reg->DstRegisterInd.Index;
+      index.i[3] = reg->Indirect.Index;
 
       /* get current value of address register[swizzle] */
-      swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
+      swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
 
       /* fetch values from the address/indirection register */
       fetch_src_file_channel(
          mach,
-         reg->DstRegisterInd.File,
+         reg->Indirect.File,
          swizzle,
          &index,
          &indir_index );
@@ -1454,37 +1454,37 @@ store_dest(
       offset = (int) indir_index.f[0];
    }
 
-   switch (reg->DstRegister.File) {
+   switch (reg->Register.File) {
    case TGSI_FILE_NULL:
       dst = &null;
       break;
 
    case TGSI_FILE_OUTPUT:
       index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
-         + reg->DstRegister.Index;
+         + reg->Register.Index;
       dst = &mach->Outputs[offset + index].xyzw[chan_index];
       break;
 
    case TGSI_FILE_TEMPORARY:
-      index = reg->DstRegister.Index;
+      index = reg->Register.Index;
       assert( index < TGSI_EXEC_NUM_TEMPS );
       dst = &mach->Temps[offset + index].xyzw[chan_index];
       break;
 
    case TGSI_FILE_ADDRESS:
-      index = reg->DstRegister.Index;
+      index = reg->Register.Index;
       dst = &mach->Addrs[index].xyzw[chan_index];
       break;
 
    case TGSI_FILE_LOOP:
-      assert(reg->DstRegister.Index == 0);
+      assert(reg->Register.Index == 0);
       assert(mach->LoopCounterStackTop > 0);
       assert(chan_index == CHAN_X);
       dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
       break;
 
    case TGSI_FILE_PREDICATE:
-      index = reg->DstRegister.Index;
+      index = reg->Register.Index;
       assert(index < TGSI_EXEC_NUM_PREDS);
       dst = &mach->Predicates[index].xyzw[chan_index];
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 7946fdd732..e3a6bc0f54 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -168,21 +168,21 @@ tgsi_parse_token(
 
       for(  i = 0; i < inst->Instruction.NumDstRegs; i++ ) {
 
-         next_token( ctx, &inst->Dst[i].DstRegister );
+         next_token( ctx, &inst->Dst[i].Register );
 
          /*
           * No support for indirect or multi-dimensional addressing.
           */
-         assert( !inst->Dst[i].DstRegister.Dimension );
+         assert( !inst->Dst[i].Register.Dimension );
 
-         if( inst->Dst[i].DstRegister.Indirect ) {
-            next_token( ctx, &inst->Dst[i].DstRegisterInd );
+         if( inst->Dst[i].Register.Indirect ) {
+            next_token( ctx, &inst->Dst[i].Indirect );
 
             /*
              * No support for indirect or multi-dimensional addressing.
              */
-            assert( !inst->Dst[i].DstRegisterInd.Dimension );
-            assert( !inst->Dst[i].DstRegisterInd.Indirect );
+            assert( !inst->Dst[i].Indirect.Dimension );
+            assert( !inst->Dst[i].Indirect.Indirect );
          }
       }
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 1965c5181d..331a533dd9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -47,8 +47,8 @@ struct tgsi_full_header
 
 struct tgsi_full_dst_register
 {
-   struct tgsi_dst_register               DstRegister;
-   struct tgsi_src_register               DstRegisterInd;
+   struct tgsi_dst_register               Register;
+   struct tgsi_src_register               Indirect;
 };
 
 struct tgsi_full_src_register
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index ec5f235143..adb16f6ac9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -60,7 +60,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -167,8 +167,8 @@ is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
 static boolean
 is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
 {
-   return (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
-           reg->DstRegister.Index < MAX_PPC_TEMPS);
+   return (reg->Register.File == TGSI_FILE_TEMPORARY &&
+           reg->Register.Index < MAX_PPC_TEMPS);
 }
 
 
@@ -485,7 +485,7 @@ get_dst_vec(struct gen_context *gen,
    const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 
    if (is_ppc_vec_temporary_dst(reg)) {
-      int vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+      int vec = gen->temps_map[reg->Register.Index][chan_index];
       return vec;
    }
    else {
@@ -507,10 +507,10 @@ emit_store(struct gen_context *gen,
 {
    const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 
-   switch (reg->DstRegister.File) {
+   switch (reg->Register.File) {
    case TGSI_FILE_OUTPUT:
       {
-         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         int offset = (reg->Register.Index * 4 + chan_index) * 16;
          int offset_reg = emit_li_offset(gen, offset);
          ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg);
       }
@@ -518,14 +518,14 @@ emit_store(struct gen_context *gen,
    case TGSI_FILE_TEMPORARY:
       if (is_ppc_vec_temporary_dst(reg)) {
          if (!free_vec) {
-            int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index];
+            int dst_vec = gen->temps_map[reg->Register.Index][chan_index];
             if (dst_vec != src_vec)
                ppc_vmove(gen->f, dst_vec, src_vec);
          }
          free_vec = FALSE;
       }
       else {
-         int offset = (reg->DstRegister.Index * 4 + chan_index) * 16;
+         int offset = (reg->Register.Index * 4 + chan_index) * 16;
          int offset_reg = emit_li_offset(gen, offset);
          ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg);
       }
@@ -535,7 +535,7 @@ emit_store(struct gen_context *gen,
       emit_addrs(
          func,
          xmm,
-         reg->DstRegister.Index,
+         reg->Register.Index,
          chan_index );
       break;
 #endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 005894e604..7e50e25353 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -212,8 +212,8 @@ iter_instruction(
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
       check_register_usage(
          ctx,
-         inst->Dst[i].DstRegister.File,
-         inst->Dst[i].DstRegister.Index,
+         inst->Dst[i].Register.File,
+         inst->Dst[i].Register.Index,
          "destination",
          FALSE );
    }
@@ -245,8 +245,8 @@ iter_instruction(
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_BGNFOR:
    case TGSI_OPCODE_ENDFOR:
-      if (inst->Dst[0].DstRegister.File != TGSI_FILE_LOOP ||
-          inst->Dst[0].DstRegister.Index != 0) {
+      if (inst->Dst[0].Register.File != TGSI_FILE_LOOP ||
+          inst->Dst[0].Register.Index != 0) {
          report_error(ctx, "Destination register must be LOOP[0]");
       }
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 6ca25c36ec..90832e71bb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -212,8 +212,8 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
             /* Do a whole bunch of checks for a simple move */
             if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
                 src->SrcRegister.File != TGSI_FILE_INPUT ||
-                dst->DstRegister.File != TGSI_FILE_OUTPUT ||
-                src->SrcRegister.Index != dst->DstRegister.Index ||
+                dst->Register.File != TGSI_FILE_OUTPUT ||
+                src->SrcRegister.Index != dst->Register.Index ||
 
                 src->SrcRegister.Negate ||
                 src->SrcRegister.Absolute ||
@@ -223,7 +223,7 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
                 src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
                 src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
 
-                dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW)
+                dst->Register.WriteMask != TGSI_WRITEMASK_XYZW)
             {
                tgsi_parse_free(&parse);
                return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index c23b0cc343..785076a520 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -58,7 +58,7 @@
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -1371,12 +1371,12 @@ emit_store(
    }
 
 
-   switch( reg->DstRegister.File ) {
+   switch( reg->Register.File ) {
    case TGSI_FILE_OUTPUT:
       emit_output(
          func,
          xmm,
-         reg->DstRegister.Index,
+         reg->Register.Index,
          chan_index );
       break;
 
@@ -1384,7 +1384,7 @@ emit_store(
       emit_temps(
          func,
          xmm,
-         reg->DstRegister.Index,
+         reg->Register.Index,
          chan_index );
       break;
 
@@ -1392,7 +1392,7 @@ emit_store(
       emit_addrs(
          func,
          xmm,
-         reg->DstRegister.Index,
+         reg->Register.Index,
          chan_index );
       break;
 
@@ -1727,8 +1727,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
       const struct tgsi_full_dst_register *reg = &inst->Dst[i];
-      if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->DstRegister.Indirect)
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    return FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 295ded9664..27b90f5ab7 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -506,9 +506,9 @@ parse_dst_operand(
    if (!parse_opt_writemask( ctx, &writemask ))
       return FALSE;
 
-   dst->DstRegister.File = file;
-   dst->DstRegister.Index = index;
-   dst->DstRegister.WriteMask = writemask;
+   dst->Register.File = file;
+   dst->Register.Index = index;
+   dst->Register.WriteMask = writemask;
    return TRUE;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 34a02b5042..e31a46ba46 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -213,7 +213,7 @@ create_frag_shader(struct vl_compositor *c)
     */
    for (i = 0; i < 4; ++i) {
       inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i);
-      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 93e79e7f37..4564a6c67f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -240,7 +240,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
       inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
@@ -418,7 +418,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
@@ -623,7 +623,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
       inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-      inst.Dst[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i;
+      inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
 
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index 548dfca05a..9ebb4a9171 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -138,8 +138,8 @@ struct tgsi_full_instruction vl_inst2
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.Dst[0].DstRegister.File = dst_file;
-   inst.Dst[0].DstRegister.Index = dst_index;
+   inst.Dst[0].Register.File = dst_file;
+   inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 1;
    inst.Src[0].SrcRegister.File = src_file;
    inst.Src[0].SrcRegister.Index = src_index;
@@ -162,8 +162,8 @@ struct tgsi_full_instruction vl_inst3
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.Dst[0].DstRegister.File = dst_file;
-   inst.Dst[0].DstRegister.Index = dst_index;
+   inst.Dst[0].Register.File = dst_file;
+   inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
    inst.Src[0].SrcRegister.File = src1_file;
    inst.Src[0].SrcRegister.Index = src1_index;
@@ -188,8 +188,8 @@ struct tgsi_full_instruction vl_tex
 
    inst.Instruction.Opcode = TGSI_OPCODE_TEX;
    inst.Instruction.NumDstRegs = 1;
-   inst.Dst[0].DstRegister.File = dst_file;
-   inst.Dst[0].DstRegister.Index = dst_index;
+   inst.Dst[0].Register.File = dst_file;
+   inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
    inst.Instruction.Texture = 1;
    inst.Texture.Texture = tex;
@@ -218,8 +218,8 @@ struct tgsi_full_instruction vl_inst4
 
    inst.Instruction.Opcode = opcode;
    inst.Instruction.NumDstRegs = 1;
-   inst.Dst[0].DstRegister.File = dst_file;
-   inst.Dst[0].DstRegister.Index = dst_index;
+   inst.Dst[0].Register.File = dst_file;
+   inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 3;
    inst.Src[0].SrcRegister.File = src1_file;
    inst.Src[0].SrcRegister.Index = src1_index;
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index aeabe002d0..f639c62605 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -249,7 +249,7 @@ static boolean
 is_memory_dst(struct codegen *gen, int channel,
               const struct tgsi_full_dst_register *dst)
 {
-   if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
+   if (dst->Register.File == TGSI_FILE_OUTPUT) {
       return TRUE;
    }
    else {
@@ -374,12 +374,12 @@ get_dst_reg(struct codegen *gen,
 {
    int reg = -1;
 
-   switch (dest->DstRegister.File) {
+   switch (dest->Register.File) {
    case TGSI_FILE_TEMPORARY:
       if (gen->if_nesting > 0 || gen->loop_nesting > 0)
          reg = get_itemp(gen);
       else
-         reg = gen->temp_regs[dest->DstRegister.Index][channel];
+         reg = gen->temp_regs[dest->Register.Index][channel];
       break;
    case TGSI_FILE_OUTPUT:
       reg = get_itemp(gen);
@@ -419,10 +419,10 @@ store_dest_reg(struct codegen *gen,
    }
 #endif
 
-   switch (dest->DstRegister.File) {
+   switch (dest->Register.File) {
    case TGSI_FILE_TEMPORARY:
       if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
-         int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
+         int d_reg = gen->temp_regs[dest->Register.Index][channel];
          int exec_reg = get_exec_mask_reg(gen);
          /* Mix d with new value according to exec mask:
           * d[i] = mask_reg[i] ? value_reg : d_reg
@@ -437,7 +437,7 @@ store_dest_reg(struct codegen *gen,
    case TGSI_FILE_OUTPUT:
       {
          /* offset is measured in quadwords, not bytes */
-         int offset = dest->DstRegister.Index * 4 + channel;
+         int offset = dest->Register.Index * 4 + channel;
          if (gen->if_nesting > 0 || gen->loop_nesting > 0) {
             int exec_reg = get_exec_mask_reg(gen);
             int curval_reg = get_itemp(gen);
@@ -544,7 +544,7 @@ emit_epilogue(struct codegen *gen)
 
 #define FOR_EACH_ENABLED_CHANNEL(inst, ch) \
    for (ch = 0; ch < 4; ch++) \
-      if (inst->Dst[0].DstRegister.WriteMask & (1 << ch))
+      if (inst->Dst[0].Register.WriteMask & (1 << ch))
 
 
 static boolean
@@ -948,7 +948,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
    /* t = y0 * z1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_X)) {
+   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_X)) {
       store_dest_reg(gen, tmp_reg, CHAN_X, &inst->Dst[0]);
    }
 
@@ -962,7 +962,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
    /* t = z0 * x1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
+   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Y)) {
       store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->Dst[0]);
    }
 
@@ -976,7 +976,7 @@ emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
    /* t = x0 * y1 - t */
    spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
 
-   if (inst->Dst[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
+   if (inst->Dst[0].Register.WriteMask & (1 << CHAN_Z)) {
       store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->Dst[0]);
    }
 
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index ee5e3432d5..1b4792a316 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -108,10 +108,10 @@
    for (CHAN = 0; CHAN < 4; CHAN++)
 
 #define IS_CHANNEL_ENABLED(INST, CHAN)\
-   ((INST).Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IS_CHANNEL_ENABLED2(INST, CHAN)\
-   ((INST).Dst[1].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST).Dst[1].Register.WriteMask & (1 << (CHAN)))
 
 #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
    FOR_EACH_CHANNEL( CHAN )\
@@ -532,21 +532,21 @@ store_dest(
 {
    union spu_exec_channel *dst;
 
-   switch( reg->DstRegister.File ) {
+   switch( reg->Register.File ) {
    case TGSI_FILE_NULL:
       return;
 
    case TGSI_FILE_OUTPUT:
       dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
-                           + reg->DstRegister.Index].xyzw[chan_index];
+                           + reg->Register.Index].xyzw[chan_index];
       break;
 
    case TGSI_FILE_TEMPORARY:
-      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+      dst = &mach->Temps[reg->Register.Index].xyzw[chan_index];
       break;
 
    case TGSI_FILE_ADDRESS:
-      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+      dst = &mach->Addrs[reg->Register.Index].xyzw[chan_index];
       break;
 
    default:
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 1a4a7bbe62..13c280827a 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -246,10 +246,10 @@ static uint
 get_result_vector(struct i915_fp_compile *p,
                   const struct tgsi_full_dst_register *dest)
 {
-   switch (dest->DstRegister.File) {
+   switch (dest->Register.File) {
    case TGSI_FILE_OUTPUT:
       {
-         uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index];
+         uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
          switch (sem_name) {
          case TGSI_SEMANTIC_POSITION:
             return UREG(REG_TYPE_OD, 0);
@@ -261,7 +261,7 @@ get_result_vector(struct i915_fp_compile *p,
          }
       }
    case TGSI_FILE_TEMPORARY:
-      return UREG(REG_TYPE_R, dest->DstRegister.Index);
+      return UREG(REG_TYPE_R, dest->Register.Index);
    default:
       i915_program_error(p, "Bad inst->DstReg.File");
       return 0;
@@ -276,7 +276,7 @@ static uint
 get_result_flags(const struct tgsi_full_instruction *inst)
 {
    const uint writeMask
-      = inst->Dst[0].DstRegister.WriteMask;
+      = inst->Dst[0].Register.WriteMask;
    uint flags = 0x0;
 
    if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
@@ -738,7 +738,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       swizzle(tmp, X, Y, X, Y),
                       swizzle(tmp, X, X, ONE, ONE), 0);
 
-      writemask = inst->Dst[0].DstRegister.WriteMask;
+      writemask = inst->Dst[0].Register.WriteMask;
 
       if (writemask & TGSI_WRITEMASK_Y) {
          uint tmp1;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 893e665e69..99266f34ed 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -64,7 +64,7 @@
    for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
 
 #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
-   ((INST)->Dst[0].DstRegister.WriteMask & (1 << (CHAN)))
+   ((INST)->Dst[0].Register.WriteMask & (1 << (CHAN)))
 
 #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
    if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
@@ -287,13 +287,13 @@ emit_store(
       assert(0);
    }
 
-   switch( reg->DstRegister.File ) {
+   switch( reg->Register.File ) {
    case TGSI_FILE_OUTPUT:
-      bld->outputs[reg->DstRegister.Index][chan_index] = value;
+      bld->outputs[reg->Register.Index][chan_index] = value;
       break;
 
    case TGSI_FILE_TEMPORARY:
-      bld->temps[reg->DstRegister.Index][chan_index] = value;
+      bld->temps[reg->Register.Index][chan_index] = value;
       break;
 
    case TGSI_FILE_ADDRESS:
@@ -430,8 +430,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
       const struct tgsi_full_dst_register *reg = &inst->Dst[i];
-      if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->DstRegister.Indirect)
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    return FALSE;
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index abffbe33a8..e3bb9f9d7f 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -286,14 +286,14 @@ static INLINE struct nv20_sreg
 tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
 	struct nv20_sreg dst;
 
-	switch (fdst->DstRegister.File) {
+	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
 		dst = nv20_sr(NV30SR_OUTPUT,
-			      vpc->output_map[fdst->DstRegister.Index]);
+			      vpc->output_map[fdst->Register.Index]);
 
 		break;
 	case TGSI_FILE_TEMPORARY:
-		dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+		dst = nv20_sr(NV30SR_TEMP, fdst->Register.Index);
 		if (vpc->high_temp < dst.index)
 			vpc->high_temp = dst.index;
 		break;
@@ -379,7 +379,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 	}
 
 	dst  = tgsi_dst(vpc, &finst->Dst[0]);
-	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
+	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 20f7d4152c..14dc884b3a 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -281,22 +281,22 @@ static INLINE struct nv30_sreg
 tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
 	int idx;
 
-	switch (fdst->DstRegister.File) {
+	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
-		if (fdst->DstRegister.Index == fpc->colour_id)
+		if (fdst->Register.Index == fpc->colour_id)
 			return nv30_sr(NV30SR_OUTPUT, 0);
 		else
 			return nv30_sr(NV30SR_OUTPUT, 1);
 		break;
 	case TGSI_FILE_TEMPORARY:
-		idx = fdst->DstRegister.Index + 1;
+		idx = fdst->Register.Index + 1;
 		if (fpc->high_temp < idx)
 			fpc->high_temp = idx;
 		return nv30_sr(NV30SR_TEMP, idx);
 	case TGSI_FILE_NULL:
 		return nv30_sr(NV30SR_NONE, 0);
 	default:
-		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
 		return nv30_sr(NV30SR_NONE, 0);
 	}
 }
@@ -424,7 +424,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 	}
 
 	dst  = tgsi_dst(fpc, &finst->Dst[0]);
-	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
+	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
 	switch (finst->Instruction.Opcode) {
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 99fde93245..41e4161dda 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -286,14 +286,14 @@ static INLINE struct nv30_sreg
 tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
 	struct nv30_sreg dst;
 
-	switch (fdst->DstRegister.File) {
+	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
 		dst = nv30_sr(NV30SR_OUTPUT,
-			      vpc->output_map[fdst->DstRegister.Index]);
+			      vpc->output_map[fdst->Register.Index]);
 
 		break;
 	case TGSI_FILE_TEMPORARY:
-		dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index);
+		dst = nv30_sr(NV30SR_TEMP, fdst->Register.Index);
 		if (vpc->high_temp < dst.index)
 			vpc->high_temp = dst.index;
 		break;
@@ -379,7 +379,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 	}
 
 	dst  = tgsi_dst(vpc, &finst->Dst[0]);
-	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
+	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 8e8cba1a0c..02c23e92c0 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -290,15 +290,15 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 
 static INLINE struct nv40_sreg
 tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) {
-	switch (fdst->DstRegister.File) {
+	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
-		return fpc->r_result[fdst->DstRegister.Index];
+		return fpc->r_result[fdst->Register.Index];
 	case TGSI_FILE_TEMPORARY:
-		return fpc->r_temp[fdst->DstRegister.Index];
+		return fpc->r_temp[fdst->Register.Index];
 	case TGSI_FILE_NULL:
 		return nv40_sr(NV40SR_NONE, 0);
 	default:
-		NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File);
+		NOUVEAU_ERR("bad dst file %d\n", fdst->Register.File);
 		return nv40_sr(NV40SR_NONE, 0);
 	}
 }
@@ -434,7 +434,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 	}
 
 	dst  = tgsi_dst(fpc, &finst->Dst[0]);
-	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
+	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 	sat  = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
 
 	switch (finst->Instruction.Opcode) {
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 913e050389..c4f51d622c 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -326,15 +326,15 @@ static INLINE struct nv40_sreg
 tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) {
 	struct nv40_sreg dst;
 
-	switch (fdst->DstRegister.File) {
+	switch (fdst->Register.File) {
 	case TGSI_FILE_OUTPUT:
-		dst = vpc->r_result[fdst->DstRegister.Index];
+		dst = vpc->r_result[fdst->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		dst = vpc->r_temp[fdst->DstRegister.Index];
+		dst = vpc->r_temp[fdst->Register.Index];
 		break;
 	case TGSI_FILE_ADDRESS:
-		dst = vpc->r_address[fdst->DstRegister.Index];
+		dst = vpc->r_address[fdst->Register.Index];
 		break;
 	default:
 		NOUVEAU_ERR("bad dst file\n");
@@ -470,7 +470,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 	}
 
 	dst  = tgsi_dst(vpc, &finst->Dst[0]);
-	mask = tgsi_mask(finst->Dst[0].DstRegister.WriteMask);
+	mask = tgsi_mask(finst->Dst[0].Register.WriteMask);
 
 	switch (finst->Instruction.Opcode) {
 	case TGSI_OPCODE_ABS:
@@ -683,9 +683,9 @@ nv40_vertprog_prepare(struct nv40_vpc *vpc)
 			finst = &p.FullToken.FullInstruction;
 			fdst = &finst->Dst[0];
 
-			if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) {
-				if (fdst->DstRegister.Index > high_addr)
-					high_addr = fdst->DstRegister.Index;
+			if (fdst->Register.File == TGSI_FILE_ADDRESS) {
+				if (fdst->Register.Index > high_addr)
+					high_addr = fdst->Register.Index;
 			}
 		
 		}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 57747a1840..3409edb4c8 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1549,7 +1549,7 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 static unsigned
 nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 {
-	unsigned x, mask = insn->Dst[0].DstRegister.WriteMask;
+	unsigned x, mask = insn->Dst[0].Register.WriteMask;
 
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_COS:
@@ -1612,17 +1612,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 static struct nv50_reg *
 tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 {
-	switch (dst->DstRegister.File) {
+	switch (dst->Register.File) {
 	case TGSI_FILE_TEMPORARY:
-		return &pc->temp[dst->DstRegister.Index * 4 + c];
+		return &pc->temp[dst->Register.Index * 4 + c];
 	case TGSI_FILE_OUTPUT:
-		return &pc->result[dst->DstRegister.Index * 4 + c];
+		return &pc->result[dst->Register.Index * 4 + c];
 	case TGSI_FILE_ADDRESS:
 	{
-		struct nv50_reg *r = pc->addr[dst->DstRegister.Index * 4 + c];
+		struct nv50_reg *r = pc->addr[dst->Register.Index * 4 + c];
 		if (!r) {
 			r = alloc_addr(pc, NULL);
-			pc->addr[dst->DstRegister.Index * 4 + c] = r;
+			pc->addr[dst->Register.Index * 4 + c] = r;
 		}
 		assert(r);
 		return r;
@@ -1850,7 +1850,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	unsigned mask, sat, unit;
 	int i, c;
 
-	mask = inst->Dst[0].DstRegister.WriteMask;
+	mask = inst->Dst[0].Register.WriteMask;
 	sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE;
 
 	memset(src, 0, sizeof(src));
@@ -2264,7 +2264,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 	const struct tgsi_dst_register *dst;
 	unsigned i, c, k, mask;
 
-	dst = &insn->Dst[0].DstRegister;
+	dst = &insn->Dst[0].Register;
 	mask = dst->WriteMask;
 
         if (dst->File == TGSI_FILE_TEMPORARY)
@@ -2359,13 +2359,13 @@ static struct nv50_reg *
 tgsi_broadcast_dst(struct nv50_pc *pc,
 		   const struct tgsi_full_dst_register *fd, unsigned mask)
 {
-	if (fd->DstRegister.File == TGSI_FILE_TEMPORARY) {
-		int c = ffs(~mask & fd->DstRegister.WriteMask);
+	if (fd->Register.File == TGSI_FILE_TEMPORARY) {
+		int c = ffs(~mask & fd->Register.WriteMask);
 		if (c)
 			return tgsi_dst(pc, c - 1, fd);
 	} else {
-		int c = ffs(fd->DstRegister.WriteMask) - 1;
-		if ((1 << c) == fd->DstRegister.WriteMask)
+		int c = ffs(fd->Register.WriteMask) - 1;
+		if ((1 << c) == fd->Register.WriteMask)
 			return tgsi_dst(pc, c, fd);
 	}
 
@@ -2391,8 +2391,8 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 		boolean neg_supp = negate_supported(insn, i);
 
 		fs = &insn->Src[i];
-		if (fs->SrcRegister.File != fd->DstRegister.File ||
-		    fs->SrcRegister.Index != fd->DstRegister.Index)
+		if (fs->SrcRegister.File != fd->Register.File ||
+		    fs->SrcRegister.Index != fd->Register.Index)
 			continue;
 
 		for (chn = 0; chn < 4; ++chn) {
@@ -2403,7 +2403,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 			c = tgsi_util_get_full_src_register_swizzle(fs, chn);
 			s = tgsi_util_get_full_src_register_sign_mode(fs, chn);
 
-			if (!(fd->DstRegister.WriteMask & (1 << c)))
+			if (!(fd->Register.WriteMask & (1 << c)))
 				continue;
 
 			/* no danger if src is copied to TEMP first */
@@ -2446,10 +2446,10 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 	for (i = 0; i < 4; ++i) {
 		assert(pc->r_dst[m[i]] == NULL);
 
-		insn.Dst[0].DstRegister.WriteMask =
-			fd->DstRegister.WriteMask & (1 << m[i]);
+		insn.Dst[0].Register.WriteMask =
+			fd->Register.WriteMask & (1 << m[i]);
 
-		if (!insn.Dst[0].DstRegister.WriteMask)
+		if (!insn.Dst[0].Register.WriteMask)
 			continue;
 
 		if (deqs & (1 << i))
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 82466e245a..92796d150b 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -190,10 +190,10 @@ static void transform_dstreg(
     struct rc_dst_register * dst,
     struct tgsi_full_dst_register * src)
 {
-    dst->File = translate_register_file(src->DstRegister.File);
-    dst->Index = translate_register_index(ttr, src->DstRegister.File, src->DstRegister.Index);
-    dst->WriteMask = src->DstRegister.WriteMask;
-    dst->RelAddr = src->DstRegister.Indirect;
+    dst->File = translate_register_file(src->Register.File);
+    dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+    dst->WriteMask = src->Register.WriteMask;
+    dst->RelAddr = src->Register.Indirect;
 }
 
 static void transform_srcreg(
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 39fd7a6025..9ca89f1cdd 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -99,21 +99,21 @@ translate_dst_register( struct svga_shader_emitter *emit,
    const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
    SVGA3dShaderDestToken dest;
 
-   switch (reg->DstRegister.File) {
+   switch (reg->Register.File) {
    case TGSI_FILE_OUTPUT:
       /* Output registers encode semantic information in their name.
        * Need to lookup a table built at decl time:
        */
-      dest = emit->output_map[reg->DstRegister.Index];
+      dest = emit->output_map[reg->Register.Index];
       break;
 
    default:
-      dest = dst_register( translate_file( reg->DstRegister.File ),
-                           reg->DstRegister.Index );
+      dest = dst_register( translate_file( reg->Register.File ),
+                           reg->Register.Index );
       break;
    }
 
-   dest.mask = reg->DstRegister.WriteMask;
+   dest.mask = reg->Register.WriteMask;
 
    if (insn->Instruction.Saturate) 
       dest.dstMod = SVGA3DDSTMOD_SATURATE;
@@ -1434,7 +1434,7 @@ static boolean emit_pow(struct svga_shader_emitter *emit,
    boolean need_tmp = FALSE;
    
    /* POW can only output to a temporary */
-   if (insn->Dst[0].DstRegister.File != TGSI_FILE_TEMPORARY)
+   if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
       need_tmp = TRUE;
    
    /* POW src1 must not be the same register as dst */
-- 
cgit v1.2.3


From 91a4e6d53f83c45c1da9240b6325011d96b61386 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 24 Nov 2009 15:13:17 +0000
Subject: tgsi: rename fields of tgsi_full_src_register to reduce verbosity

SrcRegister -> Register
SrcRegisterInd -> Indirect
SrcRegisterDim -> Dimension
SrcRegisterDimInd -> DimIndirect
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c    |  20 +--
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c   | 158 +++++++++++------------
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c  |  22 ++--
 src/gallium/auxiliary/draw/draw_vs_aos.c         |   8 +-
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp     |  34 ++---
 src/gallium/auxiliary/tgsi/tgsi_build.c          |  80 ++++++------
 src/gallium/auxiliary/tgsi/tgsi_dump.c           |  42 +++---
 src/gallium/auxiliary/tgsi/tgsi_dump_c.c         |  68 +++++-----
 src/gallium/auxiliary/tgsi/tgsi_exec.c           |  60 ++++-----
 src/gallium/auxiliary/tgsi/tgsi_parse.c          |  24 ++--
 src/gallium/auxiliary/tgsi/tgsi_parse.h          |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_ppc.c            |  20 +--
 src/gallium/auxiliary/tgsi/tgsi_sanity.c         |  16 +--
 src/gallium/auxiliary/tgsi/tgsi_scan.c           |  24 ++--
 src/gallium/auxiliary/tgsi/tgsi_sse2.c           |  22 ++--
 src/gallium/auxiliary/tgsi/tgsi_text.c           |  32 ++---
 src/gallium/auxiliary/tgsi/tgsi_util.c           |  24 ++--
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |  26 ++--
 src/gallium/auxiliary/vl/vl_shader_build.c       |  32 ++---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c       |  14 +-
 src/gallium/drivers/cell/spu/spu_exec.c          |  36 +++---
 src/gallium/drivers/cell/spu/spu_util.c          |  10 +-
 src/gallium/drivers/i915/i915_fpc_translate.c    |  20 +--
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c   |  16 +--
 src/gallium/drivers/nv20/nv20_vertprog.c         |  38 +++---
 src/gallium/drivers/nv30/nv30_fragprog.c         |  44 +++----
 src/gallium/drivers/nv30/nv30_vertprog.c         |  38 +++---
 src/gallium/drivers/nv40/nv40_fragprog.c         |  46 +++----
 src/gallium/drivers/nv40/nv40_vertprog.c         |  40 +++---
 src/gallium/drivers/nv50/nv50_program.c          |  46 +++----
 src/gallium/drivers/r300/r300_tgsi_to_rc.c       |  14 +-
 src/gallium/drivers/svga/svga_tgsi_insn.c        |  68 +++++-----
 32 files changed, 575 insertions(+), 575 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index fe200983ca..3bb9616122 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -270,10 +270,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
       newInst.Texture.Texture = TGSI_TEXTURE_2D;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[0].SrcRegister.Index = aactx->maxInput + 1;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER;
-      newInst.Src[1].SrcRegister.Index = aactx->freeSampler;
+      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[0].Register.Index = aactx->maxInput + 1;
+      newInst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+      newInst.Src[1].Register.Index = aactx->freeSampler;
 
       ctx->emit_instruction(ctx, &newInst);
 
@@ -285,8 +285,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = aactx->colorOutput;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = aactx->colorTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL alpha */
@@ -297,10 +297,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = aactx->colorOutput;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].SrcRegister.Index = aactx->texTemp;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = aactx->colorTemp;
+      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].Register.Index = aactx->texTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* END */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 39e1406e96..75130a8fb0 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -238,10 +238,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[0].SrcRegister.Index = texInput;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[1].SrcRegister.Index = texInput;
+      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[0].Register.Index = texInput;
+      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[1].Register.Index = texInput;
       ctx->emit_instruction(ctx, &newInst);
 
       /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
@@ -252,12 +252,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].SrcRegister.Index = tmp0;
-      newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].Register.Index = tmp0;
+      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
       ctx->emit_instruction(ctx, &newInst);
 
 #if NORMALIZE  /* OPTIONAL normalization of length */
@@ -269,8 +269,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
       ctx->emit_instruction(ctx, &newInst);
 
       /* RCP t0.x, t0.x; */
@@ -281,8 +281,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
       ctx->emit_instruction(ctx, &newInst);
 #endif
 
@@ -294,12 +294,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[1].SrcRegister.Index = texInput;
-      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[1].Register.Index = texInput;
+      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
       ctx->emit_instruction(ctx, &newInst);
 
       /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
@@ -307,13 +307,13 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
       newInst.Instruction.NumDstRegs = 0;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.Negate = 1;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.Negate = 1;
       ctx->emit_instruction(ctx, &newInst);
 
 
@@ -327,12 +327,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[0].SrcRegister.Index = texInput;
-      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[1].SrcRegister.Index = texInput;
-      newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z;
+      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[0].Register.Index = texInput;
+      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[1].Register.Index = texInput;
+      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* RCP t0.z, t0.z;  # t0.z = 1 / m */
@@ -343,9 +343,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* SUB t0.y, 1, t0.x;  # d = 1 - d */
@@ -356,12 +356,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[0].SrcRegister.Index = texInput;
-      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].SrcRegister.Index = tmp0;
-      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[0].Register.Index = texInput;
+      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].Register.Index = tmp0;
+      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
@@ -372,12 +372,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].SrcRegister.Index = tmp0;
-      newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].Register.Index = tmp0;
+      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
@@ -388,12 +388,12 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[1].SrcRegister.Index = texInput;
-      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[1].Register.Index = texInput;
+      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
       ctx->emit_instruction(ctx, &newInst);
 
       /* CMP t0.w, -t0.y, tex.w, t0.w;
@@ -409,25 +409,25 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = tmp0;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 3;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = tmp0;
-      newInst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y;
-      newInst.Src[0].SrcRegister.Negate = 1;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[1].SrcRegister.Index = texInput;
-      newInst.Src[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-      newInst.Src[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.Src[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.Src[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
-      newInst.Src[2].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[2].SrcRegister.Index = tmp0;
-      newInst.Src[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W;
-      newInst.Src[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W;
-      newInst.Src[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W;
-      newInst.Src[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = tmp0;
+      newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
+      newInst.Src[0].Register.Negate = 1;
+      newInst.Src[1].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[1].Register.Index = texInput;
+      newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
+      newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
+      newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[2].Register.Index = tmp0;
+      newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
+      newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
+      newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
+      newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
       ctx->emit_instruction(ctx, &newInst);
 
    }
@@ -443,8 +443,8 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = aactx->colorOutput;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = aactx->colorTemp;
       ctx->emit_instruction(ctx, &newInst);
 
       /* MUL result.color.w, colorTemp, tmp0.w; */
@@ -455,10 +455,10 @@ aa_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.Index = aactx->colorOutput;
       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = aactx->colorTemp;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[1].SrcRegister.Index = aactx->tmp0;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = aactx->colorTemp;
+      newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[1].Register.Index = aactx->tmp0;
       ctx->emit_instruction(ctx, &newInst);
    }
    else {
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 99165b1006..45317227a8 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -283,10 +283,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
       newInst.Dst[0].Register.Index = pctx->texTemp;
       newInst.Instruction.NumSrcRegs = 2;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_INPUT;
-      newInst.Src[0].SrcRegister.Index = wincoordInput;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_IMMEDIATE;
-      newInst.Src[1].SrcRegister.Index = pctx->numImmed;
+      newInst.Src[0].Register.File = TGSI_FILE_INPUT;
+      newInst.Src[0].Register.Index = wincoordInput;
+      newInst.Src[1].Register.File = TGSI_FILE_IMMEDIATE;
+      newInst.Src[1].Register.Index = pctx->numImmed;
       ctx->emit_instruction(ctx, &newInst);
 
       /* TEX texTemp, texTemp, sampler; */
@@ -298,10 +298,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.NumSrcRegs = 2;
       newInst.Instruction.Texture = TRUE;
       newInst.Texture.Texture = TGSI_TEXTURE_2D;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = pctx->texTemp;
-      newInst.Src[1].SrcRegister.File = TGSI_FILE_SAMPLER;
-      newInst.Src[1].SrcRegister.Index = pctx->freeSampler;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = pctx->texTemp;
+      newInst.Src[1].Register.File = TGSI_FILE_SAMPLER;
+      newInst.Src[1].Register.Index = pctx->freeSampler;
       ctx->emit_instruction(ctx, &newInst);
 
       /* KIL -texTemp;   # if -texTemp < 0, KILL fragment */
@@ -309,9 +309,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
       newInst.Instruction.NumDstRegs = 0;
       newInst.Instruction.NumSrcRegs = 1;
-      newInst.Src[0].SrcRegister.File = TGSI_FILE_TEMPORARY;
-      newInst.Src[0].SrcRegister.Index = pctx->texTemp;
-      newInst.Src[0].SrcRegister.Negate = 1;
+      newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+      newInst.Src[0].Register.Index = pctx->texTemp;
+      newInst.Src[0].Register.Negate = 1;
       ctx->emit_instruction(ctx, &newInst);
    }
 
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 8c93642954..1aaae4ab7a 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -529,8 +529,8 @@ static struct x86_reg fetch_src( struct aos_compilation *cp,
                                  const struct tgsi_full_src_register *src ) 
 {
    struct x86_reg arg0 = aos_get_shader_reg(cp, 
-                                            src->SrcRegister.File, 
-                                            src->SrcRegister.Index);
+                                            src->Register.File, 
+                                            src->Register.Index);
    unsigned i;
    ubyte swz = 0;
    unsigned negs = 0;
@@ -620,8 +620,8 @@ static void x87_fld_src( struct aos_compilation *cp,
                          unsigned channel ) 
 {
    struct x86_reg arg0 = aos_get_shader_reg_ptr(cp, 
-                                                src->SrcRegister.File, 
-                                                src->SrcRegister.Index);
+                                                src->Register.File, 
+                                                src->Register.Index);
 
    unsigned swizzle = tgsi_util_get_full_src_register_swizzle( src, channel );
    unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 135d307ce1..5cafe8c3f0 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -238,22 +238,22 @@ translate_instruction(llvm::Module *module,
       llvm::Value *val = 0;
       llvm::Value *indIdx = 0;
 
-      if (src->SrcRegister.Indirect) {
-         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+      if (src->Register.Indirect) {
+         indIdx = storage->addrElement(src->Indirect.Index);
          indIdx = storage->extractIndex(indIdx);
       }
-      if (src->SrcRegister.File == TGSI_FILE_CONSTANT) {
-         val = storage->constElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
-         val = storage->inputElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
-         val = storage->tempElement(src->SrcRegister.Index);
-      } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
-         val = storage->outputElement(src->SrcRegister.Index, indIdx);
-      } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
-         val = storage->immediateElement(src->SrcRegister.Index);
+      if (src->Register.File == TGSI_FILE_CONSTANT) {
+         val = storage->constElement(src->Register.Index, indIdx);
+      } else if (src->Register.File == TGSI_FILE_INPUT) {
+         val = storage->inputElement(src->Register.Index, indIdx);
+      } else if (src->Register.File == TGSI_FILE_TEMPORARY) {
+         val = storage->tempElement(src->Register.Index);
+      } else if (src->Register.File == TGSI_FILE_OUTPUT) {
+         val = storage->outputElement(src->Register.Index, indIdx);
+      } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
+         val = storage->immediateElement(src->Register.Index);
       } else {
-         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File);
+         fprintf(stderr, "ERROR: not supported llvm source %d\n", src->Register.File);
          return;
       }
 
@@ -688,11 +688,11 @@ translate_instructionir(llvm::Module *module,
       llvm::Value *indIdx = 0;
       int swizzle = swizzleInt(src);
 
-      if (src->SrcRegister.Indirect) {
-         indIdx = storage->addrElement(src->SrcRegisterInd.Index);
+      if (src->Register.Indirect) {
+         indIdx = storage->addrElement(src->Indirect.Index);
       }
-      val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
-                          src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx);
+      val = storage->load((enum tgsi_file_type)src->Register.File,
+                          src->Register.Index, swizzle, instr->getIRBuilder(), indIdx);
 
       inputs[i] = val;
    }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 91fb4f68e5..c35634c69a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -623,21 +623,21 @@ tgsi_build_full_instruction(
       size++;
 
       *src_register = tgsi_build_src_register(
-         reg->SrcRegister.File,
-         reg->SrcRegister.SwizzleX,
-         reg->SrcRegister.SwizzleY,
-         reg->SrcRegister.SwizzleZ,
-         reg->SrcRegister.SwizzleW,
-         reg->SrcRegister.Negate,
-         reg->SrcRegister.Absolute,
-         reg->SrcRegister.Indirect,
-         reg->SrcRegister.Dimension,
-         reg->SrcRegister.Index,
+         reg->Register.File,
+         reg->Register.SwizzleX,
+         reg->Register.SwizzleY,
+         reg->Register.SwizzleZ,
+         reg->Register.SwizzleW,
+         reg->Register.Negate,
+         reg->Register.Absolute,
+         reg->Register.Indirect,
+         reg->Register.Dimension,
+         reg->Register.Index,
          instruction,
          header );
       prev_token = (struct tgsi_token  *) src_register;
 
-      if( reg->SrcRegister.Indirect ) {
+      if( reg->Register.Indirect ) {
          struct  tgsi_src_register *ind;
 
          if( maxsize <= size )
@@ -646,24 +646,24 @@ tgsi_build_full_instruction(
          size++;
 
          *ind = tgsi_build_src_register(
-            reg->SrcRegisterInd.File,
-            reg->SrcRegisterInd.SwizzleX,
-            reg->SrcRegisterInd.SwizzleY,
-            reg->SrcRegisterInd.SwizzleZ,
-            reg->SrcRegisterInd.SwizzleW,
-            reg->SrcRegisterInd.Negate,
-            reg->SrcRegisterInd.Absolute,
-            reg->SrcRegisterInd.Indirect,
-            reg->SrcRegisterInd.Dimension,
-            reg->SrcRegisterInd.Index,
+            reg->Indirect.File,
+            reg->Indirect.SwizzleX,
+            reg->Indirect.SwizzleY,
+            reg->Indirect.SwizzleZ,
+            reg->Indirect.SwizzleW,
+            reg->Indirect.Negate,
+            reg->Indirect.Absolute,
+            reg->Indirect.Indirect,
+            reg->Indirect.Dimension,
+            reg->Indirect.Index,
             instruction,
             header );
       }
 
-      if( reg->SrcRegister.Dimension ) {
+      if( reg->Register.Dimension ) {
          struct  tgsi_dimension *dim;
 
-         assert( !reg->SrcRegisterDim.Dimension );
+         assert( !reg->Dimension.Dimension );
 
          if( maxsize <= size )
             return 0;
@@ -671,12 +671,12 @@ tgsi_build_full_instruction(
          size++;
 
          *dim = tgsi_build_dimension(
-            reg->SrcRegisterDim.Indirect,
-            reg->SrcRegisterDim.Index,
+            reg->Dimension.Indirect,
+            reg->Dimension.Index,
             instruction,
             header );
 
-         if( reg->SrcRegisterDim.Indirect ) {
+         if( reg->Dimension.Indirect ) {
             struct tgsi_src_register *ind;
 
             if( maxsize <= size )
@@ -685,16 +685,16 @@ tgsi_build_full_instruction(
             size++;
 
             *ind = tgsi_build_src_register(
-               reg->SrcRegisterDimInd.File,
-               reg->SrcRegisterDimInd.SwizzleX,
-               reg->SrcRegisterDimInd.SwizzleY,
-               reg->SrcRegisterDimInd.SwizzleZ,
-               reg->SrcRegisterDimInd.SwizzleW,
-               reg->SrcRegisterDimInd.Negate,
-               reg->SrcRegisterDimInd.Absolute,
-               reg->SrcRegisterDimInd.Indirect,
-               reg->SrcRegisterDimInd.Dimension,
-               reg->SrcRegisterDimInd.Index,
+               reg->DimIndirect.File,
+               reg->DimIndirect.SwizzleX,
+               reg->DimIndirect.SwizzleY,
+               reg->DimIndirect.SwizzleZ,
+               reg->DimIndirect.SwizzleW,
+               reg->DimIndirect.Negate,
+               reg->DimIndirect.Absolute,
+               reg->DimIndirect.Indirect,
+               reg->DimIndirect.Dimension,
+               reg->DimIndirect.Index,
                instruction,
                header );
          }
@@ -894,10 +894,10 @@ tgsi_default_full_src_register( void )
 {
    struct tgsi_full_src_register full_src_register;
 
-   full_src_register.SrcRegister = tgsi_default_src_register();
-   full_src_register.SrcRegisterInd = tgsi_default_src_register();
-   full_src_register.SrcRegisterDim = tgsi_default_dimension();
-   full_src_register.SrcRegisterDimInd = tgsi_default_src_register();
+   full_src_register.Register = tgsi_default_src_register();
+   full_src_register.Indirect = tgsi_default_src_register();
+   full_src_register.Dimension = tgsi_default_dimension();
+   full_src_register.DimIndirect = tgsi_default_src_register();
 
    return full_src_register;
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 6141865f03..da126f3b01 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -386,42 +386,42 @@ iter_instruction(
          CHR( ',' );
       CHR( ' ' );
 
-      if (src->SrcRegister.Negate)
+      if (src->Register.Negate)
          TXT( "-(" );
-      if (src->SrcRegister.Absolute)
+      if (src->Register.Absolute)
          CHR( '|' );
 
-      if (src->SrcRegister.Indirect) {
+      if (src->Register.Indirect) {
          _dump_register_ind(
             ctx,
-            src->SrcRegister.File,
-            src->SrcRegister.Index,
-            src->SrcRegisterInd.File,
-            src->SrcRegisterInd.Index,
-            src->SrcRegisterInd.SwizzleX );
+            src->Register.File,
+            src->Register.Index,
+            src->Indirect.File,
+            src->Indirect.Index,
+            src->Indirect.SwizzleX );
       }
       else {
          _dump_register(
             ctx,
-            src->SrcRegister.File,
-            src->SrcRegister.Index,
-            src->SrcRegister.Index );
+            src->Register.File,
+            src->Register.Index,
+            src->Register.Index );
       }
 
-      if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
-          src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
-          src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
-          src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) {
+      if (src->Register.SwizzleX != TGSI_SWIZZLE_X ||
+          src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
+          src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
+          src->Register.SwizzleW != TGSI_SWIZZLE_W) {
          CHR( '.' );
-         ENM( src->SrcRegister.SwizzleX, swizzle_names );
-         ENM( src->SrcRegister.SwizzleY, swizzle_names );
-         ENM( src->SrcRegister.SwizzleZ, swizzle_names );
-         ENM( src->SrcRegister.SwizzleW, swizzle_names );
+         ENM( src->Register.SwizzleX, swizzle_names );
+         ENM( src->Register.SwizzleY, swizzle_names );
+         ENM( src->Register.SwizzleZ, swizzle_names );
+         ENM( src->Register.SwizzleW, swizzle_names );
       }
 
-      if (src->SrcRegister.Absolute)
+      if (src->Register.Absolute)
          CHR( '|' );
-      if (src->SrcRegister.Negate)
+      if (src->Register.Negate)
          CHR( ')' );
 
       first_reg = FALSE;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 5fae5a225f..77f671e9eb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -392,78 +392,78 @@ dump_instruction_verbose(
 
       EOL();
       TXT( "\nFile     : ");
-      ENM( src->SrcRegister.File, TGSI_FILES );
-      if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) {
+      ENM( src->Register.File, TGSI_FILES );
+      if( deflt || fs->Register.SwizzleX != src->Register.SwizzleX ) {
          TXT( "\nSwizzleX : " );
-         ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES );
+         ENM( src->Register.SwizzleX, TGSI_SWIZZLES );
       }
-      if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) {
+      if( deflt || fs->Register.SwizzleY != src->Register.SwizzleY ) {
          TXT( "\nSwizzleY : " );
-         ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES );
+         ENM( src->Register.SwizzleY, TGSI_SWIZZLES );
       }
-      if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) {
+      if( deflt || fs->Register.SwizzleZ != src->Register.SwizzleZ ) {
          TXT( "\nSwizzleZ : " );
-         ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES );
+         ENM( src->Register.SwizzleZ, TGSI_SWIZZLES );
       }
-      if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) {
+      if( deflt || fs->Register.SwizzleW != src->Register.SwizzleW ) {
          TXT( "\nSwizzleW : " );
-         ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES );
+         ENM( src->Register.SwizzleW, TGSI_SWIZZLES );
       }
-      if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) {
+      if( deflt || fs->Register.Negate != src->Register.Negate ) {
          TXT( "\nNegate   : " );
-         UID( src->SrcRegister.Negate );
+         UID( src->Register.Negate );
       }
       if( ignored ) {
-         if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) {
+         if( deflt || fs->Register.Indirect != src->Register.Indirect ) {
             TXT( "\nIndirect : " );
-            UID( src->SrcRegister.Indirect );
+            UID( src->Register.Indirect );
          }
-         if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) {
+         if( deflt || fs->Register.Dimension != src->Register.Dimension ) {
             TXT( "\nDimension: " );
-            UID( src->SrcRegister.Dimension );
+            UID( src->Register.Dimension );
          }
       }
-      if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) {
+      if( deflt || fs->Register.Index != src->Register.Index ) {
          TXT( "\nIndex    : " );
-         SID( src->SrcRegister.Index );
+         SID( src->Register.Index );
       }
       if( ignored ) {
-         if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) {
+         if( deflt || fs->Register.Extended != src->Register.Extended ) {
             TXT( "\nExtended : " );
-            UID( src->SrcRegister.Extended );
+            UID( src->Register.Extended );
          }
       }
 
-      if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) {
+      if( deflt || tgsi_compare_src_register_ext_mod( src->RegisterExtMod, fs->RegisterExtMod ) ) {
          EOL();
          TXT( "\nType     : " );
-         ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
-         if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) {
+         ENM( src->RegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS );
+         if( deflt || fs->RegisterExtMod.Complement != src->RegisterExtMod.Complement ) {
             TXT( "\nComplement: " );
-            UID( src->SrcRegisterExtMod.Complement );
+            UID( src->RegisterExtMod.Complement );
          }
-         if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) {
+         if( deflt || fs->RegisterExtMod.Bias != src->RegisterExtMod.Bias ) {
             TXT( "\nBias     : " );
-            UID( src->SrcRegisterExtMod.Bias );
+            UID( src->RegisterExtMod.Bias );
          }
-         if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) {
+         if( deflt || fs->RegisterExtMod.Scale2X != src->RegisterExtMod.Scale2X ) {
             TXT( "\nScale2X   : " );
-            UID( src->SrcRegisterExtMod.Scale2X );
+            UID( src->RegisterExtMod.Scale2X );
          }
-         if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) {
+         if( deflt || fs->RegisterExtMod.Absolute != src->RegisterExtMod.Absolute ) {
             TXT( "\nAbsolute  : " );
-            UID( src->SrcRegisterExtMod.Absolute );
+            UID( src->RegisterExtMod.Absolute );
          }
-         if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) {
+         if( deflt || fs->RegisterExtMod.Negate != src->RegisterExtMod.Negate ) {
             TXT( "\nNegate   : " );
-            UID( src->SrcRegisterExtMod.Negate );
+            UID( src->RegisterExtMod.Negate );
          }
          if( ignored ) {
             TXT( "\nPadding   : " );
-            UIX( src->SrcRegisterExtMod.Padding );
-            if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) {
+            UIX( src->RegisterExtMod.Padding );
+            if( deflt || fs->RegisterExtMod.Extended != src->RegisterExtMod.Extended ) {
                TXT( "\nExtended  : " );
-               UID( src->SrcRegisterExtMod.Extended );
+               UID( src->RegisterExtMod.Extended );
             }
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index a6bd1a784f..6cd23b37be 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -200,9 +200,9 @@ tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
 
    /* loop over src regs */
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-      if ((inst->Src[i].SrcRegister.File ==
+      if ((inst->Src[i].Register.File ==
            inst->Dst[0].Register.File) &&
-          (inst->Src[i].SrcRegister.Index ==
+          (inst->Src[i].Register.Index ==
            inst->Dst[0].Register.Index)) {
          /* loop over dest channels */
          uint channelsWritten = 0x0;
@@ -1233,13 +1233,13 @@ fetch_source(
     *
     *    file[1],
     *    where:
-    *       file = SrcRegister.File
-    *       [1] = SrcRegister.Index
+    *       file = Register.File
+    *       [1] = Register.Index
     */
    index.i[0] =
    index.i[1] =
    index.i[2] =
-   index.i[3] = reg->SrcRegister.Index;
+   index.i[3] = reg->Register.Index;
 
    /* There is an extra source register that indirectly subscripts
     * a register file. The direct index now becomes an offset
@@ -1247,11 +1247,11 @@ fetch_source(
     *
     *    file[ind[2].x+1],
     *    where:
-    *       ind = SrcRegisterInd.File
-    *       [2] = SrcRegisterInd.Index
-    *       .x = SrcRegisterInd.SwizzleX
+    *       ind = Indirect.File
+    *       [2] = Indirect.Index
+    *       .x = Indirect.SwizzleX
     */
-   if (reg->SrcRegister.Indirect) {
+   if (reg->Register.Indirect) {
       union tgsi_exec_channel index2;
       union tgsi_exec_channel indir_index;
       const uint execmask = mach->ExecMask;
@@ -1261,13 +1261,13 @@ fetch_source(
       index2.i[0] =
       index2.i[1] =
       index2.i[2] =
-      index2.i[3] = reg->SrcRegisterInd.Index;
+      index2.i[3] = reg->Indirect.Index;
 
       /* get current value of address register[swizzle] */
-      swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterInd, CHAN_X );
+      swizzle = tgsi_util_get_src_register_swizzle( &reg->Indirect, CHAN_X );
       fetch_src_file_channel(
          mach,
-         reg->SrcRegisterInd.File,
+         reg->Indirect.File,
          swizzle,
          &index2,
          &indir_index );
@@ -1293,14 +1293,14 @@ fetch_source(
     *
     *    file[1][3] == file[1*sizeof(file[1])+3],
     *    where:
-    *       [3] = SrcRegisterDim.Index
+    *       [3] = Dimension.Index
     */
-   if (reg->SrcRegister.Dimension) {
+   if (reg->Register.Dimension) {
       /* The size of the first-order array depends on the register file type.
        * We need to multiply the index to the first array to get an effective,
        * "flat" index that points to the beginning of the second-order array.
        */
-      switch (reg->SrcRegister.File) {
+      switch (reg->Register.File) {
       case TGSI_FILE_INPUT:
          index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
          index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS;
@@ -1317,10 +1317,10 @@ fetch_source(
          assert( 0 );
       }
 
-      index.i[0] += reg->SrcRegisterDim.Index;
-      index.i[1] += reg->SrcRegisterDim.Index;
-      index.i[2] += reg->SrcRegisterDim.Index;
-      index.i[3] += reg->SrcRegisterDim.Index;
+      index.i[0] += reg->Dimension.Index;
+      index.i[1] += reg->Dimension.Index;
+      index.i[2] += reg->Dimension.Index;
+      index.i[3] += reg->Dimension.Index;
 
       /* Again, the second subscript index can be addressed indirectly
        * identically to the first one.
@@ -1329,11 +1329,11 @@ fetch_source(
        *
        *    file[1][ind[4].y+3],
        *    where:
-       *       ind = SrcRegisterDimInd.File
-       *       [4] = SrcRegisterDimInd.Index
-       *       .y = SrcRegisterDimInd.SwizzleX
+       *       ind = DimIndirect.File
+       *       [4] = DimIndirect.Index
+       *       .y = DimIndirect.SwizzleX
        */
-      if (reg->SrcRegisterDim.Indirect) {
+      if (reg->Dimension.Indirect) {
          union tgsi_exec_channel index2;
          union tgsi_exec_channel indir_index;
          const uint execmask = mach->ExecMask;
@@ -1342,12 +1342,12 @@ fetch_source(
          index2.i[0] =
          index2.i[1] =
          index2.i[2] =
-         index2.i[3] = reg->SrcRegisterDimInd.Index;
+         index2.i[3] = reg->DimIndirect.Index;
 
-         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+         swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
          fetch_src_file_channel(
             mach,
-            reg->SrcRegisterDimInd.File,
+            reg->DimIndirect.File,
             swizzle,
             &index2,
             &indir_index );
@@ -1367,7 +1367,7 @@ fetch_source(
       }
 
       /* If by any chance there was a need for a 3D array of register
-       * files, we would have to check whether SrcRegisterDim is followed
+       * files, we would have to check whether Dimension is followed
        * by a dimension register and continue the saga.
        */
    }
@@ -1375,7 +1375,7 @@ fetch_source(
    swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    fetch_src_file_channel(
       mach,
-      reg->SrcRegister.File,
+      reg->Register.File,
       swizzle,
       &index,
       chan );
@@ -1668,7 +1668,7 @@ exec_tex(struct tgsi_exec_machine *mach,
          boolean biasLod,
          boolean projected)
 {
-   const uint unit = inst->Src[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    union tgsi_exec_channel r[4];
    uint chan_index;
    float lodBias;
@@ -1765,7 +1765,7 @@ static void
 exec_txd(struct tgsi_exec_machine *mach,
          const struct tgsi_full_instruction *inst)
 {
-   const uint unit = inst->Src[3].SrcRegister.Index;
+   const uint unit = inst->Src[3].Register.Index;
    union tgsi_exec_channel r[4];
    uint chan_index;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index e3a6bc0f54..4b252915c9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -190,34 +190,34 @@ tgsi_parse_token(
 
       for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) {
 
-         next_token( ctx, &inst->Src[i].SrcRegister );
+         next_token( ctx, &inst->Src[i].Register );
 
-         if( inst->Src[i].SrcRegister.Indirect ) {
-            next_token( ctx, &inst->Src[i].SrcRegisterInd );
+         if( inst->Src[i].Register.Indirect ) {
+            next_token( ctx, &inst->Src[i].Indirect );
 
             /*
              * No support for indirect or multi-dimensional addressing.
              */
-            assert( !inst->Src[i].SrcRegisterInd.Indirect );
-            assert( !inst->Src[i].SrcRegisterInd.Dimension );
+            assert( !inst->Src[i].Indirect.Indirect );
+            assert( !inst->Src[i].Indirect.Dimension );
          }
 
-         if( inst->Src[i].SrcRegister.Dimension ) {
-            next_token( ctx, &inst->Src[i].SrcRegisterDim );
+         if( inst->Src[i].Register.Dimension ) {
+            next_token( ctx, &inst->Src[i].Dimension );
 
             /*
              * No support for multi-dimensional addressing.
              */
-            assert( !inst->Src[i].SrcRegisterDim.Dimension );
+            assert( !inst->Src[i].Dimension.Dimension );
 
-            if( inst->Src[i].SrcRegisterDim.Indirect ) {
-               next_token( ctx, &inst->Src[i].SrcRegisterDimInd );
+            if( inst->Src[i].Dimension.Indirect ) {
+               next_token( ctx, &inst->Src[i].DimIndirect );
 
                /*
                * No support for indirect or multi-dimensional addressing.
                */
-               assert( !inst->Src[i].SrcRegisterInd.Indirect );
-               assert( !inst->Src[i].SrcRegisterInd.Dimension );
+               assert( !inst->Src[i].DimIndirect.Indirect );
+               assert( !inst->Src[i].DimIndirect.Dimension );
             }
          }
       }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 331a533dd9..e9efa3fdd9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -53,10 +53,10 @@ struct tgsi_full_dst_register
 
 struct tgsi_full_src_register
 {
-   struct tgsi_src_register         SrcRegister;
-   struct tgsi_src_register         SrcRegisterInd;
-   struct tgsi_dimension            SrcRegisterDim;
-   struct tgsi_src_register         SrcRegisterDimInd;
+   struct tgsi_src_register         Register;    /* first token the parser reads for each source operand */
+   struct tgsi_src_register         Indirect;    /* parser reads this only when Register.Indirect is set */
+   struct tgsi_dimension            Dimension;   /* parser reads this only when Register.Dimension is set */
+   struct tgsi_src_register         DimIndirect; /* parser reads this only when Dimension.Indirect is set */
 };
 
 struct tgsi_full_declaration
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index adb16f6ac9..da6ad6da04 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -156,8 +156,8 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func)
 static boolean
 is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
 {
-   return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
-           reg->SrcRegister.Index < MAX_PPC_TEMPS);
+   return (reg->Register.File == TGSI_FILE_TEMPORARY &&
+           reg->Register.Index < MAX_PPC_TEMPS);
 }
 
 
@@ -291,10 +291,10 @@ emit_fetch(struct gen_context *gen,
    case TGSI_SWIZZLE_Y:
    case TGSI_SWIZZLE_Z:
    case TGSI_SWIZZLE_W:
-      switch (reg->SrcRegister.File) {
+      switch (reg->Register.File) {
       case TGSI_FILE_INPUT:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset = (reg->Register.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
             ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
@@ -303,11 +303,11 @@ emit_fetch(struct gen_context *gen,
       case TGSI_FILE_TEMPORARY:
          if (is_ppc_vec_temporary(reg)) {
             /* use PPC vec register */
-            dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
+            dst_vec = gen->temps_map[reg->Register.Index][swizzle];
          }
          else {
             /* use memory-based temp register "file" */
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset = (reg->Register.Index * 4 + swizzle) * 16;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
             ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
@@ -315,7 +315,7 @@ emit_fetch(struct gen_context *gen,
          break;
       case TGSI_FILE_IMMEDIATE:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset = (reg->Register.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
             /* Load 4-byte word into vector register.
@@ -331,7 +331,7 @@ emit_fetch(struct gen_context *gen,
          break;
       case TGSI_FILE_CONSTANT:
          {
-            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset = (reg->Register.Index * 4 + swizzle) * 4;
             int offset_reg = emit_li_offset(gen, offset);
             dst_vec = ppc_allocate_vec_register(gen->f);
             /* Load 4-byte word into vector register.
@@ -404,9 +404,9 @@ equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
 {
    int swz_a, swz_b;
    int sign_a, sign_b;
-   if (a->SrcRegister.File != b->SrcRegister.File)
+   if (a->Register.File != b->Register.File)
       return FALSE;
-   if (a->SrcRegister.Index != b->SrcRegister.Index)
+   if (a->Register.Index != b->Register.Index)
       return FALSE;
    swz_a = tgsi_util_get_full_src_register_swizzle(a, chan_a);
    swz_b = tgsi_util_get_full_src_register_swizzle(b, chan_b);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 7e50e25353..8bd1f31e9c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -220,16 +220,16 @@ iter_instruction(
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       check_register_usage(
          ctx,
-         inst->Src[i].SrcRegister.File,
-         inst->Src[i].SrcRegister.Index,
+         inst->Src[i].Register.File,
+         inst->Src[i].Register.Index,
          "source",
-         (boolean)inst->Src[i].SrcRegister.Indirect );
-      if (inst->Src[i].SrcRegister.Indirect) {
+         (boolean)inst->Src[i].Register.Indirect );
+      if (inst->Src[i].Register.Indirect) {
          uint file;
          int index;
 
-         file = inst->Src[i].SrcRegisterInd.File;
-         index = inst->Src[i].SrcRegisterInd.Index;
+         file = inst->Src[i].Indirect.File;
+         index = inst->Src[i].Indirect.Index;
          check_register_usage(
             ctx,
             file,
@@ -254,8 +254,8 @@ iter_instruction(
 
    switch (inst->Instruction.Opcode) {
    case TGSI_OPCODE_BGNFOR:
-      if (inst->Src[0].SrcRegister.File != TGSI_FILE_CONSTANT &&
-          inst->Src[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) {
+      if (inst->Src[0].Register.File != TGSI_FILE_CONSTANT &&
+          inst->Src[0].Register.File != TGSI_FILE_IMMEDIATE) {
          report_error(ctx, "Source register file must be either CONST or IMM");
       }
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 90832e71bb..a5d2db04ec 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -97,13 +97,13 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
                for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
                   const struct tgsi_full_src_register *src =
                      &fullinst->Src[i];
-                  if (src->SrcRegister.File == TGSI_FILE_INPUT) {
-                     const int ind = src->SrcRegister.Index;
+                  if (src->Register.File == TGSI_FILE_INPUT) {
+                     const int ind = src->Register.Index;
                      if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) {
-                        if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) {
+                        if (src->Register.SwizzleX == TGSI_SWIZZLE_X) {
                            info->uses_fogcoord = TRUE;
                         }
-                        else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) {
+                        else if (src->Register.SwizzleX == TGSI_SWIZZLE_Y) {
                            info->uses_frontfacing = TRUE;
                         }
                      }
@@ -211,17 +211,17 @@ tgsi_is_passthrough_shader(const struct tgsi_token *tokens)
 
             /* Do a whole bunch of checks for a simple move */
             if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV ||
-                src->SrcRegister.File != TGSI_FILE_INPUT ||
+                src->Register.File != TGSI_FILE_INPUT ||
                 dst->Register.File != TGSI_FILE_OUTPUT ||
-                src->SrcRegister.Index != dst->Register.Index ||
+                src->Register.Index != dst->Register.Index ||
 
-                src->SrcRegister.Negate ||
-                src->SrcRegister.Absolute ||
+                src->Register.Negate ||
+                src->Register.Absolute ||
 
-                src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X ||
-                src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y ||
-                src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z ||
-                src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ||
+                src->Register.SwizzleX != TGSI_SWIZZLE_X ||
+                src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
+                src->Register.SwizzleZ != TGSI_SWIZZLE_Z ||
+                src->Register.SwizzleW != TGSI_SWIZZLE_W ||
 
                 dst->Register.WriteMask != TGSI_WRITEMASK_XYZW)
             {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 785076a520..76051ea0d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1267,23 +1267,23 @@ emit_fetch(
    case TGSI_SWIZZLE_Y:
    case TGSI_SWIZZLE_Z:
    case TGSI_SWIZZLE_W:
-      switch (reg->SrcRegister.File) {
+      switch (reg->Register.File) {
       case TGSI_FILE_CONSTANT:
          emit_const(
             func,
             xmm,
-            reg->SrcRegister.Index,
+            reg->Register.Index,
             swizzle,
-            reg->SrcRegister.Indirect,
-            reg->SrcRegisterInd.File,
-            reg->SrcRegisterInd.Index );
+            reg->Register.Indirect,
+            reg->Indirect.File,
+            reg->Indirect.Index );
          break;
 
       case TGSI_FILE_IMMEDIATE:
          emit_immediate(
             func,
             xmm,
-            reg->SrcRegister.Index,
+            reg->Register.Index,
             swizzle );
          break;
 
@@ -1291,7 +1291,7 @@ emit_fetch(
          emit_inputf(
             func,
             xmm,
-            reg->SrcRegister.Index,
+            reg->Register.Index,
             swizzle );
          break;
 
@@ -1299,7 +1299,7 @@ emit_fetch(
          emit_tempf(
             func,
             xmm,
-            reg->SrcRegister.Index,
+            reg->Register.Index,
             swizzle );
          break;
 
@@ -1459,7 +1459,7 @@ emit_tex( struct x86_function *func,
           boolean lodbias,
           boolean projected)
 {
-   const uint unit = inst->Src[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    struct x86_reg args[2];
    unsigned count;
    unsigned i;
@@ -1721,8 +1721,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
    uint i;
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       const struct tgsi_full_src_register *reg = &inst->Src[i];
-      if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->SrcRegister.Indirect)
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index 27b90f5ab7..ca2e2bae11 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -565,41 +565,41 @@ parse_src_operand(
    if (*ctx->cur == '-') {
       ctx->cur++;
       eat_opt_white( &ctx->cur );
-      src->SrcRegister.Negate = 1;
+      src->Register.Negate = 1;
    }
    
    if (*ctx->cur == '|') {
       ctx->cur++;
       eat_opt_white( &ctx->cur );
-      src->SrcRegister.Absolute = 1;
+      src->Register.Absolute = 1;
    }
 
    if (!parse_register_src(ctx, &file, &index, &ind_file, &ind_index, &ind_comp))
       return FALSE;
-   src->SrcRegister.File = file;
-   src->SrcRegister.Index = index;
+   src->Register.File = file;
+   src->Register.Index = index;
    if (ind_file != TGSI_FILE_NULL) {
-      src->SrcRegister.Indirect = 1;
-      src->SrcRegisterInd.File = ind_file;
-      src->SrcRegisterInd.Index = ind_index;
-      src->SrcRegisterInd.SwizzleX = ind_comp;
-      src->SrcRegisterInd.SwizzleY = ind_comp;
-      src->SrcRegisterInd.SwizzleZ = ind_comp;
-      src->SrcRegisterInd.SwizzleW = ind_comp;
+      src->Register.Indirect = 1;
+      src->Indirect.File = ind_file;
+      src->Indirect.Index = ind_index;
+      src->Indirect.SwizzleX = ind_comp;
+      src->Indirect.SwizzleY = ind_comp;
+      src->Indirect.SwizzleZ = ind_comp;
+      src->Indirect.SwizzleW = ind_comp;
    }
 
    /* Parse optional swizzle.
     */
    if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle )) {
       if (parsed_swizzle) {
-         src->SrcRegister.SwizzleX = swizzle[0];
-         src->SrcRegister.SwizzleY = swizzle[1];
-         src->SrcRegister.SwizzleZ = swizzle[2];
-         src->SrcRegister.SwizzleW = swizzle[3];
+         src->Register.SwizzleX = swizzle[0];
+         src->Register.SwizzleY = swizzle[1];
+         src->Register.SwizzleZ = swizzle[2];
+         src->Register.SwizzleW = swizzle[3];
       }
    }
 
-   if (src->SrcRegister.Absolute) {
+   if (src->Register.Absolute) {
       eat_opt_white( &ctx->cur );
       if (*ctx->cur != '|') {
          report_error( ctx, "Expected `|'" );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
index 3544011b47..f4ca9e21ed 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -76,7 +76,7 @@ tgsi_util_get_full_src_register_swizzle(
    unsigned component )
 {
    return tgsi_util_get_src_register_swizzle(
-      &reg->SrcRegister,
+      &reg->Register,
       component );
 }
 
@@ -111,10 +111,10 @@ tgsi_util_get_full_src_register_sign_mode(
 {
    unsigned sign_mode;
 
-   if( reg->SrcRegister.Absolute ) {
+   if( reg->Register.Absolute ) {
       /* Consider only the post-abs negation. */
 
-      if( reg->SrcRegister.Negate ) {
+      if( reg->Register.Negate ) {
          sign_mode = TGSI_UTIL_SIGN_SET;
       }
       else {
@@ -122,7 +122,7 @@ tgsi_util_get_full_src_register_sign_mode(
       }
    }
    else {
-      if( reg->SrcRegister.Negate ) {
+      if( reg->Register.Negate ) {
          sign_mode = TGSI_UTIL_SIGN_TOGGLE;
       }
       else {
@@ -141,23 +141,23 @@ tgsi_util_set_full_src_register_sign_mode(
    switch (sign_mode)
    {
    case TGSI_UTIL_SIGN_CLEAR:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegister.Absolute = 1;
+      reg->Register.Negate = 0;
+      reg->Register.Absolute = 1;
       break;
 
    case TGSI_UTIL_SIGN_SET:
-      reg->SrcRegister.Absolute = 1;
-      reg->SrcRegister.Negate = 1;
+      reg->Register.Absolute = 1;
+      reg->Register.Negate = 1;
       break;
 
    case TGSI_UTIL_SIGN_TOGGLE:
-      reg->SrcRegister.Negate = 1;
-      reg->SrcRegister.Absolute = 0;
+      reg->Register.Negate = 1;
+      reg->Register.Absolute = 0;
       break;
 
    case TGSI_UTIL_SIGN_KEEP:
-      reg->SrcRegister.Negate = 0;
-      reg->SrcRegister.Absolute = 0;
+      reg->Register.Negate = 0;
+      reg->Register.Absolute = 0;
       break;
 
    default:
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 4564a6c67f..36a7987099 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -237,9 +237,9 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
@@ -415,9 +415,9 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
@@ -620,9 +620,9 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
       inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
-      inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-      inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+      inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X << i;
       ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
    }
@@ -642,10 +642,10 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    /* lerp t1, c1.x, t1, t2        ; Blend past and future texels */
    inst = vl_inst4(TGSI_OPCODE_LRP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
-   inst.Src[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
-   inst.Src[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
-   inst.Src[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
-   inst.Src[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+   inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
    ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
 
    /* add o0, t0, t1               ; Add past/future ref and differential to form final output */
diff --git a/src/gallium/auxiliary/vl/vl_shader_build.c b/src/gallium/auxiliary/vl/vl_shader_build.c
index 9ebb4a9171..d011ef97bd 100644
--- a/src/gallium/auxiliary/vl/vl_shader_build.c
+++ b/src/gallium/auxiliary/vl/vl_shader_build.c
@@ -141,8 +141,8 @@ struct tgsi_full_instruction vl_inst2
    inst.Dst[0].Register.File = dst_file;
    inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 1;
-   inst.Src[0].SrcRegister.File = src_file;
-   inst.Src[0].SrcRegister.Index = src_index;
+   inst.Src[0].Register.File = src_file;
+   inst.Src[0].Register.Index = src_index;
 
    return inst;
 }
@@ -165,10 +165,10 @@ struct tgsi_full_instruction vl_inst3
    inst.Dst[0].Register.File = dst_file;
    inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 2;
-   inst.Src[0].SrcRegister.File = src1_file;
-   inst.Src[0].SrcRegister.Index = src1_index;
-   inst.Src[1].SrcRegister.File = src2_file;
-   inst.Src[1].SrcRegister.Index = src2_index;
+   inst.Src[0].Register.File = src1_file;
+   inst.Src[0].Register.Index = src1_index;
+   inst.Src[1].Register.File = src2_file;
+   inst.Src[1].Register.Index = src2_index;
 
    return inst;
 }
@@ -193,10 +193,10 @@ struct tgsi_full_instruction vl_tex
    inst.Instruction.NumSrcRegs = 2;
    inst.Instruction.Texture = 1;
    inst.Texture.Texture = tex;
-   inst.Src[0].SrcRegister.File = src1_file;
-   inst.Src[0].SrcRegister.Index = src1_index;
-   inst.Src[1].SrcRegister.File = src2_file;
-   inst.Src[1].SrcRegister.Index = src2_index;
+   inst.Src[0].Register.File = src1_file;
+   inst.Src[0].Register.Index = src1_index;
+   inst.Src[1].Register.File = src2_file;
+   inst.Src[1].Register.Index = src2_index;
 
    return inst;
 }
@@ -221,12 +221,12 @@ struct tgsi_full_instruction vl_inst4
    inst.Dst[0].Register.File = dst_file;
    inst.Dst[0].Register.Index = dst_index;
    inst.Instruction.NumSrcRegs = 3;
-   inst.Src[0].SrcRegister.File = src1_file;
-   inst.Src[0].SrcRegister.Index = src1_index;
-   inst.Src[1].SrcRegister.File = src2_file;
-   inst.Src[1].SrcRegister.Index = src2_index;
-   inst.Src[2].SrcRegister.File = src3_file;
-   inst.Src[2].SrcRegister.Index = src3_index;
+   inst.Src[0].Register.File = src1_file;
+   inst.Src[0].Register.Index = src1_index;
+   inst.Src[1].Register.File = src2_file;
+   inst.Src[1].Register.Index = src2_index;
+   inst.Src[2].Register.File = src3_file;
+   inst.Src[2].Register.Index = src3_index;
 
    return inst;
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index f639c62605..4d43f65d29 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -237,8 +237,8 @@ is_register_src(struct codegen *gen, int channel,
    if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
       return FALSE;
    }
-   if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
-       src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
+   if (src->Register.File == TGSI_FILE_TEMPORARY ||
+       src->Register.File == TGSI_FILE_IMMEDIATE) {
       return TRUE;
    }
    return FALSE;
@@ -279,15 +279,15 @@ get_src_reg(struct codegen *gen,
    assert(swizzle <= TGSI_SWIZZLE_W);
 
    {
-      int index = src->SrcRegister.Index;
+      int index = src->Register.Index;
 
       assert(swizzle < 4);
 
-      if (src->SrcRegister.Indirect) {
+      if (src->Register.Indirect) {
          /* XXX unfinished */
       }
 
-      switch (src->SrcRegister.File) {
+      switch (src->Register.File) {
       case TGSI_FILE_TEMPORARY:
          reg = gen->temp_regs[index][swizzle];
          break;
@@ -1352,7 +1352,7 @@ static boolean
 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    const uint target = inst->InstructionExtTexture.Texture;
-   const uint unit = inst->Src[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    uint addr;
    int ch;
    int coord_regs[4], d_regs[4];
@@ -1373,7 +1373,7 @@ emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
       return FALSE;
    }
 
-   assert(inst->Src[1].SrcRegister.File == TGSI_FILE_SAMPLER);
+   assert(inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
 
    spe_comment(gen->f, -4, "CALL tex:");
 
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 1b4792a316..5ed330aa6e 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -431,22 +431,22 @@ fetch_source(
    index.i[0] =
    index.i[1] =
    index.i[2] =
-   index.i[3] = reg->SrcRegister.Index;
+   index.i[3] = reg->Register.Index;
 
-   if (reg->SrcRegister.Indirect) {
+   if (reg->Register.Indirect) {
       union spu_exec_channel index2;
       union spu_exec_channel indir_index;
 
       index2.i[0] =
       index2.i[1] =
       index2.i[2] =
-      index2.i[3] = reg->SrcRegisterInd.Index;
+      index2.i[3] = reg->Indirect.Index;
 
-      swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
+      swizzle = tgsi_util_get_src_register_swizzle(&reg->Indirect,
                                                    CHAN_X);
       fetch_src_file_channel(
          mach,
-         reg->SrcRegisterInd.File,
+         reg->Indirect.File,
          swizzle,
          &index2,
          &indir_index );
@@ -454,8 +454,8 @@ fetch_source(
       index.q = si_a(index.q, indir_index.q);
    }
 
-   if( reg->SrcRegister.Dimension ) {
-      switch( reg->SrcRegister.File ) {
+   if( reg->Register.Dimension ) {
+      switch( reg->Register.File ) {
       case TGSI_FILE_INPUT:
          index.q = si_mpyi(index.q, 17);
          break;
@@ -466,24 +466,24 @@ fetch_source(
          ASSERT( 0 );
       }
 
-      index.i[0] += reg->SrcRegisterDim.Index;
-      index.i[1] += reg->SrcRegisterDim.Index;
-      index.i[2] += reg->SrcRegisterDim.Index;
-      index.i[3] += reg->SrcRegisterDim.Index;
+      index.i[0] += reg->Dimension.Index;
+      index.i[1] += reg->Dimension.Index;
+      index.i[2] += reg->Dimension.Index;
+      index.i[3] += reg->Dimension.Index;
 
-      if (reg->SrcRegisterDim.Indirect) {
+      if (reg->Dimension.Indirect) {
          union spu_exec_channel index2;
          union spu_exec_channel indir_index;
 
          index2.i[0] =
          index2.i[1] =
          index2.i[2] =
-         index2.i[3] = reg->SrcRegisterDimInd.Index;
+         index2.i[3] = reg->DimIndirect.Index;
 
-         swizzle = tgsi_util_get_src_register_swizzle( &reg->SrcRegisterDimInd, CHAN_X );
+         swizzle = tgsi_util_get_src_register_swizzle( &reg->DimIndirect, CHAN_X );
          fetch_src_file_channel(
             mach,
-            reg->SrcRegisterDimInd.File,
+            reg->DimIndirect.File,
             swizzle,
             &index2,
             &indir_index );
@@ -495,7 +495,7 @@ fetch_source(
    swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
    fetch_src_file_channel(
       mach,
-      reg->SrcRegister.File,
+      reg->Register.File,
       swizzle,
       &index,
       chan );
@@ -517,7 +517,7 @@ fetch_source(
       break;
    }
 
-   if (reg->SrcRegisterExtMod.Complement) {
+   if (reg->RegisterExtMod.Complement) {
       chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
    }
 }
@@ -677,7 +677,7 @@ exec_tex(struct spu_exec_machine *mach,
          const struct tgsi_full_instruction *inst,
          boolean biasLod, boolean projected)
 {
-   const uint unit = inst->Src[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    union spu_exec_channel r[8];
    uint chan_index;
    float lodBias;
diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c
index c2c32b22d5..24057e29e3 100644
--- a/src/gallium/drivers/cell/spu/spu_util.c
+++ b/src/gallium/drivers/cell/spu/spu_util.c
@@ -33,7 +33,7 @@ tgsi_util_get_full_src_register_swizzle(
    unsigned component )
 {
    return tgsi_util_get_src_register_swizzle(
-      reg->SrcRegister,
+      reg->Register,
       component );
 }
 
@@ -45,10 +45,10 @@ tgsi_util_get_full_src_register_sign_mode(
 {
    unsigned sign_mode;
 
-   if( reg->SrcRegisterExtMod.Absolute ) {
+   if( reg->RegisterExtMod.Absolute ) {
       /* Consider only the post-abs negation. */
 
-      if( reg->SrcRegisterExtMod.Negate ) {
+      if( reg->RegisterExtMod.Negate ) {
          sign_mode = TGSI_UTIL_SIGN_SET;
       }
       else {
@@ -60,8 +60,8 @@ tgsi_util_get_full_src_register_sign_mode(
 
       unsigned negate;
 
-      negate = reg->SrcRegister.Negate;
-      if( reg->SrcRegisterExtMod.Negate ) {
+      negate = reg->Register.Negate;
+      if( reg->RegisterExtMod.Negate ) {
          negate = !negate;
       }
 
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c
index 13c280827a..25c53210be 100644
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -143,12 +143,12 @@ static uint
 src_vector(struct i915_fp_compile *p,
            const struct tgsi_full_src_register *source)
 {
-   uint index = source->SrcRegister.Index;
+   uint index = source->Register.Index;
    uint src = 0, sem_name, sem_ind;
 
-   switch (source->SrcRegister.File) {
+   switch (source->Register.File) {
    case TGSI_FILE_TEMPORARY:
-      if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) {
+      if (source->Register.Index >= I915_MAX_TEMPORARY) {
          i915_program_error(p, "Exceeded max temporary reg");
          return 0;
       }
@@ -215,17 +215,17 @@ src_vector(struct i915_fp_compile *p,
    }
 
    src = swizzle(src,
-		 source->SrcRegister.SwizzleX,
-		 source->SrcRegister.SwizzleY,
-		 source->SrcRegister.SwizzleZ,
-		 source->SrcRegister.SwizzleW);
+		 source->Register.SwizzleX,
+		 source->Register.SwizzleY,
+		 source->Register.SwizzleZ,
+		 source->Register.SwizzleW);
 
 
    /* There's both negate-all-components and per-component negation.
     * Try to handle both here.
     */
    {
-      int n = source->SrcRegister.Negate;
+      int n = source->Register.Negate;
       src = negate(src, n, n, n, n);
    }
 
@@ -233,7 +233,7 @@ src_vector(struct i915_fp_compile *p,
 #if 0
    /* XXX assertions disabled to allow arbfplight.c to run */
    /* XXX enable these assertions, or fix things */
-   assert(!source->SrcRegister.Absolute);
+   assert(!source->Register.Absolute);
 #endif
    return src;
 }
@@ -339,7 +339,7 @@ emit_tex(struct i915_fp_compile *p,
          uint opcode)
 {
    uint texture = inst->Texture.Texture;
-   uint unit = inst->Src[1].SrcRegister.Index;
+   uint unit = inst->Src[1].Register.Index;
    uint tex = translate_tex_src_target( p, texture );
    uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
    uint coord = src_vector( p, &inst->Src[0]);
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 99266f34ed..fe2db04d8f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -167,9 +167,9 @@ emit_fetch(
    case TGSI_SWIZZLE_Z:
    case TGSI_SWIZZLE_W:
 
-      switch (reg->SrcRegister.File) {
+      switch (reg->Register.File) {
       case TGSI_FILE_CONSTANT: {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->SrcRegister.Index*4 + swizzle, 0);
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), reg->Register.Index*4 + swizzle, 0);
          LLVMValueRef scalar_ptr = LLVMBuildGEP(bld->base.builder, bld->consts_ptr, &index, 1, "");
          LLVMValueRef scalar = LLVMBuildLoad(bld->base.builder, scalar_ptr, "");
          res = lp_build_broadcast_scalar(&bld->base, scalar);
@@ -177,17 +177,17 @@ emit_fetch(
       }
 
       case TGSI_FILE_IMMEDIATE:
-         res = bld->immediates[reg->SrcRegister.Index][swizzle];
+         res = bld->immediates[reg->Register.Index][swizzle];
          assert(res);
          break;
 
       case TGSI_FILE_INPUT:
-         res = bld->inputs[reg->SrcRegister.Index][swizzle];
+         res = bld->inputs[reg->Register.Index][swizzle];
          assert(res);
          break;
 
       case TGSI_FILE_TEMPORARY:
-         res = bld->temps[reg->SrcRegister.Index][swizzle];
+         res = bld->temps[reg->Register.Index][swizzle];
          if(!res)
             return bld->base.undef;
          break;
@@ -319,7 +319,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
           boolean projected,
           LLVMValueRef *texel)
 {
-   const uint unit = inst->Src[1].SrcRegister.Index;
+   const uint unit = inst->Src[1].Register.Index;
    LLVMValueRef lodbias;
    LLVMValueRef oow;
    LLVMValueRef coords[3];
@@ -424,8 +424,8 @@ indirect_temp_reference(const struct tgsi_full_instruction *inst)
    uint i;
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
       const struct tgsi_full_src_register *reg = &inst->Src[i];
-      if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
-          reg->SrcRegister.Indirect)
+      if (reg->Register.File == TGSI_FILE_TEMPORARY &&
+          reg->Register.Indirect)
          return TRUE;
    }
    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index e3bb9f9d7f..9e8aab9754 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -253,32 +253,32 @@ static INLINE struct nv20_sreg
 tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	struct nv20_sreg src;
 
-	switch (fsrc->SrcRegister.File) {
+	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+		src = nv20_sr(NV30SR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
-		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
 		break;
 	case TGSI_FILE_IMMEDIATE:
-		src = vpc->imm[fsrc->SrcRegister.Index];
+		src = vpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		if (vpc->high_temp < fsrc->SrcRegister.Index)
-			vpc->high_temp = fsrc->SrcRegister.Index;
-		src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+		if (vpc->high_temp < fsrc->Register.Index)
+			vpc->high_temp = fsrc->Register.Index;
+		src = nv20_sr(NV30SR_TEMP, fsrc->Register.Index);
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
 		break;
 	}
 
-	src.abs = fsrc->SrcRegister.Absolute;
-	src.negate = fsrc->SrcRegister.Negate;
-	src.swz[0] = fsrc->SrcRegister.SwizzleX;
-	src.swz[1] = fsrc->SrcRegister.SwizzleY;
-	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	src.abs = fsrc->Register.Absolute;
+	src.negate = fsrc->Register.Negate;
+	src.swz[0] = fsrc->Register.SwizzleX;
+	src.swz[1] = fsrc->Register.SwizzleY;
+	src.swz[2] = fsrc->Register.SwizzleZ;
+	src.swz[3] = fsrc->Register.SwizzleW;
 	return src;
 }
 
@@ -335,7 +335,7 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
 	}
@@ -344,10 +344,10 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
-			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-				ai = fsrc->SrcRegister.Index;
+			if (ai == -1 || ai == fsrc->Register.Index) {
+				ai = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
@@ -360,8 +360,8 @@ nv20_vertprog_parse_instruction(struct nv20_vpc *vpc,
 		 */
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_IMMEDIATE:
-			if (ci == -1 || ci == fsrc->SrcRegister.Index) {
-				ci = fsrc->SrcRegister.Index;
+			if (ci == -1 || ci == fsrc->Register.Index) {
+				ci = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 14dc884b3a..40965a9772 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -237,20 +237,20 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
 	struct nv30_sreg src;
 
-	switch (fsrc->SrcRegister.File) {
+	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
 		src = nv30_sr(NV30SR_INPUT,
-			      fpc->attrib_map[fsrc->SrcRegister.Index]);
+			      fpc->attrib_map[fsrc->Register.Index]);
 		break;
 	case TGSI_FILE_CONSTANT:
-		src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+		src = constant(fpc, fsrc->Register.Index, NULL);
 		break;
 	case TGSI_FILE_IMMEDIATE:
-		assert(fsrc->SrcRegister.Index < fpc->nr_imm);
-		src = fpc->imm[fsrc->SrcRegister.Index];
+		assert(fsrc->Register.Index < fpc->nr_imm);
+		src = fpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1);
+		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index + 1);
 		if (fpc->high_temp < src.index)
 			fpc->high_temp = src.index;
 		break;
@@ -258,7 +258,7 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 	 * Luckily fragprog results are just temp regs..
 	 */
 	case TGSI_FILE_OUTPUT:
-		if (fsrc->SrcRegister.Index == fpc->colour_id)
+		if (fsrc->Register.Index == fpc->colour_id)
 			return nv30_sr(NV30SR_OUTPUT, 0);
 		else
 			return nv30_sr(NV30SR_OUTPUT, 1);
@@ -268,12 +268,12 @@ tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 		break;
 	}
 
-	src.abs = fsrc->SrcRegister.Absolute;
-	src.negate = fsrc->SrcRegister.Negate;
-	src.swz[0] = fsrc->SrcRegister.SwizzleX;
-	src.swz[1] = fsrc->SrcRegister.SwizzleY;
-	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	src.abs = fsrc->Register.Absolute;
+	src.negate = fsrc->Register.Negate;
+	src.swz[0] = fsrc->Register.SwizzleX;
+	src.swz[1] = fsrc->Register.SwizzleY;
+	src.swz[2] = fsrc->Register.SwizzleZ;
+	src.swz[3] = fsrc->Register.SwizzleW;
 	return src;
 }
 
@@ -364,7 +364,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(fpc, fsrc);
 		}
 	}
@@ -374,7 +374,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 
 		fsrc = &finst->Src[i];
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_TEMPORARY:
@@ -385,14 +385,14 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 			break;
 		}
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
-			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-				ai = fsrc->SrcRegister.Index;
+			if (ai == -1 || ai == fsrc->Register.Index) {
+				ai = fsrc->Register.Index;
 				src[i] = tgsi_src(fpc, fsrc);
 			} else {
 				NOUVEAU_MSG("extra src attr %d\n",
-					 fsrc->SrcRegister.Index);
+					 fsrc->Register.Index);
 				src[i] = temp(fpc);
 				arith(fpc, 0, MOV, src[i], MASK_ALL,
 				      tgsi_src(fpc, fsrc), none, none);
@@ -400,8 +400,8 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 			break;
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_IMMEDIATE:
-			if (ci == -1 || ci == fsrc->SrcRegister.Index) {
-				ci = fsrc->SrcRegister.Index;
+			if (ci == -1 || ci == fsrc->Register.Index) {
+				ci = fsrc->Register.Index;
 				src[i] = tgsi_src(fpc, fsrc);
 			} else {
 				src[i] = temp(fpc);
@@ -413,7 +413,7 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 			/* handled above */
 			break;
 		case TGSI_FILE_SAMPLER:
-			unit = fsrc->SrcRegister.Index;
+			unit = fsrc->Register.Index;
 			break;
 		case TGSI_FILE_OUTPUT:
 			break;
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 41e4161dda..36ac8299f0 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -253,32 +253,32 @@ static INLINE struct nv30_sreg
 tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	struct nv30_sreg src;
 
-	switch (fsrc->SrcRegister.File) {
+	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index);
+		src = nv30_sr(NV30SR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
-		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
 		break;
 	case TGSI_FILE_IMMEDIATE:
-		src = vpc->imm[fsrc->SrcRegister.Index];
+		src = vpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		if (vpc->high_temp < fsrc->SrcRegister.Index)
-			vpc->high_temp = fsrc->SrcRegister.Index;
-		src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index);
+		if (vpc->high_temp < fsrc->Register.Index)
+			vpc->high_temp = fsrc->Register.Index;
+		src = nv30_sr(NV30SR_TEMP, fsrc->Register.Index);
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
 		break;
 	}
 
-	src.abs = fsrc->SrcRegister.Absolute;
-	src.negate = fsrc->SrcRegister.Negate;
-	src.swz[0] = fsrc->SrcRegister.SwizzleX;
-	src.swz[1] = fsrc->SrcRegister.SwizzleY;
-	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	src.abs = fsrc->Register.Absolute;
+	src.negate = fsrc->Register.Negate;
+	src.swz[0] = fsrc->Register.SwizzleX;
+	src.swz[1] = fsrc->Register.SwizzleY;
+	src.swz[2] = fsrc->Register.SwizzleZ;
+	src.swz[3] = fsrc->Register.SwizzleW;
 	return src;
 }
 
@@ -335,7 +335,7 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
 	}
@@ -344,10 +344,10 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
-			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-				ai = fsrc->SrcRegister.Index;
+			if (ai == -1 || ai == fsrc->Register.Index) {
+				ai = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
@@ -360,8 +360,8 @@ nv30_vertprog_parse_instruction(struct nv30_vpc *vpc,
 		 */
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_IMMEDIATE:
-			if (ci == -1 || ci == fsrc->SrcRegister.Index) {
-				ci = fsrc->SrcRegister.Index;
+			if (ci == -1 || ci == fsrc->Register.Index) {
+				ci = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 02c23e92c0..1bf16726d1 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -255,36 +255,36 @@ tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc)
 {
 	struct nv40_sreg src;
 
-	switch (fsrc->SrcRegister.File) {
+	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
 		src = nv40_sr(NV40SR_INPUT,
-			      fpc->attrib_map[fsrc->SrcRegister.Index]);
+			      fpc->attrib_map[fsrc->Register.Index]);
 		break;
 	case TGSI_FILE_CONSTANT:
-		src = constant(fpc, fsrc->SrcRegister.Index, NULL);
+		src = constant(fpc, fsrc->Register.Index, NULL);
 		break;
 	case TGSI_FILE_IMMEDIATE:
-		assert(fsrc->SrcRegister.Index < fpc->nr_imm);
-		src = fpc->imm[fsrc->SrcRegister.Index];
+		assert(fsrc->Register.Index < fpc->nr_imm);
+		src = fpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		src = fpc->r_temp[fsrc->SrcRegister.Index];
+		src = fpc->r_temp[fsrc->Register.Index];
 		break;
 	/* NV40 fragprog result regs are just temps, so this is simple */
 	case TGSI_FILE_OUTPUT:
-		src = fpc->r_result[fsrc->SrcRegister.Index];
+		src = fpc->r_result[fsrc->Register.Index];
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
 		break;
 	}
 
-	src.abs = fsrc->SrcRegister.Absolute;
-	src.negate = fsrc->SrcRegister.Negate;
-	src.swz[0] = fsrc->SrcRegister.SwizzleX;
-	src.swz[1] = fsrc->SrcRegister.SwizzleY;
-	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	src.abs = fsrc->Register.Absolute;
+	src.negate = fsrc->Register.Negate;
+	src.swz[0] = fsrc->Register.SwizzleX;
+	src.swz[1] = fsrc->Register.SwizzleY;
+	src.swz[2] = fsrc->Register.SwizzleZ;
+	src.swz[3] = fsrc->Register.SwizzleW;
 	return src;
 }
 
@@ -365,7 +365,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(fpc, fsrc);
 		}
 	}
@@ -375,7 +375,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 
 		fsrc = &finst->Src[i];
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_TEMPORARY:
@@ -386,10 +386,10 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 			break;
 		}
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
-			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-				ai = fsrc->SrcRegister.Index;
+			if (ai == -1 || ai == fsrc->Register.Index) {
+				ai = fsrc->Register.Index;
 				src[i] = tgsi_src(fpc, fsrc);
 			} else {
 				src[i] = temp(fpc);
@@ -399,8 +399,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 			break;
 		case TGSI_FILE_CONSTANT:
 			if ((ci == -1 && ii == -1) ||
-			    ci == fsrc->SrcRegister.Index) {
-				ci = fsrc->SrcRegister.Index;
+			    ci == fsrc->Register.Index) {
+				ci = fsrc->Register.Index;
 				src[i] = tgsi_src(fpc, fsrc);
 			} else {
 				src[i] = temp(fpc);
@@ -410,8 +410,8 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 			break;
 		case TGSI_FILE_IMMEDIATE:
 			if ((ci == -1 && ii == -1) ||
-			    ii == fsrc->SrcRegister.Index) {
-				ii = fsrc->SrcRegister.Index;
+			    ii == fsrc->Register.Index) {
+				ii = fsrc->Register.Index;
 				src[i] = tgsi_src(fpc, fsrc);
 			} else {
 				src[i] = temp(fpc);
@@ -423,7 +423,7 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 			/* handled above */
 			break;
 		case TGSI_FILE_SAMPLER:
-			unit = fsrc->SrcRegister.Index;
+			unit = fsrc->Register.Index;
 			break;
 		case TGSI_FILE_OUTPUT:
 			break;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index c4f51d622c..55835ee644 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -295,30 +295,30 @@ static INLINE struct nv40_sreg
 tgsi_src(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) {
 	struct nv40_sreg src;
 
-	switch (fsrc->SrcRegister.File) {
+	switch (fsrc->Register.File) {
 	case TGSI_FILE_INPUT:
-		src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index);
+		src = nv40_sr(NV40SR_INPUT, fsrc->Register.Index);
 		break;
 	case TGSI_FILE_CONSTANT:
-		src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0);
+		src = constant(vpc, fsrc->Register.Index, 0, 0, 0, 0);
 		break;
 	case TGSI_FILE_IMMEDIATE:
-		src = vpc->imm[fsrc->SrcRegister.Index];
+		src = vpc->imm[fsrc->Register.Index];
 		break;
 	case TGSI_FILE_TEMPORARY:
-		src = vpc->r_temp[fsrc->SrcRegister.Index];
+		src = vpc->r_temp[fsrc->Register.Index];
 		break;
 	default:
 		NOUVEAU_ERR("bad src file\n");
 		break;
 	}
 
-	src.abs = fsrc->SrcRegister.Absolute;
-	src.negate = fsrc->SrcRegister.Negate;
-	src.swz[0] = fsrc->SrcRegister.SwizzleX;
-	src.swz[1] = fsrc->SrcRegister.SwizzleY;
-	src.swz[2] = fsrc->SrcRegister.SwizzleZ;
-	src.swz[3] = fsrc->SrcRegister.SwizzleW;
+	src.abs = fsrc->Register.Absolute;
+	src.negate = fsrc->Register.Negate;
+	src.swz[0] = fsrc->Register.SwizzleX;
+	src.swz[1] = fsrc->Register.SwizzleY;
+	src.swz[2] = fsrc->Register.SwizzleZ;
+	src.swz[3] = fsrc->Register.SwizzleW;
 	return src;
 }
 
@@ -406,7 +406,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 		const struct tgsi_full_src_register *fsrc;
 
 		fsrc = &finst->Src[i];
-		if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) {
+		if (fsrc->Register.File == TGSI_FILE_TEMPORARY) {
 			src[i] = tgsi_src(vpc, fsrc);
 		}
 	}
@@ -416,7 +416,7 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 
 		fsrc = &finst->Src[i];
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
 		case TGSI_FILE_CONSTANT:
 		case TGSI_FILE_TEMPORARY:
@@ -427,10 +427,10 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 			break;
 		}
 
-		switch (fsrc->SrcRegister.File) {
+		switch (fsrc->Register.File) {
 		case TGSI_FILE_INPUT:
-			if (ai == -1 || ai == fsrc->SrcRegister.Index) {
-				ai = fsrc->SrcRegister.Index;
+			if (ai == -1 || ai == fsrc->Register.Index) {
+				ai = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
@@ -440,8 +440,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 			break;
 		case TGSI_FILE_CONSTANT:
 			if ((ci == -1 && ii == -1) ||
-			    ci == fsrc->SrcRegister.Index) {
-				ci = fsrc->SrcRegister.Index;
+			    ci == fsrc->Register.Index) {
+				ci = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
@@ -451,8 +451,8 @@ nv40_vertprog_parse_instruction(struct nv40_vpc *vpc,
 			break;
 		case TGSI_FILE_IMMEDIATE:
 			if ((ci == -1 && ii == -1) ||
-			    ii == fsrc->SrcRegister.Index) {
-				ii = fsrc->SrcRegister.Index;
+			    ii == fsrc->Register.Index) {
+				ii = fsrc->Register.Index;
 				src[i] = tgsi_src(vpc, fsrc);
 			} else {
 				src[i] = temp(vpc);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 3409edb4c8..1509cecaac 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1535,10 +1535,10 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
 		if (s == i)
 			continue;
-		if ((insn->Src[s].SrcRegister.Index ==
-		     insn->Src[i].SrcRegister.Index) &&
-		    (insn->Src[s].SrcRegister.File ==
-		     insn->Src[i].SrcRegister.File))
+		if ((insn->Src[s].Register.Index ==
+		     insn->Src[i].Register.Index) &&
+		    (insn->Src[s].Register.File ==
+		     insn->Src[i].Register.File))
 			return FALSE;
 	}
 
@@ -1644,8 +1644,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 	struct nv50_reg *temp;
 	unsigned sgn, c, swz;
 
-	if (src->SrcRegister.File != TGSI_FILE_CONSTANT)
-		assert(!src->SrcRegister.Indirect);
+	if (src->Register.File != TGSI_FILE_CONSTANT)
+		assert(!src->Register.Indirect);
 
 	sgn = tgsi_util_get_full_src_register_sign_mode(src, chan);
 
@@ -1655,16 +1655,16 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 	case TGSI_SWIZZLE_Y:
 	case TGSI_SWIZZLE_Z:
 	case TGSI_SWIZZLE_W:
-		switch (src->SrcRegister.File) {
+		switch (src->Register.File) {
 		case TGSI_FILE_INPUT:
-			r = &pc->attr[src->SrcRegister.Index * 4 + c];
+			r = &pc->attr[src->Register.Index * 4 + c];
 			break;
 		case TGSI_FILE_TEMPORARY:
-			r = &pc->temp[src->SrcRegister.Index * 4 + c];
+			r = &pc->temp[src->Register.Index * 4 + c];
 			break;
 		case TGSI_FILE_CONSTANT:
-			if (!src->SrcRegister.Indirect) {
-				r = &pc->param[src->SrcRegister.Index * 4 + c];
+			if (!src->Register.Indirect) {
+				r = &pc->param[src->Register.Index * 4 + c];
 				break;
 			}
 			/* Indicate indirection by setting r->acc < 0 and
@@ -1672,19 +1672,19 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			 */
 			r = MALLOC_STRUCT(nv50_reg);
 			swz = tgsi_util_get_src_register_swizzle(
-						 &src->SrcRegisterInd, 0);
+						 &src->Indirect, 0);
 			ctor_reg(r, P_CONST,
-				 src->SrcRegisterInd.Index * 4 + swz,
-				 src->SrcRegister.Index * 4 + c);
+				 src->Indirect.Index * 4 + swz,
+				 src->Register.Index * 4 + c);
 			r->acc = -1;
 			break;
 		case TGSI_FILE_IMMEDIATE:
-			r = &pc->immd[src->SrcRegister.Index * 4 + c];
+			r = &pc->immd[src->Register.Index * 4 + c];
 			break;
 		case TGSI_FILE_SAMPLER:
 			break;
 		case TGSI_FILE_ADDRESS:
-			r = pc->addr[src->SrcRegister.Index * 4 + c];
+			r = pc->addr[src->Register.Index * 4 + c];
 			assert(r);
 			break;
 		default:
@@ -1871,8 +1871,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		src_mask = nv50_tgsi_src_mask(inst, i);
 		neg_supp = negate_supported(inst, i);
 
-		if (fs->SrcRegister.File == TGSI_FILE_SAMPLER)
-			unit = fs->SrcRegister.Index;
+		if (fs->Register.File == TGSI_FILE_SAMPLER)
+			unit = fs->Register.Index;
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
@@ -2284,10 +2284,10 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
 		src = &insn->Src[i];
 
-		if (src->SrcRegister.File == TGSI_FILE_TEMPORARY)
+		if (src->Register.File == TGSI_FILE_TEMPORARY)
 			reg = pc->temp;
 		else
-		if (src->SrcRegister.File == TGSI_FILE_INPUT)
+		if (src->Register.File == TGSI_FILE_INPUT)
 			reg = pc->attr;
 		else
 			continue;
@@ -2299,7 +2299,7 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 				continue;
 			k = tgsi_util_get_full_src_register_swizzle(src, c);
 
-			reg[src->SrcRegister.Index * 4 + k].acc = pc->insn_nr;
+			reg[src->Register.Index * 4 + k].acc = pc->insn_nr;
 		}
 	}
 }
@@ -2391,8 +2391,8 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 		boolean neg_supp = negate_supported(insn, i);
 
 		fs = &insn->Src[i];
-		if (fs->SrcRegister.File != fd->Register.File ||
-		    fs->SrcRegister.Index != fd->Register.Index)
+		if (fs->Register.File != fd->Register.File ||
+		    fs->Register.Index != fd->Register.Index)
 			continue;
 
 		for (chn = 0; chn < 4; ++chn) {
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 92796d150b..9fb2de2403 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -201,15 +201,15 @@ static void transform_srcreg(
     struct rc_src_register * dst,
     struct tgsi_full_src_register * src)
 {
-    dst->File = translate_register_file(src->SrcRegister.File);
-    dst->Index = translate_register_index(ttr, src->SrcRegister.File, src->SrcRegister.Index);
-    dst->RelAddr = src->SrcRegister.Indirect;
+    dst->File = translate_register_file(src->Register.File);
+    dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+    dst->RelAddr = src->Register.Indirect;
     dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0);
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3;
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
     dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
-    dst->Abs = src->SrcRegister.Absolute;
-    dst->Negate = src->SrcRegister.Negate ? RC_MASK_XYZW : 0;
+    dst->Abs = src->Register.Absolute;
+    dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;
 }
 
 static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src)
@@ -261,8 +261,8 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
         transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]);
 
     for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
-        if (src->Src[i].SrcRegister.File == TGSI_FILE_SAMPLER)
-            dst->U.I.TexSrcUnit = src->Src[i].SrcRegister.Index;
+        if (src->Src[i].Register.File == TGSI_FILE_SAMPLER)
+            dst->U.I.TexSrcUnit = src->Src[i].Register.Index;
         else
             transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]);
     }
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 9ca89f1cdd..1670da8bfa 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -176,33 +176,33 @@ translate_src_register( const struct svga_shader_emitter *emit,
 {
    struct src_register src;
 
-   switch (reg->SrcRegister.File) {
+   switch (reg->Register.File) {
    case TGSI_FILE_INPUT:
       /* Input registers are referred to by their semantic name rather
        * than by index.  Use the mapping build up from the decls:
        */
-      src = emit->input_map[reg->SrcRegister.Index];
+      src = emit->input_map[reg->Register.Index];
       break;
        
    case TGSI_FILE_IMMEDIATE:
       /* Immediates are appended after TGSI constants in the D3D
        * constant buffer.
        */
-      src = src_register( translate_file( reg->SrcRegister.File ),
-                          reg->SrcRegister.Index + 
+      src = src_register( translate_file( reg->Register.File ),
+                          reg->Register.Index + 
                           emit->imm_start );
       break;
 
    default:
-      src = src_register( translate_file( reg->SrcRegister.File ),
-                          reg->SrcRegister.Index );
+      src = src_register( translate_file( reg->Register.File ),
+                          reg->Register.Index );
 
       break;
    }
 
    /* Indirect addressing (for coninstant buffer lookups only)
     */
-   if (reg->SrcRegister.Indirect)
+   if (reg->Register.Indirect)
    {
       /* we shift the offset towards the minimum */
       if (svga_arl_needs_adjustment( emit )) {
@@ -213,28 +213,28 @@ translate_src_register( const struct svga_shader_emitter *emit,
       /* Not really sure what should go in the second token:
        */
       src.indirect = src_token( SVGA3DREG_ADDR,
-                                reg->SrcRegisterInd.Index );
+                                reg->Indirect.Index );
 
       src.indirect.swizzle = SWIZZLE_XXXX;
    }
 
    src = swizzle( src,
-                  reg->SrcRegister.SwizzleX,
-                  reg->SrcRegister.SwizzleY,
-                  reg->SrcRegister.SwizzleZ,
-                  reg->SrcRegister.SwizzleW );
+                  reg->Register.SwizzleX,
+                  reg->Register.SwizzleY,
+                  reg->Register.SwizzleZ,
+                  reg->Register.SwizzleW );
 
    /* src.mod isn't a bitfield, unfortunately:
     * See tgsi_util_get_full_src_register_sign_mode for implementation details.
     */
-   if (reg->SrcRegister.Absolute) {
-      if (reg->SrcRegister.Negate)
+   if (reg->Register.Absolute) {
+      if (reg->Register.Negate)
          src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
       else
          src.base.srcMod = SVGA3DSRCMOD_ABS;
    }
    else {
-      if (reg->SrcRegister.Negate)
+      if (reg->Register.Negate)
          src.base.srcMod = SVGA3DSRCMOD_NEG;
       else
          src.base.srcMod = SVGA3DSRCMOD_NONE;
@@ -986,13 +986,13 @@ static boolean emit_kil(struct svga_shader_emitter *emit,
    inst = inst_token( SVGA3DOP_TEXKILL );
    src0 = translate_src_register( emit, reg );
 
-   if (reg->SrcRegister.Absolute ||
-       reg->SrcRegister.Negate ||
-       reg->SrcRegister.Indirect ||
-       reg->SrcRegister.SwizzleX != 0 ||
-       reg->SrcRegister.SwizzleY != 1 ||
-       reg->SrcRegister.SwizzleZ != 2 ||
-       reg->SrcRegister.File != TGSI_FILE_TEMPORARY)
+   if (reg->Register.Absolute ||
+       reg->Register.Negate ||
+       reg->Register.Indirect ||
+       reg->Register.SwizzleX != 0 ||
+       reg->Register.SwizzleY != 1 ||
+       reg->Register.SwizzleZ != 2 ||
+       reg->Register.File != TGSI_FILE_TEMPORARY)
    {
       SVGA3dShaderDestToken temp = get_temp( emit );
 
@@ -2543,27 +2543,27 @@ pre_parse_instruction( struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn,
                        int current_arl)
 {
-   if (insn->Src[0].SrcRegister.Indirect &&
-       insn->Src[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+   if (insn->Src[0].Register.Indirect &&
+       insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
       const struct tgsi_full_src_register *reg = &insn->Src[0];
-      if (reg->SrcRegister.Index < 0) {
-         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      if (reg->Register.Index < 0) {
+         pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
       }
    }
 
-   if (insn->Src[1].SrcRegister.Indirect &&
-       insn->Src[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+   if (insn->Src[1].Register.Indirect &&
+       insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
       const struct tgsi_full_src_register *reg = &insn->Src[1];
-      if (reg->SrcRegister.Index < 0) {
-         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      if (reg->Register.Index < 0) {
+         pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
       }
    }
 
-   if (insn->Src[2].SrcRegister.Indirect &&
-       insn->Src[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+   if (insn->Src[2].Register.Indirect &&
+       insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
       const struct tgsi_full_src_register *reg = &insn->Src[2];
-      if (reg->SrcRegister.Index < 0) {
-         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      if (reg->Register.Index < 0) {
+         pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
       }
    }
 
-- 
cgit v1.2.3


From 8bf4e5d6176b0efb93c11bcd14fa5d320088e2e3 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 24 Nov 2009 16:01:01 +0000
Subject: llvmpipe: Update instructions.

---
 src/gallium/drivers/llvmpipe/README | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 89d08834a3..478e0139c8 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,21 +51,18 @@ Requirements
 
  - Linux
  
- - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes
-   opcodes not yet supported by upstream.
+ - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
+   instructions. This is necessary because we emit several SSE intrinsics for
+   convenience. See /proc/cpuinfo to know what your CPU supports.
  
-     git clone git://people.freedesktop.org/~jrfonseca/udis86
-     cd udis86
-     ./configure --with-pic
-     make
-     sudo make install
+ - LLVM 2.5 or greater. LLVM 2.6 is preferred.
  
- - LLVM 2.5. On Debian based distributions do:
+   On Debian based distributions do:
  
      aptitude install llvm-dev
 
-   There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
-   errors in the debug build:
+   There is a typo in one of the llvm 2.5 headers, that may cause compilation
+   errors. To fix it apply the change:
 
      --- /usr/include/llvm-c/Core.h.orig	2009-08-10 15:38:54.000000000 +0100
      +++ /usr/include/llvm-c/Core.h	2009-08-10 15:38:25.000000000 +0100
@@ -79,12 +76,17 @@ Requirements
           #endif
           return reinterpret_cast<T**>(Vals);
  
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
-   instructions. This is necessary because we emit several SSE intrinsics for
-   convenience. See /proc/cpuinfo to know what your CPU supports.
- 
- - scons
+ - scons (optional)
 
+ - udis86, http://udis86.sourceforge.net/ (optional):
+ 
+     git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86
+     cd udis86
+     ./autogen.sh
+     ./configure --with-pic
+     make
+     sudo make install
+ 
 
 Building
 ========
-- 
cgit v1.2.3


From c783f5cfd891e6b8e9dc622ad0950e5859b5a0c0 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 26 Nov 2009 12:02:14 +0000
Subject: svga: Remove spurious argument to SVGA_DBG.

---
 src/gallium/drivers/svga/svga_screen_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index d61d88114c..e7301aba84 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -556,7 +556,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
       return NULL;
    }
 
-   SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n", handle);
+   SVGA_DBG(DEBUG_DMA, "surface_create for texture view\n");
    handle = svga_screen_surface_create(ss, key);
    if (!handle) {
       key->cachable = 0;
-- 
cgit v1.2.3


From b96218c65622a7814ff8154a91874a5e5a9dc773 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 26 Nov 2009 15:25:09 +0000
Subject: svga: hash the whole key, not just the first four bytes

---
 src/gallium/drivers/svga/svga_screen_cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index 65f5c07a72..689981cc6d 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -41,7 +41,7 @@
 static INLINE unsigned
 svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
 {
-   return util_hash_crc32( key, sizeof key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
+   return util_hash_crc32( key, sizeof *key ) % SVGA_HOST_SURFACE_CACHE_BUCKETS;
 }
 
 
@@ -95,8 +95,8 @@ svga_screen_cache_lookup(struct svga_screen *svgascreen,
    pipe_mutex_unlock(cache->mutex);
    
    if (SVGA_DEBUG & DEBUG_DMA)
-      debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, 
-                   handle ? "hit" : "miss", tries);
+      debug_printf("%s: cache %s after %u tries (bucket %d)\n", __FUNCTION__, 
+                   handle ? "hit" : "miss", tries, bucket);
    
    return handle;
 }
-- 
cgit v1.2.3


From da1c40260d8cb49eacbb6c394198dc37e020e75a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 26 Nov 2009 11:15:08 +0000
Subject: llvmpipe: Update/correct CPU requirements.

There are no hard requirements at the moment.

We don't really emit any sse3 yet. Just some ssse3.

Thanks to Roland for spotting these inaccuracies.
---
 src/gallium/drivers/llvmpipe/README | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 478e0139c8..0c3f00fd58 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,9 +51,13 @@ Requirements
 
  - Linux
  
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
-   instructions. This is necessary because we emit several SSE intrinsics for
-   convenience. See /proc/cpuinfo to know what your CPU supports.
+ - A x86 or amd64 processor.  64bit mode is preferred.
+ 
+   Support for sse2 is strongly encouraged.  Support for ssse3, and sse4.1 will
+   yield the most efficient code.  The less features the CPU has the more
+   likely is that you ran into underperforming, buggy, or incomplete code.  
+   
+   See /proc/cpuinfo to know what your CPU supports.
  
  - LLVM 2.5 or greater. LLVM 2.6 is preferred.
  
-- 
cgit v1.2.3


From 953b74d116c88f2b93740b6d1f713bb1b5989e98 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 26 Nov 2009 11:16:19 +0000
Subject: llvmpipe: Fake missing SSSE3 when simulating less capable machines.

SSE3 != SSSE3 and so far we only use the latter.
---
 src/gallium/drivers/llvmpipe/lp_jit.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index c601c79480..bce3baec16 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -154,6 +154,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
 #if 0
    /* For simulating less capable machines */
    util_cpu_caps.has_sse3 = 0;
+   util_cpu_caps.has_ssse3 = 0;
    util_cpu_caps.has_sse4_1 = 0;
 #endif
 
-- 
cgit v1.2.3


From d509f84543d0979e9bb53c20c195f378dd61e728 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 26 Nov 2009 22:49:58 +0100
Subject: gallium: fix more statetrackers/drivers for not using texture
 width/height/depth arrays

---
 src/gallium/auxiliary/util/u_gen_mipmap.c          |  8 +--
 src/gallium/drivers/cell/ppu/cell_state_emit.c     |  7 +-
 src/gallium/drivers/cell/ppu/cell_texture.c        | 27 ++++---
 src/gallium/drivers/llvmpipe/lp_bld_sample.c       |  6 +-
 src/gallium/drivers/llvmpipe/lp_state_sampler.c    |  4 +-
 src/gallium/drivers/llvmpipe/lp_tex_cache.c        |  5 +-
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c     | 30 ++++----
 src/gallium/drivers/llvmpipe/lp_texture.c          | 31 ++++----
 src/gallium/drivers/nv04/nv04_fragtex.c            |  4 +-
 src/gallium/drivers/nv04/nv04_miptree.c            | 19 +++--
 src/gallium/drivers/nv04/nv04_transfer.c           |  7 +-
 src/gallium/drivers/nv10/nv10_fragtex.c            |  8 +--
 src/gallium/drivers/nv10/nv10_miptree.c            | 18 ++---
 src/gallium/drivers/nv10/nv10_transfer.c           |  7 +-
 src/gallium/drivers/nv20/nv20_fragtex.c            |  8 +--
 src/gallium/drivers/nv20/nv20_miptree.c            | 31 ++++----
 src/gallium/drivers/nv20/nv20_transfer.c           |  7 +-
 src/gallium/drivers/nv30/nv30_fragtex.c            | 10 +--
 src/gallium/drivers/nv30/nv30_miptree.c            | 36 +++++-----
 src/gallium/drivers/nv30/nv30_transfer.c           |  7 +-
 src/gallium/drivers/nv40/nv40_fragtex.c            |  6 +-
 src/gallium/drivers/nv40/nv40_miptree.c            | 36 +++++-----
 src/gallium/drivers/nv40/nv40_transfer.c           |  7 +-
 src/gallium/drivers/nv50/nv50_miptree.c            | 21 +++---
 src/gallium/drivers/nv50/nv50_tex.c                |  4 +-
 src/gallium/drivers/nv50/nv50_transfer.c           | 11 +--
 src/gallium/drivers/r300/r300_texture.c            | 48 ++++++-------
 src/gallium/state_trackers/dri/dri_drawable.c      |  8 +--
 src/gallium/state_trackers/egl/egl_surface.c       |  6 +-
 src/gallium/state_trackers/python/p_device.i       |  6 +-
 src/gallium/state_trackers/python/p_texture.i      | 12 ++--
 .../state_trackers/python/retrace/interpreter.py   |  6 +-
 src/gallium/state_trackers/python/st_device.c      | 10 +--
 src/gallium/state_trackers/python/st_sample.c      |  4 +-
 src/gallium/state_trackers/vega/api_filters.c      |  8 +--
 src/gallium/state_trackers/vega/image.c            | 16 ++---
 src/gallium/state_trackers/vega/mask.c             | 12 ++--
 src/gallium/state_trackers/vega/paint.c            | 10 +--
 src/gallium/state_trackers/vega/renderer.c         | 46 ++++++------
 src/gallium/state_trackers/vega/vg_tracker.c       |  6 +-
 src/gallium/state_trackers/xorg/xorg_composite.c   |  4 +-
 src/gallium/state_trackers/xorg/xorg_crtc.c        |  6 +-
 src/gallium/state_trackers/xorg/xorg_dri2.c        |  6 +-
 src/gallium/state_trackers/xorg/xorg_exa.c         | 30 ++++----
 src/gallium/state_trackers/xorg/xorg_renderer.c    | 82 +++++++++++-----------
 src/gallium/state_trackers/xorg/xorg_xv.c          | 22 +++---
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  6 +-
 .../winsys/drm/nouveau/drm/nouveau_drm_api.c       |  6 +-
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c |  6 +-
 src/mesa/state_tracker/st_cb_fbo.c                 |  2 +-
 50 files changed, 360 insertions(+), 373 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 84db14576e..f67f1e458d 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -1214,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index 9479c0898f..ac5fafec1a 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -27,6 +27,7 @@
 
 #include "pipe/p_inlines.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 #include "cell_context.h"
 #include "cell_gen_fragment.h"
 #include "cell_state.h"
@@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell)
                for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
                   texture->start[level] = (ct->mapped +
                                            ct->level_offset[level]);
-                  texture->width[level] = ct->base.width[level];
-                  texture->height[level] = ct->base.height[level];
-                  texture->depth[level] = ct->base.depth[level];
+                  texture->width[level] = u_minify(ct->base.width0, level);
+                  texture->height[level] = u_minify(ct->base.height0, level);
+                  texture->depth[level] = u_minify(ct->base.depth0, level);
                }
                texture->target = ct->base.target;
             }
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index ae4c61efb3..e6b8a87045 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -49,9 +49,9 @@ cell_texture_layout(struct cell_texture *ct)
 {
    struct pipe_texture *pt = &ct->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    ct->buffer_size = 0;
 
@@ -65,9 +65,6 @@ cell_texture_layout(struct cell_texture *ct)
       w_tile = align(width, TILE_SIZE);
       h_tile = align(height, TILE_SIZE);
 
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);  
 
@@ -83,9 +80,9 @@ cell_texture_layout(struct cell_texture *ct)
 
       ct->buffer_size += size;
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 }
 
@@ -276,8 +273,8 @@ cell_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = ct->level_offset[level];
       /* XXX may need to override usage flags (see sp_texture.c) */
       ps->usage = usage;
@@ -386,8 +383,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
    struct pipe_texture *pt = transfer->texture;
    struct cell_texture *ct = cell_texture(pt);
    const uint level = ctrans->base.level;
-   const uint texWidth = pt->width[level];
-   const uint texHeight = pt->height[level];
+   const uint texWidth = u_minify(pt->width0, level);
+   const uint texHeight = u_minify(pt->height0, level);
    const uint stride = ct->stride[level];
    unsigned size;
 
@@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen,
    struct pipe_texture *pt = transfer->texture;
    struct cell_texture *ct = cell_texture(pt);
    const uint level = ctrans->base.level;
-   const uint texWidth = pt->width[level];
-   const uint texHeight = pt->height[level];
+   const uint texWidth = u_minify(pt->width0, level);
+   const uint texHeight = u_minify(pt->height0, level);
    const uint stride = ct->stride[level];
 
    if (!ct->mapped) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
index 4d272bea87..af70ddc6ab 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -59,9 +59,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 
    state->format            = texture->format;
    state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width[0]);
-   state->pot_height        = util_is_pot(texture->height[0]);
-   state->pot_depth         = util_is_pot(texture->depth[0]);
+   state->pot_width         = util_is_pot(texture->width0);
+   state->pot_height        = util_is_pot(texture->height0);
+   state->pot_depth         = util_is_pot(texture->depth0);
 
    state->wrap_s            = sampler->wrap_s;
    state->wrap_t            = sampler->wrap_t;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index c69d90c723..8333805a3f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
       if(tex) {
          struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
          struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
-         jit_tex->width = tex->width[0];
-         jit_tex->height = tex->height[0];
+         jit_tex->width = tex->width0;
+         jit_tex->height = tex->height0;
          jit_tex->stride = lp_tex->stride[0];
          if(!lp_tex->dt)
             jit_tex->data = lp_tex->data;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 773e848242..c7c4143bc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -36,6 +36,7 @@
 #include "util/u_memory.h"
 #include "util/u_tile.h"
 #include "util/u_format.h"
+#include "util/u_math.h"
 #include "lp_context.h"
 #include "lp_surface.h"
 #include "lp_texture.h"
@@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
                                      addr.bits.level, 
                                      addr.bits.z, 
                                      PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
+                                     u_minify(tc->texture->width0, addr.bits.level),
+                                     u_minify(tc->texture->height0, addr.bits.level));
 
          tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index a1365a045f..0d01c07fb5 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
       dsdx = fabsf(dsdx);
       dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width[0];
+      rho = MAX2(dsdx, dsdy) * texture->width0;
    }
    if (t) {
       float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dtdx = fabsf(dtdx);
       dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height[0];
+      max = MAX2(dtdx, dtdy) * texture->height0;
       rho = MAX2(rho, max);
    }
    if (p) {
@@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dpdx = fabsf(dpdx);
       dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      max = MAX2(dpdx, dpdy) * texture->depth0;
       rho = MAX2(rho, max);
    }
 
@@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler,
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level) ||
+       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
       rgba[0][j] = sampler->border_color[0];
       rgba[1][j] = sampler->border_color[1];
       rgba[2][j] = sampler->border_color[2];
@@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
    /* texture RECTS cannot be mipmapped */
    assert(level0 == level1);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
 
    /* Do this elsewhere: 
     */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+   samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
+   samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
 
    /* Try to hook in a faster sampler.  Ultimately we'll have to
     * code-generate these.  Luckily most of this looks like it is
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index a00f2495df..0a0f31f8a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 {
    struct pipe_texture *pt = &lpt->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    unsigned buffer_size = 0;
 
@@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
    for (level = 0; level <= pt->last_level; level++) {
       unsigned nblocksx, nblocksy;
 
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
 
@@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       lpt->stride[level]);
 
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 
    lpt->data = align_malloc(buffer_size, 16);
@@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
    struct llvmpipe_winsys *winsys = screen->winsys;
 
    pf_get_block(lpt->base.format, &lpt->base.block);
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
 
    lpt->dt = winsys->displaytarget_create(winsys,
                                           lpt->base.format,
-                                          lpt->base.width[0],
-                                          lpt->base.height[0],
+                                          lpt->base.width0,
+                                          lpt->base.height0,
                                           16,
                                           &lpt->stride[0] );
 
@@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    lpt->base = *base;
    pipe_reference_init(&lpt->base.reference, 1);
    lpt->base.screen = screen;
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
    lpt->stride[0] = stride[0];
 
    pipe_buffer_reference(&lpt->buffer, buffer);
@@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = lpt->level_offset[level];
       ps->usage = usage;
 
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 21f990fd53..0cce71ad1d 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -57,8 +57,8 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
 		| nv04_fragtex_format(pt->format)
 		| ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
-		| ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
-		| ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+		| ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+		| ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
 		;
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
index 93f752faec..4fd72c82e6 100644
--- a/src/gallium/drivers/nv04/nv04_miptree.c
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv04_context.h"
 #include "nv04_screen.h"
@@ -9,31 +10,29 @@ static void
 nv04_miptree_layout(struct nv04_miptree *nv04mt)
 {
 	struct pipe_texture *pt = &nv04mt->base;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l;
 
 	nr_faces = 1;
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
 
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 		
-		nv04mt->level[l].pitch = pt->width[0];
+		nv04mt->level[l].pitch = pt->width0;
 		nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
 
 		nv04mt->level[l].image_offset = 
 			CALLOC(nr_faces, sizeof(unsigned));
-		offset += nv04mt->level[l].pitch * pt->height[l];
+		offset += nv04mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv04mt->total_size = offset;
@@ -75,7 +74,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv04_miptree);
@@ -120,8 +119,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 6618660743..e6456429f4 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv04_context.h"
 #include "nv04_screen.h"
@@ -20,9 +21,9 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
index 27f2f87584..906fdfeeb9 100644
--- a/src/gallium/drivers/nv10/nv10_fragtex.c
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -62,9 +62,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 
 	txf  = tf->format << 8;
 	txf |= (pt->last_level + 1) << 16;
-	txf |= log2i(pt->width[0]) << 20;
-	txf |= log2i(pt->height[0]) << 24;
-	txf |= log2i(pt->depth[0]) << 28;
+	txf |= log2i(pt->width0) << 20;
+	txf |= log2i(pt->height0) << 24;
+	txf |= log2i(pt->depth0) << 28;
 	txf |= 8;
 
 	switch (pt->target) {
@@ -89,7 +89,7 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 	OUT_RING  (0x40000000); /* enable */
 	OUT_RING  (txs);
 	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width[0] << 16) | pt->height[0]);
+	OUT_RING  ((pt->width0 << 16) | pt->height0);
 	OUT_RING  (ps->bcol);
 #endif
 }
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index 34e3c2ebd7..b2a6c59b74 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv10_context.h"
 #include "nv10_screen.h"
@@ -10,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 {
 	struct pipe_texture *pt = &nv10mt->base;
 	boolean swizzled = FALSE;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l, f;
 
@@ -21,8 +22,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
+
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
@@ -35,15 +35,15 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 		nv10mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 
 	}
 
 	for (f = 0; f < nr_faces; f++) {
 		for (l = 0; l <= pt->last_level; l++) {
 			nv10mt->level[l].image_offset[f] = offset;
-			offset += nv10mt->level[l].pitch * pt->height[l];
+			offset += nv10mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 	}
 
@@ -58,7 +58,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv10_miptree);
@@ -133,8 +133,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 8feb85e4bd..ec54297ab0 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv10_context.h"
 #include "nv10_screen.h"
@@ -20,9 +21,9 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
index 495a7be912..2db4a4015a 100644
--- a/src/gallium/drivers/nv20/nv20_fragtex.c
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -62,9 +62,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 
 	txf  = tf->format << 8;
 	txf |= (pt->last_level + 1) << 16;
-	txf |= log2i(pt->width[0]) << 20;
-	txf |= log2i(pt->height[0]) << 24;
-	txf |= log2i(pt->depth[0]) << 28;
+	txf |= log2i(pt->width0) << 20;
+	txf |= log2i(pt->height0) << 24;
+	txf |= log2i(pt->depth0) << 28;
 	txf |= 8;
 
 	switch (pt->target) {
@@ -89,7 +89,7 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 	OUT_RING  (0x40000000); /* enable */
 	OUT_RING  (txs);
 	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width[0] << 16) | pt->height[0]);
+	OUT_RING  ((pt->width0 << 16) | pt->height0);
 	OUT_RING  (ps->bcol);
 #endif
 }
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index 185fbf53e0..554e28e47d 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv20_context.h"
 #include "nv20_screen.h"
@@ -9,7 +10,7 @@ static void
 nv20_miptree_layout(struct nv20_miptree *nv20mt)
 {
 	struct pipe_texture *pt = &nv20mt->base;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -25,21 +26,19 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			nv20mt->level[l].pitch = pt->width[l] * pt->block.size;
+			nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		nv20mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -47,14 +46,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 			nv20mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(nv20mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += nv20mt->level[l].pitch * pt->height[l];
+				offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		nv20mt->level[l].image_offset[f] = offset;
-		offset += nv20mt->level[l].pitch * pt->height[l];
+		offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv20mt->total_size = offset;
@@ -68,7 +67,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv20_miptree);
@@ -100,8 +99,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
 	mt->base.screen = screen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -167,8 +166,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 81b4f1a917..87b5c14a3c 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv20_context.h"
 #include "nv20_screen.h"
@@ -20,9 +21,9 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index dca760cae6..b3293ee700 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -74,9 +74,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 
 	txf  = tf->format;
 	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-	txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
-	txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
-	txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+	txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+	txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+	txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
 	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
 
 	switch (pt->target) {
@@ -115,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 	so_data  (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
 	so_data  (so, txs);
 	so_data  (so, ps->filt | 0x2000 /*voodoo*/);
-	so_data  (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
-		       pt->height[0]);
+	so_data  (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+		       pt->height0);
 	so_data  (so, ps->bcol);
 
 	return so;
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 280696d450..b4c306d127 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv30_context.h"
 
@@ -8,7 +9,7 @@ static void
 nv30_miptree_layout(struct nv30_miptree *nv30mt)
 {
 	struct pipe_texture *pt = &nv30mt->base;
-	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +22,26 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 		nr_faces = 6;
 	} else
 	if (pt->target == PIPE_TEXTURE_3D) {
-		nr_faces = pt->depth[0];
+		nr_faces = pt->depth0;
 	} else {
 		nr_faces = 1;
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			nv30mt->level[l].pitch = pt->width[l] * pt->block.size;
+			nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		nv30mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
-		depth  = MAX2(1, depth  >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -51,14 +49,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 			nv30mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(nv30mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += nv30mt->level[l].pitch * pt->height[l];
+				offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		nv30mt->level[l].image_offset[f] = offset;
-		offset += nv30mt->level[l].pitch * pt->height[l];
+		offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv30mt->total_size = offset;
@@ -79,8 +77,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -134,7 +132,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv30_miptree);
@@ -182,8 +180,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 98011decf7..5e429b4d85 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv30_context.h"
 #include "nv30_screen.h"
@@ -20,9 +21,9 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index e2ec57564d..44abc84596 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -117,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
 	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
 	so_data  (so, txs);
 	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
-	so_data  (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
-		       pt->height[0]);
+	so_data  (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) |
+		       pt->height0);
 	so_data  (so, ps->bcol);
 	so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
-	so_data  (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+	so_data  (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
 
 	return so;
 }
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 465dd3b069..f73bedff6d 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv40_context.h"
 
@@ -8,7 +9,7 @@ static void
 nv40_miptree_layout(struct nv40_miptree *mt)
 {
 	struct pipe_texture *pt = &mt->base;
-	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +22,26 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 		nr_faces = 6;
 	} else
 	if (pt->target == PIPE_TEXTURE_3D) {
-		nr_faces = pt->depth[0];
+		nr_faces = pt->depth0;
 	} else {
 		nr_faces = 1;
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			mt->level[l].pitch = pt->width[l] * pt->block.size;
+			mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
-		depth  = MAX2(1, depth  >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -51,14 +49,14 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 			mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += mt->level[l].pitch * pt->height[l];
+				offset += mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		mt->level[l].image_offset[f] = offset;
-		offset += mt->level[l].pitch * pt->height[l];
+		offset += mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	mt->total_size = offset;
@@ -79,8 +77,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -128,7 +126,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv40_miptree);
@@ -176,8 +174,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 92caee6f38..36e253c96f 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv40_context.h"
 #include "nv40_screen.h"
@@ -20,9 +21,9 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 9c20c5cc28..3d58746793 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -61,8 +61,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	struct nouveau_device *dev = nouveau_screen(pscreen)->device;
 	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
 	struct pipe_texture *pt = &mt->base.base;
-	unsigned width = tmp->width[0], height = tmp->height[0];
-	unsigned depth = tmp->depth[0], image_alignment;
+	unsigned width = tmp->width0, height = tmp->height0;
+	unsigned depth = tmp->depth0, image_alignment;
 	uint32_t tile_flags;
 	int ret, i, l;
 
@@ -92,9 +92,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
 
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
@@ -102,9 +99,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
 		lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
 
-		width = MAX2(1, width >> 1);
-		height = MAX2(1, height >> 1);
-		depth = MAX2(1, depth >> 1);
+		width = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth = u_minify(depth, 1);
 	}
 
 	image_alignment  = get_tile_height(mt->level[0].tile_mode) * 64;
@@ -122,7 +119,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 			size  = lvl->pitch;
 			size *= align(pt->nblocksy[l], tile_h);
-			size *= align(pt->depth[l], tile_d);
+			size *= align(u_minify(pt->depth0, l), tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
 
@@ -151,7 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv50_miptree);
@@ -202,8 +199,8 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ps->texture, pt);
 	ps->format = pt->format;
-	ps->width = pt->width[level];
-	ps->height = pt->height[level];
+	ps->width = u_minify(pt->width0, level);
+	ps->height = u_minify(pt->height0, level);
 	ps->usage = flags;
 	pipe_reference_init(&ps->reference, 1);
 	ps->face = face;
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 2813f54477..417d367942 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -131,9 +131,9 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 		 NOUVEAU_BO_RD, 0, 0);
 	so_data (so, mode);
 	so_data (so, 0x00300000);
-	so_data (so, mt->base.base.width[0] | (1 << 31));
+	so_data (so, mt->base.base.width0 | (1 << 31));
 	so_data (so, (mt->base.base.last_level << 28) |
-		 (mt->base.base.depth[0] << 16) | mt->base.base.height[0]);
+		 (mt->base.base.depth0 << 16) | mt->base.base.height0);
 	so_data (so, 0x03000000);
 	so_data (so, mt->base.base.last_level << 4);
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index ea61357aaa..39d65279fc 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -1,6 +1,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv50_context.h"
 
@@ -156,9 +157,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->base.block = pt->block;
 	if (!pt->nblocksx[level]) {
 		tx->base.nblocksx = pf_get_nblocksx(&pt->block,
-						    pt->width[level]);
+						    u_minify(pt->width0, level));
 		tx->base.nblocksy = pf_get_nblocksy(&pt->block,
-						    pt->height[level]);
+						    u_minify(pt->height0, level));
 	} else {
 		tx->base.nblocksx = pt->nblocksx[level];
 		tx->base.nblocksy = pt->nblocksy[level];
@@ -167,9 +168,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
-	tx->level_width = mt->base.base.width[level];
-	tx->level_height = mt->base.base.height[level];
-	tx->level_depth = mt->base.base.depth[level];
+	tx->level_width = u_minify(mt->base.base.width0, level);
+	tx->level_height = u_minify(mt->base.base.height0, level);
+	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
 	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index aea25cf71d..f4d148cdc5 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -34,8 +34,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
     struct r300_texture_state* state = &tex->state;
     struct pipe_texture *pt = &tex->tex;
 
-    state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
-                     R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff);
+    state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
+                     R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
 
     if (tex->is_npot) {
         /* rectangles love this */
@@ -43,7 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
         state->format2 = (tex->pitch[0] - 1) & 0x1fff;
     } else {
         /* power of two textures (3D, mipmaps, and no pitch) */
-        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
+        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf) |
                           R300_TX_NUM_LEVELS(pt->last_level & 0xf);
     }
 
@@ -58,17 +58,17 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
     /* large textures on r500 */
     if (is_r500)
     {
-        if (pt->width[0] > 2048) {
+        if (pt->width0 > 2048) {
             state->format2 |= R500_TXWIDTH_BIT11;
         }
-        if (pt->height[0] > 2048) {
+        if (pt->height0 > 2048) {
             state->format2 |= R500_TXHEIGHT_BIT11;
         }
     }
-    assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048));
+    assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048));
 
     debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
-		 pt->width[0], pt->height[0], pt->last_level);
+		 pt->width0, pt->height0, pt->last_level);
 }
 
 unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
@@ -106,7 +106,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
         return 0;
     }
 
-    return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32);
+    return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32);
 }
 
 static void r300_setup_miptree(struct r300_texture* tex)
@@ -116,14 +116,8 @@ static void r300_setup_miptree(struct r300_texture* tex)
     int i;
 
     for (i = 0; i <= base->last_level; i++) {
-        if (i > 0) {
-            base->width[i] = minify(base->width[i-1]);
-            base->height[i] = minify(base->height[i-1]);
-            base->depth[i] = minify(base->depth[i-1]);
-        }
-
-        base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
-        base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
+        base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i));
+        base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i));
 
         stride = r300_texture_get_stride(tex, i);
         layer_size = stride * base->nblocksy[i];
@@ -131,7 +125,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
         if (base->target == PIPE_TEXTURE_CUBE)
             size = layer_size * 6;
         else
-            size = layer_size * base->depth[i];
+            size = layer_size * u_minify(base->depth0, i);
 
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
@@ -140,15 +134,15 @@ static void r300_setup_miptree(struct r300_texture* tex)
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
-                i, base->width[i], base->height[i], base->depth[i],
-                stride);
+                i, u_minify(base->width0, i), u_minify(base->height0, i),
+                u_minify(base->depth0, i), stride);
     }
 }
 
 static void r300_setup_flags(struct r300_texture* tex)
 {
-    tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) ||
-                   !util_is_power_of_two(tex->tex.height[0]);
+    tex->is_npot = !util_is_power_of_two(tex->tex.width0) ||
+                   !util_is_power_of_two(tex->tex.height0);
 }
 
 /* Create a new texture. */
@@ -208,8 +202,8 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
         pipe_reference_init(&surface->reference, 1);
         pipe_texture_reference(&surface->texture, texture);
         surface->format = texture->format;
-        surface->width = texture->width[level];
-        surface->height = texture->height[level];
+        surface->width = u_minify(texture->width0, level);
+        surface->height = u_minify(texture->height0, level);
         surface->offset = offset;
         surface->usage = flags;
         surface->zslice = zslice;
@@ -237,7 +231,7 @@ static struct pipe_texture*
 
     /* Support only 2D textures without mipmaps */
     if (base->target != PIPE_TEXTURE_2D ||
-        base->depth[0] != 1 ||
+        base->depth0 != 1 ||
         base->last_level != 0) {
         return NULL;
     }
@@ -287,9 +281,9 @@ r300_video_surface_create(struct pipe_screen *screen,
     template.target = PIPE_TEXTURE_2D;
     template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
     template.last_level = 0;
-    template.width[0] = util_next_power_of_two(width);
-    template.height[0] = util_next_power_of_two(height);
-    template.depth[0] = 1;
+    template.width0 = util_next_power_of_two(width);
+    template.height0 = util_next_power_of_two(height);
+    template.depth0 = 1;
     pf_get_block(template.format, &template.block);
     template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
                          PIPE_TEXTURE_USAGE_RENDER_TARGET;
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index 5625ff53cf..45a6059ea8 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -46,7 +46,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_rect.h"
-
+ 
 static struct pipe_surface *
 dri_surface_from_handle(struct drm_api *api,
 			struct pipe_screen *screen,
@@ -62,10 +62,10 @@ dri_surface_from_handle(struct drm_api *api,
    templat.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET;
    templat.target = PIPE_TEXTURE_2D;
    templat.last_level = 0;
-   templat.depth[0] = 1;
+   templat.depth0 = 1;
    templat.format = format;
-   templat.width[0] = width;
-   templat.height[0] = height;
+   templat.width0 = width;
+   templat.height0 = height;
    pf_get_block(templat.format, &templat.block);
 
    texture = api->texture_from_shared_handle(api, screen, &templat,
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index 91615abebe..ddd9b04cd4 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -114,10 +114,10 @@ drm_create_texture(_EGLDisplay *dpy,
 	templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
 	templat.target = PIPE_TEXTURE_2D;
 	templat.last_level = 0;
-	templat.depth[0] = 1;
+	templat.depth0 = 1;
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-	templat.width[0] = w;
-	templat.height[0] = h;
+	templat.width0 = w;
+	templat.height0 = h;
 	pf_get_block(templat.format, &templat.block);
 
 	texture = screen->texture_create(dev->screen,
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index f16fe5b0ff..a83bcc71a1 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -113,9 +113,9 @@ struct st_device {
       memset(&templat, 0, sizeof(templat));
       templat.format = format;
       pf_get_block(templat.format, &templat.block);
-      templat.width[0] = width;
-      templat.height[0] = height;
-      templat.depth[0] = depth;
+      templat.width0 = width;
+      templat.height0 = height;
+      templat.depth0 = depth;
       templat.last_level = last_level;
       templat.target = target;
       templat.tex_usage = tex_usage;
diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i
index 1d513abf3c..5416b872f5 100644
--- a/src/gallium/state_trackers/python/p_texture.i
+++ b/src/gallium/state_trackers/python/p_texture.i
@@ -59,15 +59,15 @@
    }
    
    unsigned get_width(unsigned level=0) {
-      return $self->width[level];
+      return u_minify($self->width0, level);
    }
    
    unsigned get_height(unsigned level=0) {
-      return $self->height[level];
+      return u_minify($self->height0, level);
    }
    
    unsigned get_depth(unsigned level=0) {
-      return $self->depth[level];
+      return u_minify($self->depth0, level);
    }
    
    unsigned get_nblocksx(unsigned level=0) {
@@ -88,7 +88,7 @@
          SWIG_exception(SWIG_ValueError, "face out of bounds");
       if(level > $self->last_level)
          SWIG_exception(SWIG_ValueError, "level out of bounds");
-      if(zslice >= $self->depth[level])
+      if(zslice >= u_minify($self->depth0, level))
          SWIG_exception(SWIG_ValueError, "zslice out of bounds");
       
       surface = CALLOC_STRUCT(st_surface);
@@ -375,13 +375,13 @@ struct st_surface
    static unsigned
    st_surface_width_get(struct st_surface *surface)
    {
-      return surface->texture->width[surface->level];
+      return u_minify(surface->texture->width0, surface->level);
    }
    
    static unsigned
    st_surface_height_get(struct st_surface *surface)
    {
-      return surface->texture->height[surface->level];
+      return u_minify(surface->texture->height0, surface->level);
    }
 
    static unsigned
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index 348f2e4368..d0bcb690a9 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -279,9 +279,9 @@ class Screen(Object):
     def texture_create(self, templat):
         return self.real.texture_create(
             format = templat.format,
-            width = templat.width[0],
-            height = templat.height[0],
-            depth = templat.depth[0],
+            width = templat.width0,
+            height = templat.height0,
+            depth = templat.depth0,
             last_level = templat.last_level,
             target = templat.target,
             tex_usage = templat.tex_usage,
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index ea7d18738f..a791113aba 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -252,9 +252,9 @@ st_context_create(struct st_device *st_dev)
       templat.block.size = 4;
       templat.block.width = 1;
       templat.block.height = 1;
-      templat.width[0] = 1;
-      templat.height[0] = 1;
-      templat.depth[0] = 1;
+      templat.width0 = 1;
+      templat.height0 = 1;
+      templat.depth0 = 1;
       templat.last_level = 0;
    
       st_ctx->default_texture = screen->texture_create( screen, &templat );
@@ -264,8 +264,8 @@ st_context_create(struct st_device *st_dev)
                                              0, 0, 0,
                                              PIPE_TRANSFER_WRITE,
                                              0, 0,
-                                             st_ctx->default_texture->width[0],
-                                             st_ctx->default_texture->height[0]);
+                                             st_ctx->default_texture->width0,
+                                             st_ctx->default_texture->height0);
          if (transfer) {
             uint32_t *map;
             map = (uint32_t *) screen->transfer_map(screen, transfer);
diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c
index 53a01891e1..6fee90afda 100644
--- a/src/gallium/state_trackers/python/st_sample.c
+++ b/src/gallium/state_trackers/python/st_sample.c
@@ -528,8 +528,8 @@ st_sample_surface(struct st_surface *surface, float *rgba)
 {
    struct pipe_texture *texture = surface->texture;
    struct pipe_screen *screen = texture->screen;
-   unsigned width = texture->width[surface->level];
-   unsigned height = texture->height[surface->level];
+   unsigned width = u_minify(texture->width0, surface->level);
+   unsigned height = u_minify(texture->height0, surface->level);
    uint rgba_stride = width * 4;
    struct pipe_transfer *transfer;
    void *raw;
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index 862cbb03c4..faf396d087 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -68,9 +68,9 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
    templ.target = PIPE_TEXTURE_1D;
    templ.format = PIPE_FORMAT_A8R8G8B8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = color_data_len;
-   templ.height[0] = 1;
-   templ.depth[0] = 1;
+   templ.width0 = color_data_len;
+   templ.height0 = 1;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -81,7 +81,7 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
          screen->get_tex_transfer(screen, tex,
                                   0, 0, 0,
                                   PIPE_TRANSFER_READ_WRITE ,
-                                  0, 0, tex->width[0], tex->height[0]);
+                                  0, 0, tex->width0, tex->height0);
       void *map = screen->transfer_map(screen, transfer);
       memcpy(map, color_data, sizeof(VGint)*color_data_len);
       screen->transfer_unmap(screen, transfer);
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 9a722980d5..4684a5727d 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -93,8 +93,8 @@ static void vg_copy_texture(struct vg_context *ctx,
    dst_loc[3] = height;
    dst_bounds[0] = 0.f;
    dst_bounds[1] = 0.f;
-   dst_bounds[2] = dst->width[0];
-   dst_bounds[3] = dst->height[0];
+   dst_bounds[2] = dst->width0;
+   dst_bounds[3] = dst->height0;
 
    src_loc[0] = sx;
    src_loc[1] = sy;
@@ -102,8 +102,8 @@ static void vg_copy_texture(struct vg_context *ctx,
    src_loc[3] = height;
    src_bounds[0] = 0.f;
    src_bounds[1] = 0.f;
-   src_bounds[2] = src->width[0];
-   src_bounds[3] = src->height[0];
+   src_bounds[2] = src->width0;
+   src_bounds[3] = src->height0;
 
    vg_bound_rect(src_loc, src_bounds, src_shift);
    vg_bound_rect(dst_loc, dst_bounds, dst_shift);
@@ -272,9 +272,9 @@ struct vg_image * image_create(VGImageFormat format,
    pt.format = pformat;
    pf_get_block(pformat, &pt.block);
    pt.last_level = 0;
-   pt.width[0] = width;
-   pt.height[0] = height;
-   pt.depth[0] = 1;
+   pt.width0 = width;
+   pt.height0 = height;
+   pt.depth0 = 1;
    pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    newtex = screen->texture_create(screen, &pt);
@@ -414,7 +414,7 @@ void image_sub_data(struct vg_image *image,
    { /* upload color_data */
       struct pipe_transfer *transfer = screen->get_tex_transfer(
          screen, texture, 0, 0, 0,
-         PIPE_TRANSFER_WRITE, 0, 0, texture->width[0], texture->height[0]);
+         PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0);
       src += (dataStride * yoffset);
       for (i = 0; i < height; i++) {
          _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp);
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index 24650a37d5..b84103fdba 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -426,7 +426,7 @@ static void mask_using_texture(struct pipe_texture *texture,
    if (!surface)
       return;
    if (!intersect_rectangles(surface->width, surface->height,
-                             texture->width[0], texture->height[0],
+                             texture->width0, texture->height0,
                              x, y, width, height,
                              offsets, loc))
       return;
@@ -493,9 +493,9 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height)
       pt.format = PIPE_FORMAT_A8R8G8B8_UNORM;
       pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block);
       pt.last_level = 0;
-      pt.width[0] = width;
-      pt.height[0] = height;
-      pt.depth[0] = 1;
+      pt.width0 = width;
+      pt.height0 = height;
+      pt.depth0 = 1;
       pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
       pt.compressed = 0;
 
@@ -607,8 +607,8 @@ void mask_render_to(struct path *path,
    struct vg_mask_layer *temp_layer;
    VGint width, height;
 
-   width = fb_buffers->alpha_mask->width[0];
-   height = fb_buffers->alpha_mask->width[0];
+   width = fb_buffers->alpha_mask->width0;
+   height = fb_buffers->alpha_mask->height0;
 
    temp_layer = mask_layer_create(width, height);
 
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index 04a6ba9cdc..e8ca7d9e89 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -151,9 +151,9 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p)
    templ.target = PIPE_TEXTURE_1D;
    templ.format = PIPE_FORMAT_A8R8G8B8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = 1024;
-   templ.height[0] = 1;
-   templ.depth[0] = 1;
+   templ.width0 = 1024;
+   templ.height0 = 1;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -328,8 +328,8 @@ static INLINE void  paint_pattern_buffer(struct vg_paint *paint, void *buffer)
 
    map[4] = 0.f;
    map[5] = 1.f;
-   map[6] = paint->pattern.texture->width[0];
-   map[7] = paint->pattern.texture->height[0];
+   map[6] = paint->pattern.texture->width0;
+   map[7] = paint->pattern.texture->height0;
    {
       struct matrix mat;
       memcpy(&mat, &ctx->state.vg.fill_paint_to_user_matrix,
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 396c88aa3d..9085ed1bfe 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -230,13 +230,13 @@ void renderer_draw_texture(struct renderer *r,
    struct pipe_buffer *buf;
    VGfloat s0, t0, s1, t1;
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = x1offset / tex->width[0];
-   s1 = x2offset / tex->width[0];
-   t0 = y1offset / tex->height[0];
-   t1 = y2offset / tex->height[0];
+   s0 = x1offset / tex->width0;
+   s1 = x2offset / tex->width0;
+   t0 = y1offset / tex->height0;
+   t1 = y2offset / tex->height0;
 
    cso_save_vertex_shader(r->cso);
    /* shaders */
@@ -276,10 +276,10 @@ void renderer_copy_texture(struct renderer *ctx,
    struct pipe_framebuffer_state fb;
    float s0, t0, s1, t1;
 
-   assert(src->width[0] != 0);
-   assert(src->height[0] != 0);
-   assert(dst->width[0] != 0);
-   assert(dst->height[0] != 0);
+   assert(src->width0 != 0);
+   assert(src->height0 != 0);
+   assert(dst->width0 != 0);
+   assert(dst->height0 != 0);
 
 #if 0
    debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n",
@@ -287,10 +287,10 @@ void renderer_copy_texture(struct renderer *ctx,
 #endif
 
 #if 1
-   s0 = sx1 / src->width[0];
-   s1 = sx2 / src->width[0];
-   t0 = sy1 / src->height[0];
-   t1 = sy2 / src->height[0];
+   s0 = sx1 / src->width0;
+   s1 = sx2 / src->width0;
+   t0 = sy1 / src->height0;
+   t1 = sy2 / src->height0;
 #else
    s0 = 0;
    s1 = 1;
@@ -445,9 +445,9 @@ void renderer_copy_surface(struct renderer *ctx,
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = src->format;
    texTemp.last_level = 0;
-   texTemp.width[0] = srcW;
-   texTemp.height[0] = srcH;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = srcW;
+   texTemp.height0 = srcH;
+   texTemp.depth0 = 1;
    pf_get_block(src->format, &texTemp.block);
 
    tex = screen->texture_create(screen, &texTemp);
@@ -570,13 +570,13 @@ void renderer_texture_quad(struct renderer *r,
    struct pipe_buffer *buf;
    VGfloat s0, t0, s1, t1;
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = x1offset / tex->width[0];
-   s1 = x2offset / tex->width[0];
-   t0 = y1offset / tex->height[0];
-   t1 = y2offset / tex->height[0];
+   s0 = x1offset / tex->width0;
+   s1 = x2offset / tex->width0;
+   t0 = y1offset / tex->height0;
+   t1 = y2offset / tex->height0;
 
    cso_save_vertex_shader(r->cso);
    /* shaders */
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index c4da01e52c..d28463dd1b 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -51,9 +51,9 @@ create_texture(struct pipe_context *pipe, enum pipe_format format,
 
    templ.target = PIPE_TEXTURE_2D;
    pf_get_block(templ.format, &templ.block);
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    templ.last_level = 0;
 
    if (pf_get_component_bits(format, PIPE_FORMAT_COMP_S)) {
diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
index 733bd53fca..6064648ab0 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.c
+++ b/src/gallium/state_trackers/xorg/xorg_composite.c
@@ -436,8 +436,8 @@ setup_fs_constant_buffer(struct exa_context *exa)
 static void
 setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst)
 {
-   int width = pDst->tex->width[0];
-   int height = pDst->tex->height[0];
+   int width = pDst->tex->width0;
+   int height = pDst->tex->height0;
 
    setup_vs_constant_buffer(exa, width, height);
    setup_fs_constant_buffer(exa);
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index 85b9162d4c..c4751724c9 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -187,10 +187,10 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image)
 	templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
 	templat.target = PIPE_TEXTURE_2D;
 	templat.last_level = 0;
-	templat.depth[0] = 1;
+	templat.depth0 = 1;
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-	templat.width[0] = 64;
-	templat.height[0] = 64;
+	templat.width0 = 64;
+	templat.height0 = 64;
 	pf_get_block(templat.format, &templat.block);
 
 	crtcp->cursor_tex = ms->screen->texture_create(ms->screen,
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index c41a7cd639..e16e79719c 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -103,9 +103,9 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
 		template.format = ms->ds_depth_bits_last ?
 		    PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM;
 	    pf_get_block(template.format, &template.block);
-	    template.width[0] = pDraw->width;
-	    template.height[0] = pDraw->height;
-	    template.depth[0] = 1;
+	    template.width0 = pDraw->width;
+	    template.height0 = pDraw->height;
+	    template.depth0 = 1;
 	    template.last_level = 0;
 	    template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
 		PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index 6fa274eb0a..534d4da13f 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -288,7 +288,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index)
 					PIPE_TRANSFER_MAP_DIRECTLY |
 #endif
 					PIPE_TRANSFER_READ_WRITE,
-					0, 0, priv->tex->width[0], priv->tex->height[0]);
+					0, 0, priv->tex->width0, priv->tex->height0);
 	if (!priv->map_transfer)
 #ifdef EXA_MIXED_PIXMAPS
 	    return FALSE;
@@ -752,8 +752,8 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 
     /* Deal with screen resize */
     if (!priv->tex ||
-        (priv->tex->width[0] != width ||
-         priv->tex->height[0] != height ||
+        (priv->tex->width0 != width ||
+         priv->tex->height0 != height ||
          priv->tex_flags != priv->flags)) {
 	struct pipe_texture *texture = NULL;
 	struct pipe_texture template;
@@ -762,9 +762,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	template.target = PIPE_TEXTURE_2D;
 	exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format);
 	pf_get_block(template.format, &template.block);
-	template.width[0] = width;
-	template.height[0] = height;
-	template.depth[0] = 1;
+	template.width0 = width;
+	template.height0 = height;
+	template.depth0 = 1;
 	template.last_level = 0;
 	template.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET | priv->flags;
 	priv->tex_flags = priv->flags;
@@ -779,12 +779,12 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	    src_surf = xorg_gpu_surface(exa->pipe->screen, priv);
         if (exa->pipe->surface_copy) {
             exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf,
-                        0, 0, min(width, texture->width[0]),
-                        min(height, texture->height[0]));
+                        0, 0, min(width, texture->width0),
+                        min(height, texture->height0));
         } else {
             util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf,
-                        0, 0, min(width, texture->width[0]),
-                        min(height, texture->height[0]));
+                        0, 0, min(width, texture->width0),
+                        min(height, texture->height0));
         }
 	    exa->scrn->tex_surface_destroy(dst_surf);
 	    exa->scrn->tex_surface_destroy(src_surf);
@@ -817,8 +817,8 @@ xorg_exa_set_texture(PixmapPtr pPixmap, struct  pipe_texture *tex)
     if (!priv)
 	return FALSE;
 
-    if (pPixmap->drawable.width != tex->width[0] ||
-	pPixmap->drawable.height != tex->height[0])
+    if (pPixmap->drawable.width != tex->width0 ||
+	pPixmap->drawable.height != tex->height0)
 	return FALSE;
 
     pipe_texture_reference(&priv->tex, tex);
@@ -841,9 +841,9 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn,
     template.target = PIPE_TEXTURE_2D;
     exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy);
     pf_get_block(template.format, &template.block);
-    template.width[0] = width;
-    template.height[0] = height;
-    template.depth[0] = 1;
+    template.width0 = width;
+    template.height0 = height;
+    template.depth0 = 1;
     template.last_level = 0;
     template.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET;
     template.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 723605312c..418a8dd88b 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -167,14 +167,14 @@ add_vertex_data1(struct xorg_renderer *r,
       map_point(src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]);
    }
 
-   s0 =  pt0[0] / src->width[0];
-   s1 =  pt1[0] / src->width[0];
-   s2 =  pt2[0] / src->width[0];
-   s3 =  pt3[0] / src->width[0];
-   t0 =  pt0[1] / src->height[0];
-   t1 =  pt1[1] / src->height[0];
-   t2 =  pt2[1] / src->height[0];
-   t3 =  pt3[1] / src->height[0];
+   s0 =  pt0[0] / src->width0;
+   s1 =  pt1[0] / src->width0;
+   s2 =  pt2[0] / src->width0;
+   s3 =  pt3[0] / src->width0;
+   t0 =  pt0[1] / src->height0;
+   t1 =  pt1[1] / src->height0;
+   t2 =  pt2[1] / src->height0;
+   t3 =  pt3[1] / src->height0;
 
    /* 1st vertex */
    add_vertex_1tex(r, dstX, dstY, s0, t0);
@@ -262,15 +262,15 @@ add_vertex_data2(struct xorg_renderer *r,
       map_point(mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]);
    }
 
-   src_s0 = spt0[0] / src->width[0];
-   src_t0 = spt0[1] / src->height[0];
-   src_s1 = spt1[0] / src->width[0];
-   src_t1 = spt1[1] / src->height[0];
+   src_s0 = spt0[0] / src->width0;
+   src_t0 = spt0[1] / src->height0;
+   src_s1 = spt1[0] / src->width0;
+   src_t1 = spt1[1] / src->height0;
 
-   mask_s0 = mpt0[0] / mask->width[0];
-   mask_t0 = mpt0[1] / mask->height[0];
-   mask_s1 = mpt1[0] / mask->width[0];
-   mask_t1 = mpt1[1] / mask->height[0];
+   mask_s0 = mpt0[0] / mask->width0;
+   mask_t0 = mpt0[1] / mask->height0;
+   mask_s1 = mpt1[0] / mask->width0;
+   mask_t1 = mpt1[1] / mask->height0;
 
    /* 1st vertex */
    add_vertex_2tex(r, dstX, dstY,
@@ -300,10 +300,10 @@ setup_vertex_data_yuv(struct xorg_renderer *r,
    spt1[0] = srcX + srcW;
    spt1[1] = srcY + srcH;
 
-   s0 = spt0[0] / tex[0]->width[0];
-   t0 = spt0[1] / tex[0]->height[0];
-   s1 = spt1[0] / tex[0]->width[0];
-   t1 = spt1[1] / tex[0]->height[0];
+   s0 = spt0[0] / tex[0]->width0;
+   t0 = spt0[1] / tex[0]->height0;
+   s1 = spt1[0] / tex[0]->width0;
+   t1 = spt1[1] / tex[0]->height0;
 
    /* 1st vertex */
    add_vertex_1tex(r, dstX, dstY, s0, t0);
@@ -387,8 +387,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r,
    struct pipe_surface *surface = xorg_gpu_surface(r->pipe->screen, priv);
    memset(&state, 0, sizeof(struct pipe_framebuffer_state));
 
-   state.width  = priv->tex->width[0];
-   state.height = priv->tex->height[0];
+   state.width  = priv->tex->width0;
+   state.height = priv->tex->height0;
 
    state.nr_cbufs = 1;
    state.cbufs[0] = surface;
@@ -407,8 +407,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r,
 void renderer_bind_viewport(struct xorg_renderer *r,
                             struct exa_pixmap_priv *dst)
 {
-   int width = dst->tex->width[0];
-   int height = dst->tex->height[0];
+   int width = dst->tex->width0;
+   int height = dst->tex->height0;
 
    /*debug_printf("Bind viewport (%d, %d)\n", width, height);*/
 
@@ -584,16 +584,16 @@ static void renderer_copy_texture(struct xorg_renderer *r,
    float s0, t0, s1, t1;
    struct xorg_shader shader;
 
-   assert(src->width[0] != 0);
-   assert(src->height[0] != 0);
-   assert(dst->width[0] != 0);
-   assert(dst->height[0] != 0);
+   assert(src->width0 != 0);
+   assert(src->height0 != 0);
+   assert(dst->width0 != 0);
+   assert(dst->height0 != 0);
 
 #if 1
-   s0 = sx1 / src->width[0];
-   s1 = sx2 / src->width[0];
-   t0 = sy1 / src->height[0];
-   t1 = sy2 / src->height[0];
+   s0 = sx1 / src->width0;
+   s1 = sx2 / src->width0;
+   t0 = sy1 / src->height0;
+   t1 = sy2 / src->height0;
 #else
    s0 = 0;
    s1 = 1;
@@ -730,9 +730,9 @@ create_sampler_texture(struct xorg_renderer *r,
    templ.target = PIPE_TEXTURE_2D;
    templ.format = format;
    templ.last_level = 0;
-   templ.width[0] = src->width[0];
-   templ.height[0] = src->height[0];
-   templ.depth[0] = 1;
+   templ.width0 = src->width0;
+   templ.height0 = src->height0;
+   templ.depth0 = 1;
    pf_get_block(format, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -754,13 +754,13 @@ create_sampler_texture(struct xorg_renderer *r,
                 ps_tex, /* dest */
                 0, 0, /* destx/y */
                 ps_read,
-                0, 0, src->width[0], src->height[0]);
+                0, 0, src->width0, src->height0);
       } else {
           util_surface_copy(pipe, FALSE,
                 ps_tex, /* dest */
                 0, 0, /* destx/y */
                 ps_read,
-                0, 0, src->width[0], src->height[0]);
+                0, 0, src->width0, src->height0);
       }
       pipe_surface_reference(&ps_read, NULL);
       pipe_surface_reference(&ps_tex, NULL);
@@ -791,8 +791,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r,
    dst_loc[3] = height;
    dst_bounds[0] = 0.f;
    dst_bounds[1] = 0.f;
-   dst_bounds[2] = dst->width[0];
-   dst_bounds[3] = dst->height[0];
+   dst_bounds[2] = dst->width0;
+   dst_bounds[3] = dst->height0;
 
    src_loc[0] = sx;
    src_loc[1] = sy;
@@ -800,8 +800,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r,
    src_loc[3] = height;
    src_bounds[0] = 0.f;
    src_bounds[1] = 0.f;
-   src_bounds[2] = src->width[0];
-   src_bounds[3] = src->height[0];
+   src_bounds[2] = src->width0;
+   src_bounds[3] = src->height0;
 
    bound_rect(src_loc, src_bounds, src_shift);
    bound_rect(dst_loc, dst_bounds, dst_shift);
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index 2b935c0f73..856599e640 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -166,9 +166,9 @@ create_component_texture(struct pipe_context *pipe,
    templ.target = PIPE_TEXTURE_2D;
    templ.format = PIPE_FORMAT_L8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -182,18 +182,18 @@ check_yuv_textures(struct xorg_xv_port_priv *priv,  int width, int height)
 {
    struct pipe_texture **dst = priv->yuv[priv->current_set];
    if (!dst[0] ||
-       dst[0]->width[0] != width ||
-       dst[0]->height[0] != height) {
+       dst[0]->width0 != width ||
+       dst[0]->height0 != height) {
       pipe_texture_reference(&dst[0], NULL);
    }
    if (!dst[1] ||
-       dst[1]->width[0] != width ||
-       dst[1]->height[0] != height) {
+       dst[1]->width0 != width ||
+       dst[1]->height0 != height) {
       pipe_texture_reference(&dst[1], NULL);
    }
    if (!dst[2] ||
-       dst[2]->width[0] != width ||
-       dst[2]->height[0] != height) {
+       dst[2]->width0 != width ||
+       dst[2]->height0 != height) {
       pipe_texture_reference(&dst[2], NULL);
    }
 
@@ -320,8 +320,8 @@ copy_packed_data(ScrnInfoPtr pScrn,
 static void
 setup_vs_video_constants(struct xorg_renderer *r, struct exa_pixmap_priv *dst)
 {
-   int width = dst->tex->width[0];
-   int height = dst->tex->height[0];
+   int width = dst->tex->width0;
+   int height = dst->tex->height0;
    const int param_bytes = 8 * sizeof(float);
    float vs_consts[8] = {
       2.f/width, 2.f/height, 1, 1,
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index bf9038f356..8cb73f4897 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -103,9 +103,9 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u
    /* XXX: Needs to match the drawable's format? */
    template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
    template.last_level = 0;
-   template.width[0] = width;
-   template.height[0] = height;
-   template.depth[0] = 1;
+   template.width0 = width;
+   template.height0 = height;
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
 
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 317dc44d22..d497861324 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -24,10 +24,10 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 	tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY;
 	tmpl.target = PIPE_TEXTURE_2D;
 	tmpl.last_level = 0;
-	tmpl.depth[0] = 1;
+	tmpl.depth0 = 1;
 	tmpl.format = format;
-	tmpl.width[0] = width;
-	tmpl.height[0] = height;
+	tmpl.width0 = width;
+	tmpl.height0 = height;
 	pf_get_block(tmpl.format, &tmpl.block);
 
 	pt = api->texture_from_shared_handle(api, pscreen, &tmpl,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 81cd9dc4fb..74afffc9cf 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -317,9 +317,9 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co
     memset(&tmpl, 0, sizeof(tmpl));
     tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
     tmpl.target = PIPE_TEXTURE_2D;
-    tmpl.width[0] = w;
-    tmpl.height[0] = h;
-    tmpl.depth[0] = 1;
+    tmpl.width0 = w;
+    tmpl.height0 = h;
+    tmpl.depth0 = 1;
     tmpl.format = format;
     pf_get_block(tmpl.format, &tmpl.block);
     tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 0469fb9978..659a6c9193 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -376,7 +376,7 @@ st_render_texture(GLcontext *ctx,
    rb->_BaseFormat = texImage->_BaseFormat;
    /*printf("***** render to texture level %d: %d x %d\n", att->TextureLevel, rb->Width, rb->Height);*/
 
-   /*printf("***** pipe texture %d x %d\n", pt->width[0], pt->height[0]);*/
+   /*printf("***** pipe texture %d x %d\n", pt->width0, pt->height0);*/
 
    pipe_texture_reference( &strb->texture, pt );
 
-- 
cgit v1.2.3


From 456b5bd5d0dbed172a5d8f88625eeb63fd87c8dd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Fri, 27 Nov 2009 10:11:18 +0100
Subject: svga: Update text shader header.

---
 src/gallium/drivers/svga/svga_pipe_vs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index e5ffe668c3..c104c41f5f 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -48,7 +48,7 @@ static const struct tgsi_token *substitute_vs(
    static struct tgsi_token tokens[300];
 
    const char *text = 
-      "VERT1.1\n"
+      "VERT\n"
       "DCL IN[0]\n"
       "DCL IN[1]\n"
       "DCL IN[2]\n"
-- 
cgit v1.2.3


From b911688b87a011eacf2034bd61562e633952a66b Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 27 Nov 2009 12:18:22 +0000
Subject: svga: add DEBUG_CACHE option

---
 src/gallium/drivers/svga/svga_debug.h         |  1 +
 src/gallium/drivers/svga/svga_draw.c          |  7 +++++++
 src/gallium/drivers/svga/svga_pipe_blit.c     |  8 +++++++
 src/gallium/drivers/svga/svga_pipe_clear.c    |  6 ++++++
 src/gallium/drivers/svga/svga_pipe_flush.c    |  3 +++
 src/gallium/drivers/svga/svga_screen.c        |  5 +++++
 src/gallium/drivers/svga/svga_screen_buffer.c |  2 +-
 src/gallium/drivers/svga/svga_screen_cache.c  | 30 ++++++++++++++++++++-------
 8 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
index b7bb5686ed..3a3fcd8fae 100644
--- a/src/gallium/drivers/svga/svga_debug.h
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -43,6 +43,7 @@
 #define DEBUG_FLUSH    0x1000   /* flush after every draw */
 #define DEBUG_SYNC     0x2000   /* sync after every flush */
 #define DEBUG_QUERY    0x4000
+#define DEBUG_CACHE    0x8000
 
 #ifdef DEBUG
 extern int SVGA_DEBUG;
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 1b371cecc6..8db40d0fd5 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -29,10 +29,13 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
+#include "svga_context.h"
 #include "svga_draw.h"
 #include "svga_draw_private.h"
+#include "svga_debug.h"
 #include "svga_screen.h"
 #include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
 #include "svga_winsys.h"
 #include "svga_cmd.h"
 
@@ -160,6 +163,10 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
          ib_handle[i] = handle;
       }
 
+      SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
+               svga_surface(svga->curr.framebuffer.cbufs[0])->handle,
+               hwtnl->cmd.prim_count);
+
       ret = SVGA3D_BeginDrawPrimitives(swc, 
                                        &vdecl, 
                                        hwtnl->cmd.vdecl_count, 
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
index 5a4a8c0f5f..4f575b06e6 100644
--- a/src/gallium/drivers/svga/svga_pipe_blit.c
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -25,6 +25,7 @@
 
 #include "svga_screen_texture.h"
 #include "svga_context.h"
+#include "svga_debug.h"
 #include "svga_cmd.h"
 
 #define FILE_DEBUG_FLAG DEBUG_BLIT
@@ -43,6 +44,13 @@ static void svga_surface_copy(struct pipe_context *pipe,
 
    svga_hwtnl_flush_retry( svga );
 
+   SVGA_DBG(DEBUG_DMA, "blit to sid %p (%d,%d), from sid %p (%d,%d) sz %dx%d\n",
+            svga_surface(dest)->handle,
+            destx, desty,
+            svga_surface(src)->handle,
+            srcx, srcy,
+            width, height);
+
    ret = SVGA3D_BeginSurfaceCopy(svga->swc,
                                  src,
                                  dest,
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
index 8977d26541..6195c3897e 100644
--- a/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -24,12 +24,14 @@
  **********************************************************/
 
 #include "svga_cmd.h"
+#include "svga_debug.h"
 
 #include "pipe/p_defines.h"
 #include "util/u_pack_color.h"
 
 #include "svga_context.h"
 #include "svga_state.h"
+#include "svga_screen_texture.h"
 
 
 static enum pipe_error
@@ -98,6 +100,10 @@ svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
 {
    struct svga_context *svga = svga_context( pipe );
    int ret;
+   
+   if (buffers & PIPE_CLEAR_COLOR)
+      SVGA_DBG(DEBUG_DMA, "clear sid %p\n",
+               svga_surface(svga->curr.framebuffer.cbufs[0])->handle);
 
    ret = try_clear( svga, buffers, rgba, depth, stencil );
 
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
index 942366de72..0becb0765a 100644
--- a/src/gallium/drivers/svga/svga_pipe_flush.c
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -59,6 +59,9 @@ static void svga_flush( struct pipe_context *pipe,
    /* Flush command queue.
     */
    svga_context_flush(svga, fence);
+
+   SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s flags %x fence_ptr %p\n",
+            __FUNCTION__, flags, fence ? *fence : 0x0);
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 3afcaffff5..fc1b3c980e 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -57,6 +57,7 @@ static const struct debug_named_value svga_debug_flags[] = {
    { "perf",     DEBUG_PERF },
    { "flush",    DEBUG_FLUSH },
    { "sync",     DEBUG_SYNC },
+   { "cache",    DEBUG_CACHE },
    {NULL, 0}
 };
 #endif
@@ -297,6 +298,10 @@ svga_fence_finish(struct pipe_screen *screen,
                   unsigned flag)
 {
    struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+
+   SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "%s fence_ptr %p\n",
+            __FUNCTION__, fence);
+
    return sws->fence_finish(sws, fence, flag);
 }
 
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
index c0b0f518bc..1f8a889672 100644
--- a/src/gallium/drivers/svga/svga_screen_buffer.c
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -447,7 +447,7 @@ svga_buffer_map_range( struct pipe_screen *screen,
             enum pipe_error ret;
             struct pipe_fence_handle *fence = NULL;
             
-            SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p, bytes %u - %u\n", 
+            SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p (buffer), bytes %u - %u\n", 
                      sbuf->handle, 0, sbuf->base.size);
 
             memset(&flags, 0, sizeof flags);
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index 689981cc6d..8a06383f61 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -134,7 +134,8 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
    else if(!LIST_IS_EMPTY(&cache->unused)) {
       /* free the last used buffer and reuse its entry */
       entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head);
-      SVGA_DBG(DEBUG_DMA, "unref sid %p (make space)\n", entry->handle);
+      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+               "unref sid %p (make space)\n", entry->handle);
       sws->surface_reference(sws, &entry->handle, NULL);
 
       LIST_DEL(&entry->bucket_head);
@@ -146,11 +147,14 @@ svga_screen_cache_add(struct svga_screen *svgascreen,
       entry->handle = handle;
       memcpy(&entry->key, key, sizeof entry->key);
    
+      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+               "cache sid %p\n", entry->handle);
       LIST_ADD(&entry->head, &cache->validated);
    }
    else {
       /* Couldn't cache the buffer -- this really shouldn't happen */
-      SVGA_DBG(DEBUG_DMA, "unref sid %p (couldn't find space)\n", handle);
+      SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+               "unref sid %p (couldn't find space)\n", handle);
       sws->surface_reference(sws, &handle, NULL);
    }
    
@@ -209,7 +213,8 @@ svga_screen_cache_cleanup(struct svga_screen *svgascreen)
    
    for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) {
       if(cache->entries[i].handle) {
-	 SVGA_DBG(DEBUG_DMA, "unref sid %p (shutdown)\n", cache->entries[i].handle);
+	 SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+                  "unref sid %p (shutdown)\n", cache->entries[i].handle);
 	 sws->surface_reference(sws, &cache->entries[i].handle, NULL);
       }
 
@@ -252,7 +257,8 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
    struct svga_winsys_surface *handle = NULL;
    boolean cachable = SVGA_SURFACE_CACHE_ENABLED && key->cachable;
 
-   SVGA_DBG(DEBUG_DMA, "%s sz %dx%dx%d mips %d faces %d cachable %d\n", 
+   SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+            "%s sz %dx%dx%d mips %d faces %d cachable %d\n", 
             __FUNCTION__,
             key->size.width,
             key->size.height,
@@ -276,10 +282,12 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
       handle = svga_screen_cache_lookup(svgascreen, key);
       if (handle) {
          if (key->format == SVGA3D_BUFFER)
-            SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %d (buffer)\n", handle, 
+            SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+                     "reuse sid %p sz %d (buffer)\n", handle, 
                      key->size.width);
          else
-            SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, 
+            SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+                     "reuse sid %p sz %dx%dx%d mips %d faces %d\n", handle, 
                      key->size.width,
                      key->size.height,
                      key->size.depth,
@@ -296,7 +304,12 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
                                    key->numFaces, 
                                    key->numMipLevels);
       if (handle)
-         SVGA_DBG(DEBUG_DMA, "create sid %p sz %d\n", handle, key->size);
+         SVGA_DBG(DEBUG_CACHE|DEBUG_DMA,
+                  "  CREATE sid %p sz %dx%dx%d\n", 
+                  handle, 
+                  key->size.width,
+                  key->size.height,
+                  key->size.depth);
    }
 
    return handle;
@@ -318,7 +331,8 @@ svga_screen_surface_destroy(struct svga_screen *svgascreen,
       svga_screen_cache_add(svgascreen, key, p_handle);
    }
    else {
-      SVGA_DBG(DEBUG_DMA, "unref sid %p (uncachable)\n", *p_handle);
+      SVGA_DBG(DEBUG_DMA,
+               "unref sid %p (uncachable)\n", *p_handle);
       sws->surface_reference(sws, p_handle, NULL);
    }
 }
-- 
cgit v1.2.3


From b84b7f19dfdc0ac02175847065b39110db7ad98f Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 27 Nov 2009 12:19:28 +0000
Subject: svga: flush our command buffer after the 8th distinct render target

This helps improve the surface cache behaviour in the face of the
large number of single-use render targets generated by EXA and the xorg
state tracker.  Without this we can reference hundreds of individual
render targets from a command buffer, which leaves little scope for
sharing or reuse of those targets.

Flushing early means we can start reusing textures much sooner.

This shouldn't have much effect on normal 3d rendering as it's pretty
rare to have a command buffer with >8 different render targets in that
world.
---
 src/gallium/drivers/svga/svga_context.c           | 4 +++-
 src/gallium/drivers/svga/svga_context.h           | 5 +++++
 src/gallium/drivers/svga/svga_state_framebuffer.c | 3 +++
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 73233957f3..c3de12b4a3 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -230,7 +230,9 @@ void svga_context_flush( struct svga_context *svga,
                          struct pipe_fence_handle **pfence )
 {
    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
-   
+
+   svga->curr.nr_fbs = 0;
+
    /* Unmap upload manager buffers: 
     */
    u_upload_flush(svga->upload_vb);
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index 9a3e92fd8d..e650a251d1 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -191,6 +191,11 @@ struct svga_state
    struct pipe_framebuffer_state framebuffer;
    float depthscale;
 
+   /* Hack to limit the number of different render targets between
+    * flushes.  Helps avoid blowing out our surface cache in EXA.
+    */
+   int nr_fbs;
+
    struct pipe_poly_stipple poly_stipple;
    struct pipe_scissor_state scissor;
    struct pipe_blend_color blend_color;
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
index 7d7f93d8e3..cfdcae4ee4 100644
--- a/src/gallium/drivers/svga/svga_state_framebuffer.c
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -54,6 +54,9 @@ static int emit_framebuffer( struct svga_context *svga,
    
    for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
       if (curr->cbufs[i] != hw->cbufs[i]) {
+         if (svga->curr.nr_fbs++ > 8)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
          ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]);
          if (ret != PIPE_OK)
             return ret;
-- 
cgit v1.2.3


From 178407f33c413cbe7434597b2129abde90041b6b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 24 Nov 2009 14:37:45 +0000
Subject: svga: Use consistent file names for dumping facilities.

---
 src/gallium/drivers/svga/Makefile                  |   4 +-
 src/gallium/drivers/svga/SConscript                |   4 +-
 src/gallium/drivers/svga/svga_tgsi.c               |   2 +-
 src/gallium/drivers/svga/svgadump/st_shader.h      | 214 -------
 src/gallium/drivers/svga/svgadump/st_shader_dump.c | 649 ---------------------
 src/gallium/drivers/svga/svgadump/st_shader_dump.h |  42 --
 src/gallium/drivers/svga/svgadump/st_shader_op.c   | 168 ------
 src/gallium/drivers/svga/svgadump/st_shader_op.h   |  46 --
 src/gallium/drivers/svga/svgadump/svga_dump.c      |   2 +-
 src/gallium/drivers/svga/svgadump/svga_dump.py     |   2 +-
 src/gallium/drivers/svga/svgadump/svga_shader.h    | 214 +++++++
 .../drivers/svga/svgadump/svga_shader_dump.c       | 649 +++++++++++++++++++++
 .../drivers/svga/svgadump/svga_shader_dump.h       |  42 ++
 src/gallium/drivers/svga/svgadump/svga_shader_op.c | 168 ++++++
 src/gallium/drivers/svga/svgadump/svga_shader_op.h |  46 ++
 15 files changed, 1126 insertions(+), 1126 deletions(-)
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader.h
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.c
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_dump.h
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.c
 delete mode 100644 src/gallium/drivers/svga/svgadump/st_shader_op.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.c
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_dump.h
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.c
 create mode 100644 src/gallium/drivers/svga/svgadump/svga_shader_op.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
index 8158364d25..f361908187 100644
--- a/src/gallium/drivers/svga/Makefile
+++ b/src/gallium/drivers/svga/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
 LIBNAME = svga
 
 C_SOURCES = \
-	svgadump/st_shader_dump.c \
-	svgadump/st_shader_op.c \
+	svgadump/svga_shader_dump.c \
+	svgadump/svga_shader_op.c \
 	svgadump/svga_dump.c \
 	svga_cmd.c \
 	svga_context.c \
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
index ff9645fc03..737b791ceb 100644
--- a/src/gallium/drivers/svga/SConscript
+++ b/src/gallium/drivers/svga/SConscript
@@ -60,8 +60,8 @@ sources = [
     'svga_tgsi_insn.c',
     
     'svgadump/svga_dump.c',
-    'svgadump/st_shader_dump.c',
-    'svgadump/st_shader_op.c',
+    'svgadump/svga_shader_dump.c',
+    'svgadump/svga_shader_op.c',
 ]
 
 svga = env.ConvenienceLibrary(
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 44d0930bc0..81eea1a145 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -32,7 +32,7 @@
 #include "tgsi/tgsi_scan.h"
 #include "util/u_memory.h"
 
-#include "svgadump/st_shader_dump.h"
+#include "svgadump/svga_shader_dump.h"
 
 #include "svga_context.h"
 #include "svga_tgsi.h"
diff --git a/src/gallium/drivers/svga/svgadump/st_shader.h b/src/gallium/drivers/svga/svgadump/st_shader.h
deleted file mode 100644
index 2fc1796a90..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader.h
+++ /dev/null
@@ -1,214 +0,0 @@
-/**********************************************************
- * Copyright 2007-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Definitions
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_H
-#define ST_SHADER_SVGA_H
-
-#include "pipe/p_compiler.h"
-
-struct sh_op
-{
-   unsigned opcode:16;
-   unsigned control:8;
-   unsigned length:4;
-   unsigned predicated:1;
-   unsigned unused:1;
-   unsigned coissue:1;
-   unsigned is_reg:1;
-};
-
-struct sh_reg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:14;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_reg_type( struct sh_reg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_cdata
-{
-   float xyzw[4];
-};
-
-struct sh_def
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   struct sh_cdata cdata;
-};
-
-struct sh_defb
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   uint data;
-};
-
-struct sh_idata
-{
-   int xyzw[4];
-};
-
-struct sh_defi
-{
-   struct sh_op op;
-   struct sh_reg reg;
-   struct sh_idata idata;
-};
-
-#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
-#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
-#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
-#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
-
-struct ps_sampleinfo
-{
-   unsigned unused:27;
-   unsigned texture_type:4;
-   unsigned is_reg:1;
-};
-
-struct vs_semantic
-{
-   unsigned usage:5;
-   unsigned unused1:11;
-   unsigned usage_index:4;
-   unsigned unused2:12;
-};
-
-struct sh_dstreg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:2;
-   unsigned write_mask:4;
-   unsigned modifier:4;
-   unsigned shift_scale:4;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_dstreg_type( struct sh_dstreg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_dcl
-{
-   struct sh_op op;
-   union {
-      struct {
-         struct ps_sampleinfo sampleinfo;
-      } ps;
-      struct {
-         struct vs_semantic semantic;
-      } vs;
-   } u;
-   struct sh_dstreg reg;
-};
-
-
-struct sh_srcreg
-{
-   unsigned number:11;
-   unsigned type_hi:2;
-   unsigned relative:1;
-   unsigned unused:2;
-   unsigned swizzle_x:2;
-   unsigned swizzle_y:2;
-   unsigned swizzle_z:2;
-   unsigned swizzle_w:2;
-   unsigned modifier:4;
-   unsigned type_lo:3;
-   unsigned is_reg:1;
-};
-
-static INLINE unsigned
-sh_srcreg_type( struct sh_srcreg reg )
-{
-   return reg.type_lo | (reg.type_hi << 3);
-}
-
-struct sh_dstop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-};
-
-struct sh_srcop
-{
-   struct sh_op op;
-   struct sh_srcreg src;
-};
-
-struct sh_src2op
-{
-   struct sh_op op;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-};
-
-struct sh_unaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src;
-};
-
-struct sh_binaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-};
-
-struct sh_trinaryop
-{
-   struct sh_op op;
-   struct sh_dstreg dst;
-   struct sh_srcreg src0;
-   struct sh_srcreg src1;
-   struct sh_srcreg src2;
-};
-
-#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.c b/src/gallium/drivers/svga/svgadump/st_shader_dump.c
deleted file mode 100644
index d65cc93bfd..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_dump.c
+++ /dev/null
@@ -1,649 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Dump Facilities
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#include "st_shader.h"
-#include "st_shader_dump.h"
-#include "st_shader_op.h"
-#include "util/u_debug.h"
-
-#include "../svga_hw_reg.h"
-#include "svga3d_shaderdefs.h"
-
-struct dump_info
-{
-   SVGA3dShaderVersion version;
-   boolean is_ps;
-};
-
-static void dump_op( struct sh_op op, const char *mnemonic )
-{
-   assert( op.predicated == 0 );
-   assert( op.is_reg == 0 );
-
-   if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
-   switch (op.control) {
-   case 0:
-      break;
-   case SVGA3DOPCONT_PROJECT:
-      debug_printf( "p" );
-      break;
-   case SVGA3DOPCONT_BIAS:
-      debug_printf( "b" );
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_comp_op( struct sh_op op, const char *mnemonic )
-{
-   assert( op.is_reg == 0 );
-
-   if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
-   switch (op.control) {
-   case SVGA3DOPCOMP_RESERVED0:
-      break;
-   case SVGA3DOPCOMP_GT:
-      debug_printf("_gt");
-      break;
-   case SVGA3DOPCOMP_EQ:
-      debug_printf("_eq");
-      break;
-   case SVGA3DOPCOMP_GE:
-      debug_printf("_ge");
-      break;
-   case SVGA3DOPCOMP_LT:
-      debug_printf("_lt");
-      break;
-   case SVGA3DOPCOMPC_NE:
-      debug_printf("_ne");
-      break;
-   case SVGA3DOPCOMP_LE:
-      debug_printf("_le");
-      break;
-   case SVGA3DOPCOMP_RESERVED1:
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
-{
-   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 );
-   assert( reg.is_reg == 1 );
-
-   switch (sh_reg_type( reg )) {
-   case SVGA3DREG_TEMP:
-      debug_printf( "r%u", reg.number );
-      break;
-
-   case SVGA3DREG_INPUT:
-      debug_printf( "v%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONST:
-      if (reg.relative) {
-         if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
-            debug_printf( "c[aL+%u]", reg.number );
-         else
-            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
-      }
-      else
-         debug_printf( "c%u", reg.number );
-      break;
-
-   case SVGA3DREG_ADDR:    /* VS */
-   /* SVGA3DREG_TEXTURE */ /* PS */
-      if (di->is_ps)
-         debug_printf( "t%u", reg.number );
-      else
-         debug_printf( "a%u", reg.number );
-      break;
-
-   case SVGA3DREG_RASTOUT:
-      switch (reg.number) {
-      case 0 /*POSITION*/:
-         debug_printf( "oPos" );
-         break;
-      case 1 /*FOG*/:
-         debug_printf( "oFog" );
-         break;
-      case 2 /*POINT_SIZE*/:
-         debug_printf( "oPts" );
-         break;
-      default:
-         assert( 0 );
-         debug_printf( "???" );
-      }
-      break;
-
-   case SVGA3DREG_ATTROUT:
-      assert( reg.number < 2 );
-      debug_printf( "oD%u", reg.number );
-      break;
-
-   case SVGA3DREG_TEXCRDOUT:
-   /* SVGA3DREG_OUTPUT */
-      debug_printf( "oT%u", reg.number );
-      break;
-
-   case SVGA3DREG_COLOROUT:
-      debug_printf( "oC%u", reg.number );
-      break;
-
-   case SVGA3DREG_DEPTHOUT:
-      debug_printf( "oD%u", reg.number );
-      break;
-
-   case SVGA3DREG_SAMPLER:
-      debug_printf( "s%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONSTBOOL:
-      assert( !reg.relative );
-      debug_printf( "b%u", reg.number );
-      break;
-
-   case SVGA3DREG_CONSTINT:
-      assert( !reg.relative );
-      debug_printf( "i%u", reg.number );
-      break;
-
-   case SVGA3DREG_LOOP:
-      assert( reg.number == 0 );
-      debug_printf( "aL" );
-      break;
-
-   case SVGA3DREG_MISCTYPE:
-      switch (reg.number) {
-      case SVGA3DMISCREG_POSITION:
-         debug_printf( "vPos" );
-         break;
-      case SVGA3DMISCREG_FACE:
-         debug_printf( "vFace" );
-         break;
-      default:
-         assert(0);
-         break;
-      }
-      break;
-
-   case SVGA3DREG_LABEL:
-      debug_printf( "l%u", reg.number );
-      break;
-
-   case SVGA3DREG_PREDICATE:
-      debug_printf( "p%u", reg.number );
-      break;
-
-
-   default:
-      assert( 0 );
-      debug_printf( "???" );
-   }
-}
-
-static void dump_cdata( struct sh_cdata cdata )
-{
-   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
-}
-
-static void dump_idata( struct sh_idata idata )
-{
-   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
-}
-
-static void dump_bdata( boolean bdata )
-{
-   debug_printf( bdata ? "TRUE" : "FALSE" );
-}
-
-static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
-{
-   switch (sampleinfo.texture_type) {
-   case SVGA3DSAMP_2D:
-      debug_printf( "_2d" );
-      break;
-   case SVGA3DSAMP_CUBE:
-      debug_printf( "_cube" );
-      break;
-   case SVGA3DSAMP_VOLUME:
-      debug_printf( "_volume" );
-      break;
-   default:
-      assert( 0 );
-   }
-}
-
-
-static void dump_usageinfo( struct vs_semantic semantic )
-{
-   switch (semantic.usage) {
-   case SVGA3D_DECLUSAGE_POSITION:
-      debug_printf("_position" );
-      break;
-   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
-      debug_printf("_blendweight" );
-      break;
-   case SVGA3D_DECLUSAGE_BLENDINDICES:
-      debug_printf("_blendindices" );
-      break;
-   case SVGA3D_DECLUSAGE_NORMAL:
-      debug_printf("_normal" );
-      break;
-   case SVGA3D_DECLUSAGE_PSIZE:
-      debug_printf("_psize" );
-      break;
-   case SVGA3D_DECLUSAGE_TEXCOORD:
-      debug_printf("_texcoord");
-      break;
-   case SVGA3D_DECLUSAGE_TANGENT:
-      debug_printf("_tangent" );
-      break;
-   case SVGA3D_DECLUSAGE_BINORMAL:
-      debug_printf("_binormal" );
-      break;
-   case SVGA3D_DECLUSAGE_TESSFACTOR:
-      debug_printf("_tessfactor" );
-      break;
-   case SVGA3D_DECLUSAGE_POSITIONT:
-      debug_printf("_positiont" );
-      break;
-   case SVGA3D_DECLUSAGE_COLOR:
-      debug_printf("_color" );
-      break;
-   case SVGA3D_DECLUSAGE_FOG:
-      debug_printf("_fog" );
-      break;
-   case SVGA3D_DECLUSAGE_DEPTH:
-      debug_printf("_depth" );
-      break;
-   case SVGA3D_DECLUSAGE_SAMPLE:
-      debug_printf("_sample");
-      break;
-   default:
-      assert( 0 );
-      return;
-   }
-
-   if (semantic.usage_index != 0) {
-      debug_printf("%d", semantic.usage_index );
-   }
-}
-
-static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
-{
-   union {
-      struct sh_reg reg;
-      struct sh_dstreg dstreg;
-   } u;
-
-   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
-
-   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
-      debug_printf( "_sat" );
-   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
-      debug_printf( "_pp" );
-   switch (dstreg.shift_scale) {
-   case 0:
-      break;
-   case 1:
-      debug_printf( "_x2" );
-      break;
-   case 2:
-      debug_printf( "_x4" );
-      break;
-   case 3:
-      debug_printf( "_x8" );
-      break;
-   case 13:
-      debug_printf( "_d8" );
-      break;
-   case 14:
-      debug_printf( "_d4" );
-      break;
-   case 15:
-      debug_printf( "_d2" );
-      break;
-   default:
-      assert( 0 );
-   }
-   debug_printf( " " );
-
-   u.dstreg = dstreg;
-   dump_reg( u.reg, NULL, di );
-   if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) {
-      debug_printf( "." );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_0)
-         debug_printf( "x" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_1)
-         debug_printf( "y" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_2)
-         debug_printf( "z" );
-      if (dstreg.write_mask & SVGA3DWRITEMASK_3)
-         debug_printf( "w" );
-   }
-}
-
-static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
-{
-   union {
-      struct sh_reg reg;
-      struct sh_srcreg srcreg;
-   } u;
-
-   switch (srcreg.modifier) {
-   case SVGA3DSRCMOD_NEG:
-   case SVGA3DSRCMOD_BIASNEG:
-   case SVGA3DSRCMOD_SIGNNEG:
-   case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "-" );
-      break;
-   case SVGA3DSRCMOD_ABS:
-      debug_printf( "|" );
-      break;
-   case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "-|" );
-      break;
-   case SVGA3DSRCMOD_COMP:
-      debug_printf( "1-" );
-      break;
-   case SVGA3DSRCMOD_NOT:
-      debug_printf( "!" );
-   }
-
-   u.srcreg = srcreg;
-   dump_reg( u.reg, indreg, di );
-   switch (srcreg.modifier) {
-   case SVGA3DSRCMOD_NONE:
-   case SVGA3DSRCMOD_NEG:
-   case SVGA3DSRCMOD_COMP:
-   case SVGA3DSRCMOD_NOT:
-      break;
-   case SVGA3DSRCMOD_ABS:
-   case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "|" );
-      break;
-   case SVGA3DSRCMOD_BIAS:
-   case SVGA3DSRCMOD_BIASNEG:
-      debug_printf( "_bias" );
-      break;
-   case SVGA3DSRCMOD_SIGN:
-   case SVGA3DSRCMOD_SIGNNEG:
-      debug_printf( "_bx2" );
-      break;
-   case SVGA3DSRCMOD_X2:
-   case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "_x2" );
-      break;
-   case SVGA3DSRCMOD_DZ:
-      debug_printf( "_dz" );
-      break;
-   case SVGA3DSRCMOD_DW:
-      debug_printf( "_dw" );
-      break;
-   default:
-      assert( 0 );
-   }
-   if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) {
-      debug_printf( "." );
-      if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
-      }
-      else {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
-      }
-   }
-}
-
-void
-sh_svga_dump(
-   const unsigned *assem,
-   unsigned dwords,
-   unsigned do_binary )
-{
-   const unsigned *start = assem;
-   boolean finished = FALSE;
-   struct dump_info di;
-   unsigned i;
-
-   if (do_binary) {
-      for (i = 0; i < dwords; i++) 
-         debug_printf("  0x%08x,\n", assem[i]);
-      
-      debug_printf("\n\n");
-   }
-
-   di.version.value = *assem++;
-   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
-
-   debug_printf(
-      "%s_%u_%u\n",
-      di.is_ps ? "ps" : "vs",
-      di.version.major,
-      di.version.minor );
-
-   while (!finished) {
-      struct sh_op op = *(struct sh_op *) assem;
-
-      if (assem - start >= dwords) {
-         debug_printf("... ran off end of buffer\n");
-         assert(0);
-         return;
-      }
-
-      switch (op.opcode) {
-      case SVGA3DOP_DCL:
-         {
-            struct sh_dcl dcl = *(struct sh_dcl *) assem;
-
-            debug_printf( "dcl" );
-            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
-               dump_sampleinfo( dcl.u.ps.sampleinfo );
-            else if (di.is_ps) {
-               if (di.version.major == 3 && 
-                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
-                  dump_usageinfo( dcl.u.vs.semantic );
-            }
-            else
-               dump_usageinfo( dcl.u.vs.semantic );
-            dump_dstreg( dcl.reg, &di );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_DEFB:
-         {
-            struct sh_defb defb = *(struct sh_defb *) assem;
-
-            debug_printf( "defb " );
-            dump_reg( defb.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_bdata( defb.data );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_DEFI:
-         {
-            struct sh_defi defi = *(struct sh_defi *) assem;
-
-            debug_printf( "defi " );
-            dump_reg( defi.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_idata( defi.idata );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_TEXCOORD:
-         assert( di.is_ps );
-         dump_op( op, "texcoord" );
-         if (0) {
-            struct sh_dstop dstop = *(struct sh_dstop *) assem;
-            dump_dstreg( dstop.dst, &di );
-            assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
-         }
-         else {
-            struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
-            dump_dstreg( unaryop.dst, &di );
-            debug_printf( ", " );
-            dump_srcreg( unaryop.src, NULL, &di );
-            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
-         }
-         debug_printf( "\n" );
-         break;
-
-      case SVGA3DOP_TEX:
-         assert( di.is_ps );
-         if (0) {
-            dump_op( op, "tex" );
-            if (0) {
-               struct sh_dstop dstop = *(struct sh_dstop *) assem;
-
-               dump_dstreg( dstop.dst, &di );
-               assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
-            }
-            else {
-               struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
-
-               dump_dstreg( unaryop.dst, &di );
-               debug_printf( ", " );
-               dump_srcreg( unaryop.src, NULL, &di );
-               assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
-            }
-         }
-         else {
-            struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
-
-            dump_op( op, "texld" );
-            dump_dstreg( binaryop.dst, &di );
-            debug_printf( ", " );
-            dump_srcreg( binaryop.src0, NULL, &di );
-            debug_printf( ", " );
-            dump_srcreg( binaryop.src1, NULL, &di );
-            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
-         }
-         debug_printf( "\n" );
-         break;
-
-      case SVGA3DOP_DEF:
-         {
-            struct sh_def def = *(struct sh_def *) assem;
-
-            debug_printf( "def " );
-            dump_reg( def.reg, NULL, &di );
-            debug_printf( ", " );
-            dump_cdata( def.cdata );
-            debug_printf( "\n" );
-            assem += sizeof( struct sh_def ) / sizeof( unsigned );
-         }
-         break;
-
-      case SVGA3DOP_PHASE:
-         debug_printf( "phase\n" );
-         assem += sizeof( struct sh_op ) / sizeof( unsigned );
-         break;
-
-      case SVGA3DOP_COMMENT:
-         assert( 0 );
-         break;
-
-      case SVGA3DOP_RET:
-         debug_printf( "ret\n" );
-         assem += sizeof( struct sh_op ) / sizeof( unsigned );
-         break;
-
-      case SVGA3DOP_END:
-         debug_printf( "end\n" );
-         finished = TRUE;
-         break;
-
-      default:
-         {
-            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
-            uint i;
-            uint num_src = info->num_src + op.predicated;
-            boolean not_first_arg = FALSE;
-
-            assert( info->num_dst <= 1 );
-
-            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
-               num_src += 2;
-
-            dump_comp_op( op, info->mnemonic );
-            assem += sizeof( struct sh_op ) / sizeof( unsigned );
-
-            if (info->num_dst > 0) {
-               struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
-
-               dump_dstreg( dstreg, &di );
-               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
-               not_first_arg = TRUE;
-            }
-
-            for (i = 0; i < num_src; i++) {
-               struct sh_srcreg srcreg;
-               struct sh_srcreg indreg;
-
-               srcreg = *(struct sh_srcreg *) assem;
-               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
-               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
-                  indreg = *(struct sh_srcreg *) assem;
-                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
-               }
-
-               if (not_first_arg)
-                  debug_printf( ", " );
-               else
-                  debug_printf( " " );
-               dump_srcreg( srcreg, &indreg, &di );
-               not_first_arg = TRUE;
-            }
-
-            debug_printf( "\n" );
-         }
-      }
-   }
-}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_dump.h b/src/gallium/drivers/svga/svgadump/st_shader_dump.h
deleted file mode 100644
index af5549cdba..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_dump.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Dump Facilities
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_DUMP_H
-#define ST_SHADER_SVGA_DUMP_H
-
-void
-sh_svga_dump(
-   const unsigned *assem,
-   unsigned dwords,
-   unsigned do_binary );
-
-#endif /* ST_SHADER_SVGA_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.c b/src/gallium/drivers/svga/svgadump/st_shader_op.c
deleted file mode 100644
index 2c05382ab9..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_op.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Opcode Info
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#include "util/u_debug.h"
-#include "st_shader_op.h"
-
-#include "../svga_hw_reg.h"
-#include "svga3d_shaderdefs.h"
-
-#define SVGA3DOP_INVALID SVGA3DOP_END
-#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
-
-static struct sh_opcode_info opcode_info[] =
-{
-   { "nop",          0, 0, SVGA3DOP_NOP          },
-   { "mov",          1, 1, SVGA3DOP_MOV,         },
-   { "add",          1, 2, SVGA3DOP_ADD,         },
-   { "sub",          1, 2, SVGA3DOP_SUB,         },
-   { "mad",          1, 3, SVGA3DOP_MAD,         },
-   { "mul",          1, 2, SVGA3DOP_MUL,         },
-   { "rcp",          1, 1, SVGA3DOP_RCP,         },
-   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
-   { "dp3",          1, 2, SVGA3DOP_DP3,         },
-   { "dp4",          1, 2, SVGA3DOP_DP4,         },
-   { "min",          1, 2, SVGA3DOP_MIN,         },
-   { "max",          1, 2, SVGA3DOP_MAX,         },
-   { "slt",          1, 2, SVGA3DOP_SLT,         },
-   { "sge",          1, 2, SVGA3DOP_SGE,         },
-   { "exp",          1, 1, SVGA3DOP_EXP,         },
-   { "log",          1, 1, SVGA3DOP_LOG,         },
-   { "lit",          1, 1, SVGA3DOP_LIT,         },
-   { "dst",          1, 2, SVGA3DOP_DST,         },
-   { "lrp",          1, 3, SVGA3DOP_LRP,         },
-   { "frc",          1, 1, SVGA3DOP_FRC,         },
-   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
-   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
-   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
-   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
-   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
-   { "call",         0, 1, SVGA3DOP_CALL,        },
-   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
-   { "loop",         0, 2, SVGA3DOP_LOOP,        },
-   { "ret",          0, 0, SVGA3DOP_RET,         },
-   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
-   { "label",        0, 1, SVGA3DOP_LABEL,       },
-   { "dcl",          0, 0, SVGA3DOP_DCL,         },
-   { "pow",          1, 2, SVGA3DOP_POW,         },
-   { "crs",          1, 2, SVGA3DOP_CRS,         },
-   { "sgn",          1, 3, SVGA3DOP_SGN,         },
-   { "abs",          1, 1, SVGA3DOP_ABS,         },
-   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-componenet normalization */
-   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
-   { "rep",          0, 1, SVGA3DOP_REP,         },
-   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
-   { "if",           0, 1, SVGA3DOP_IF,          },
-   { "ifc",          0, 2, SVGA3DOP_IFC,         },
-   { "else",         0, 0, SVGA3DOP_ELSE,        },
-   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
-   { "break",        0, 0, SVGA3DOP_BREAK,       },
-   { "breakc",       0, 0, SVGA3DOP_BREAKC,      },
-   { "mova",         1, 1, SVGA3DOP_MOVA,        },
-   { "defb",         0, 0, SVGA3DOP_DEFB,        },
-   { "defi",         0, 0, SVGA3DOP_DEFI,        },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "???",          0, 0, SVGA3DOP_INVALID,     },
-   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
-   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
-   { "tex",          0, 0, SVGA3DOP_TEX,         },
-   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
-   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
-   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
-   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
-   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
-   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
-   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
-   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
-   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
-   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
-   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
-   { "expp",         1, 1, SVGA3DOP_EXPP,        },
-   { "logp",         1, 1, SVGA3DOP_LOGP,        },
-   { "cnd",          1, 3, SVGA3DOP_CND,         },
-   { "def",          0, 0, SVGA3DOP_DEF,         },
-   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
-   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
-   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
-   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
-   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
-   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
-   { "cmp",          1, 3, SVGA3DOP_CMP,         },
-   { "bem",          1, 2, SVGA3DOP_BEM,         },
-   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
-   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
-   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
-   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
-   { "setp",         1, 2, SVGA3DOP_SETP,        },
-   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
-   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
-};
-
-const struct sh_opcode_info *sh_svga_opcode_info( uint op )
-{
-   struct sh_opcode_info *info;
-
-   if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) {
-      /* The opcode is either PHASE, COMMENT, END or out of range.
-       */
-      assert( 0 );
-      return NULL;
-   }
-
-   info = &opcode_info[op];
-
-   if (info->svga_opcode == SVGA3DOP_INVALID) {
-      /* No valid information. Please provide number of dst/src registers.
-       */
-      assert( 0 );
-      return NULL;
-   }
-
-   /* Sanity check.
-    */
-   assert( op == info->svga_opcode );
-
-   return info;
-}
diff --git a/src/gallium/drivers/svga/svgadump/st_shader_op.h b/src/gallium/drivers/svga/svgadump/st_shader_op.h
deleted file mode 100644
index 01d39dca84..0000000000
--- a/src/gallium/drivers/svga/svgadump/st_shader_op.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/**********************************************************
- * Copyright 2008-2009 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy,
- * modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- **********************************************************/
-
-/**
- * @file
- * SVGA Shader Token Opcode Info
- * 
- * @author Michal Krol <michal@vmware.com>
- */
-
-#ifndef ST_SHADER_SVGA_OP_H
-#define ST_SHADER_SVGA_OP_H
-
-struct sh_opcode_info
-{
-   const char *mnemonic;
-   unsigned num_dst:8;
-   unsigned num_src:8;
-   unsigned svga_opcode:16;
-};
-
-const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
-
-#endif /* ST_SHADER_SVGA_OP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index 180dde8dc1..c6c353f58e 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -31,7 +31,7 @@
  */
 
 #include "svga_types.h"
-#include "st_shader_dump.h"
+#include "svga_shader_dump.h"
 #include "svga3d_reg.h"
 
 #include "util/u_debug.h"
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index 3cb29c395b..288e753296 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -291,7 +291,7 @@ def main():
     print ' */'
     print
     print '#include "svga_types.h"'
-    print '#include "shader_dump/st_shader_dump.h"'
+    print '#include "svga_shader_dump.h"'
     print '#include "svga3d_reg.h"'
     print
     print '#include "pipe/p_debug.h"'
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
new file mode 100644
index 0000000000..2fc1796a90
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -0,0 +1,214 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Definitions
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_H
+#define ST_SHADER_SVGA_H
+
+#include "pipe/p_compiler.h"
+
+struct sh_op   /* Instruction token; the bitfields add up to one 32-bit dword. */
+{
+   unsigned opcode:16;   /* SVGA3DOP_* value; sh_svga_dump() switches on it */
+   unsigned control:8;   /* compared against SVGA3DOPCONT_* / SVGA3DOPCOMP_* by the dumper */
+   unsigned length:4;   /* NOTE(review): presumably the instruction length; not read by the dumper */
+   unsigned predicated:1;   /* counted as one extra source operand by sh_svga_dump() */
+   unsigned unused:1;
+   unsigned coissue:1;   /* dumped as a "+" in front of the mnemonic */
+   unsigned is_reg:1;   /* asserted to be 0 for instruction tokens */
+};
+
+struct sh_reg   /* Generic register token; the bitfields add up to 32 bits. */
+{
+   unsigned number:11;   /* register index */
+   unsigned type_hi:2;   /* upper bits of the register type, see sh_reg_type() */
+   unsigned relative:1;   /* set for relative addressing */
+   unsigned unused:14;   /* sh_dstreg/sh_srcreg keep mask, swizzle and modifier bits here */
+   unsigned type_lo:3;   /* lower bits of the register type */
+   unsigned is_reg:1;   /* asserted to be 1 by dump_reg() */
+};
+/* Join the split type_hi/type_lo bitfields into the 5-bit register type. */
+static INLINE unsigned
+sh_reg_type( struct sh_reg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_cdata   /* Float payload of a DEF instruction (see struct sh_def). */
+{
+   float xyzw[4];   /* printed with "%f" by dump_cdata() */
+};
+
+struct sh_def   /* Token layout read by sh_svga_dump() for SVGA3DOP_DEF. */
+{
+   struct sh_op op;   /* instruction token */
+   struct sh_reg reg;   /* register being defined */
+   struct sh_cdata cdata;   /* four float components */
+};
+
+struct sh_defb   /* Token layout read by sh_svga_dump() for SVGA3DOP_DEFB. */
+{
+   struct sh_op op;   /* instruction token */
+   struct sh_reg reg;   /* register being defined */
+   uint data;   /* dumped as TRUE when non-zero, FALSE otherwise */
+};
+
+struct sh_idata   /* Integer payload of a DEFI instruction (see struct sh_defi). */
+{
+   int xyzw[4];   /* printed with "%d" by dump_idata() */
+};
+
+struct sh_defi   /* Token layout read by sh_svga_dump() for SVGA3DOP_DEFI. */
+{
+   struct sh_op op;   /* instruction token */
+   struct sh_reg reg;   /* register being defined */
+   struct sh_idata idata;   /* four integer components */
+};
+/* Pixel-shader texture types; each one is an alias of the matching SVGA3DSAMP_* value. */
+#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
+#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
+#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
+#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
+
+struct ps_sampleinfo   /* Sampler view of the second DCL token (32 bits of bitfields). */
+{
+   unsigned unused:27;
+   unsigned texture_type:4;   /* SVGA3DSAMP_* value; see dump_sampleinfo() */
+   unsigned is_reg:1;
+};
+
+struct vs_semantic   /* Usage view of the second DCL token (32 bits of bitfields). */
+{
+   unsigned usage:5;   /* SVGA3D_DECLUSAGE_* value; see dump_usageinfo() */
+   unsigned unused1:11;
+   unsigned usage_index:4;   /* appended to the usage name when non-zero */
+   unsigned unused2:12;
+};
+
+struct sh_dstreg   /* Destination register token; overlaid on struct sh_reg by dump_dstreg(). */
+{
+   unsigned number:11;   /* register index */
+   unsigned type_hi:2;   /* upper bits of the register type, see sh_dstreg_type() */
+   unsigned relative:1;   /* set for relative addressing */
+   unsigned unused:2;
+   unsigned write_mask:4;   /* SVGA3DWRITEMASK_* bits; dumped as ".xyzw" subset */
+   unsigned modifier:4;   /* SVGA3DDSTMOD_* bits; dumped as _sat / _pp */
+   unsigned shift_scale:4;   /* 1,2,3 dump as _x2,_x4,_x8; 13,14,15 as _d8,_d4,_d2 */
+   unsigned type_lo:3;   /* lower bits of the register type */
+   unsigned is_reg:1;   /* asserted to be 1 by dump_reg() */
+};
+/* Register type of a destination token, rebuilt from type_hi and type_lo. */
+static INLINE unsigned
+sh_dstreg_type( struct sh_dstreg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_dcl   /* Token layout read by sh_svga_dump() for SVGA3DOP_DCL. */
+{
+   struct sh_op op;   /* instruction token */
+   union {
+      struct {
+         struct ps_sampleinfo sampleinfo;   /* read when reg is a sampler register */
+      } ps;
+      struct {
+         struct vs_semantic semantic;   /* read for the remaining declarations that are dumped */
+      } vs;
+   } u;
+   struct sh_dstreg reg;   /* register being declared */
+};
+
+
+struct sh_srcreg   /* Source register token; overlaid on struct sh_reg by dump_srcreg(). */
+{
+   unsigned number:11;   /* register index */
+   unsigned type_hi:2;   /* upper bits of the register type, see sh_srcreg_type() */
+   unsigned relative:1;   /* set for relative addressing */
+   unsigned unused:2;
+   unsigned swizzle_x:2;   /* each swizzle field indexes "xyzw" when dumped */
+   unsigned swizzle_y:2;
+   unsigned swizzle_z:2;
+   unsigned swizzle_w:2;
+   unsigned modifier:4;   /* SVGA3DSRCMOD_* value */
+   unsigned type_lo:3;   /* lower bits of the register type */
+   unsigned is_reg:1;   /* asserted to be 1 by dump_reg() */
+};
+/* Register type of a source token, rebuilt from type_hi and type_lo. */
+static INLINE unsigned
+sh_srcreg_type( struct sh_srcreg reg )
+{
+   return (reg.type_hi << 3) | reg.type_lo;
+}
+
+struct sh_dstop   /* Instruction token followed by one destination token. */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+};
+
+struct sh_srcop   /* Instruction token followed by one source token. */
+{
+   struct sh_op op;
+   struct sh_srcreg src;
+};
+
+struct sh_src2op   /* Instruction token followed by two source tokens. */
+{
+   struct sh_op op;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_unaryop   /* Instruction token, one destination, one source (used for texcoord). */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src;
+};
+
+struct sh_binaryop   /* Instruction token, one destination, two sources (used for texld). */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_trinaryop   /* Instruction token, one destination, three sources. */
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+   struct sh_srcreg src2;
+};
+
+#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
new file mode 100644
index 0000000000..c654126d3a
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -0,0 +1,649 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "svga_shader.h"
+#include "svga_shader_dump.h"
+#include "svga_shader_op.h"
+#include "util/u_debug.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct dump_info   /* Per-shader state handed to the dump helpers. */
+{
+   SVGA3dShaderVersion version;   /* filled from the first dword of the stream */
+   boolean is_ps;   /* TRUE when version.type == SVGA3D_PS_TYPE */
+};
+/**
+ * Print an instruction mnemonic, prefixed with "+" when coissued and
+ * suffixed with "p"/"b" for the PROJECT/BIAS controls.
+ */
+static void dump_op( struct sh_op op, const char *mnemonic )
+{
+   /* Plain instruction tokens: never predicated, never a register token. */
+   assert( op.predicated == 0 );
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+
+   /* A control of 0 adds no suffix; anything unrecognised trips the assert. */
+   if (op.control == SVGA3DOPCONT_PROJECT)
+      debug_printf( "p" );
+   else if (op.control == SVGA3DOPCONT_BIAS)
+      debug_printf( "b" );
+   else if (op.control != 0)
+      assert( 0 );
+}
+
+/**
+ * Print an instruction mnemonic (with "+" when coissued) followed by the
+ * comparison suffix encoded in op.control: _gt, _eq, _ge, _lt, _ne or _le.
+ */
+static void dump_comp_op( struct sh_op op, const char *mnemonic )
+{
+   const char *cmp = NULL;
+
+   /* Register tokens must never reach this helper. */
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+
+   /* Map the comparison control to its suffix. */
+   switch (op.control) {
+   case SVGA3DOPCOMP_RESERVED0:
+      return;   /* no comparison attached, nothing more to print */
+   case SVGA3DOPCOMP_GT:  cmp = "_gt"; break;
+   case SVGA3DOPCOMP_EQ:  cmp = "_eq"; break;
+   case SVGA3DOPCOMP_GE:  cmp = "_ge"; break;
+   case SVGA3DOPCOMP_LT:  cmp = "_lt"; break;
+   case SVGA3DOPCOMPC_NE: cmp = "_ne"; break;
+   case SVGA3DOPCOMP_LE:  cmp = "_le"; break;
+   /* RESERVED1 and any other value are rejected. */
+   case SVGA3DOPCOMP_RESERVED1:
+   default:
+      assert( 0 );
+      return;
+   }
+
+   debug_printf( "%s", cmp );
+}
+
+/**
+ * Print the assembly spelling of a register token (r#, v#, c#, oPos, ...).
+ * indreg supplies the address register of a relatively addressed constant;
+ * callers without one (dump_dstreg, the DEF* cases) pass NULL.  di selects
+ * the PS ("t#") or VS ("a#") reading of the shared ADDR/TEXTURE type.
+ */
+static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 );
+   assert( reg.is_reg == 1 );
+
+   switch (sh_reg_type( reg )) {
+   case SVGA3DREG_TEMP:
+      debug_printf( "r%u", reg.number );
+      break;
+   case SVGA3DREG_INPUT:
+      debug_printf( "v%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONST:
+      if (!reg.relative)
+         debug_printf( "c%u", reg.number );
+      else if (indreg == NULL)
+         /* No address register supplied: print a placeholder instead of
+          * dereferencing NULL. */
+         debug_printf( "c[???+%u]", reg.number );
+      else if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
+         debug_printf( "c[aL+%u]", reg.number );
+      else
+         debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+      break;
+
+   case SVGA3DREG_ADDR:    /* VS */
+   /* SVGA3DREG_TEXTURE */ /* PS */
+      if (di->is_ps)
+         debug_printf( "t%u", reg.number );
+      else
+         debug_printf( "a%u", reg.number );
+      break;
+
+   case SVGA3DREG_RASTOUT:
+      switch (reg.number) {
+      case 0 /*POSITION*/:
+         debug_printf( "oPos" );
+         break;
+      case 1 /*FOG*/:
+         debug_printf( "oFog" );
+         break;
+      case 2 /*POINT_SIZE*/:
+         debug_printf( "oPts" );
+         break;
+      default:
+         assert( 0 );
+         debug_printf( "???" );
+      }
+      break;
+
+   case SVGA3DREG_ATTROUT:
+      assert( reg.number < 2 );
+      debug_printf( "oD%u", reg.number );
+      break;
+   case SVGA3DREG_TEXCRDOUT:
+   /* SVGA3DREG_OUTPUT */
+      debug_printf( "oT%u", reg.number );
+      break;
+   case SVGA3DREG_COLOROUT:
+      debug_printf( "oC%u", reg.number );
+      break;
+   case SVGA3DREG_DEPTHOUT:
+      debug_printf( "oD%u", reg.number );
+      break;
+
+   case SVGA3DREG_SAMPLER:
+      debug_printf( "s%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTBOOL:
+      assert( !reg.relative );
+      debug_printf( "b%u", reg.number );
+      break;
+   case SVGA3DREG_CONSTINT:
+      assert( !reg.relative );
+      debug_printf( "i%u", reg.number );
+      break;
+
+   case SVGA3DREG_LOOP:
+      assert( reg.number == 0 );
+      debug_printf( "aL" );
+      break;
+
+   case SVGA3DREG_MISCTYPE:
+      switch (reg.number) {
+      case SVGA3DMISCREG_POSITION:
+         debug_printf( "vPos" );
+         break;
+      case SVGA3DMISCREG_FACE:
+         debug_printf( "vFace" );
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+
+   case SVGA3DREG_LABEL:
+      debug_printf( "l%u", reg.number );
+      break;
+   case SVGA3DREG_PREDICATE:
+      debug_printf( "p%u", reg.number );
+      break;
+
+   default:
+      assert( 0 );
+      debug_printf( "???" );
+   }
+}
+/* Print the four float components of a DEF constant as "x, y, z, w". */
+static void dump_cdata( struct sh_cdata cdata )
+{
+   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+}
+/* Print the four integer components of a DEFI constant as "x, y, z, w". */
+static void dump_idata( struct sh_idata idata )
+{
+   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+}
+/* Print a DEFB constant as TRUE (non-zero) or FALSE (zero). */
+static void dump_bdata( boolean bdata )
+{
+   debug_printf( "%s", bdata ? "TRUE" : "FALSE" );
+}
+/**
+ * Print the texture-type suffix of a sampler declaration: "_2d", "_cube"
+ * or "_volume".  Any other type trips the assert and prints nothing.
+ */
+static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
+{
+   const unsigned type = sampleinfo.texture_type;
+
+   if (type == SVGA3DSAMP_2D)
+      debug_printf( "_2d" );
+   else if (type == SVGA3DSAMP_CUBE)
+      debug_printf( "_cube" );
+   else if (type == SVGA3DSAMP_VOLUME)
+      debug_printf( "_volume" );
+   else
+      assert( 0 );
+}
+
+/* Map an SVGA3D_DECLUSAGE_* value to its dump suffix; NULL when unknown. */
+static const char *usageinfo_suffix( unsigned usage )
+{
+   switch (usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      return "_position";
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      return "_blendweight";
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      return "_blendindices";
+   case SVGA3D_DECLUSAGE_NORMAL:
+      return "_normal";
+   case SVGA3D_DECLUSAGE_PSIZE:
+      return "_psize";
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      return "_texcoord";
+   case SVGA3D_DECLUSAGE_TANGENT:
+      return "_tangent";
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      return "_binormal";
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      return "_tessfactor";
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      return "_positiont";
+   case SVGA3D_DECLUSAGE_COLOR:
+      return "_color";
+   case SVGA3D_DECLUSAGE_FOG:
+      return "_fog";
+   case SVGA3D_DECLUSAGE_DEPTH:
+      return "_depth";
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      return "_sample";
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Print the usage suffix of a declaration, e.g. "_texcoord1": the usage
+ * name followed by the usage index when that index is non-zero.
+ * An unrecognised usage trips the assert and prints nothing.
+ */
+static void dump_usageinfo( struct vs_semantic semantic )
+{
+   const char *suffix = usageinfo_suffix( semantic.usage );
+
+   if (suffix == NULL) {
+      assert( 0 );
+      return;
+   }
+
+   debug_printf( "%s", suffix );
+   if (semantic.usage_index != 0)
+      debug_printf( "%d", semantic.usage_index );
+}
+/**
+ * Print a destination operand: result modifiers (_sat, _pp), the shift
+ * scale (_x2 ... _d2), a space, the register name and, unless every
+ * component is written, the ".xyzw" write mask.
+ */
+static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
+{
+   static const unsigned mask_bits[4] = {
+      SVGA3DWRITEMASK_0, SVGA3DWRITEMASK_1, SVGA3DWRITEMASK_2, SVGA3DWRITEMASK_3
+   };
+   union {
+      struct sh_reg reg;
+      struct sh_dstreg dstreg;
+   } alias;
+   const char *scale = NULL;
+   unsigned i;
+
+   /* Only the SATURATE and PARTIALPRECISION modifier bits are understood. */
+   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
+
+   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
+      debug_printf( "_sat" );
+   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
+      debug_printf( "_pp" );
+
+   switch (dstreg.shift_scale) {
+   case 0:                 break;
+   case 1:  scale = "_x2"; break;
+   case 2:  scale = "_x4"; break;
+   case 3:  scale = "_x8"; break;
+   case 13: scale = "_d8"; break;
+   case 14: scale = "_d4"; break;
+   case 15: scale = "_d2"; break;
+   default:
+      assert( 0 );
+   }
+   if (scale != NULL)
+      debug_printf( "%s", scale );
+   debug_printf( " " );
+
+   /* dump_reg() takes the generic register view of the same token. */
+   alias.dstreg = dstreg;
+   dump_reg( alias.reg, NULL, di );
+
+   if (dstreg.write_mask == SVGA3DWRITEMASK_ALL)
+      return;
+
+   /* Partial mask: list the written components in x, y, z, w order. */
+   debug_printf( "." );
+   for (i = 0; i < 4; i++) {
+      if (dstreg.write_mask & mask_bits[i])
+         debug_printf( "%c", "xyzw"[i] );
+   }
+}
+/**
+ * Print a source operand: modifier prefix ("-", "|", "-|", "1-", "!"), the
+ * register name, modifier suffix ("|", "_bias", "_bx2", "_x2", "_dz", "_dw")
+ * and the swizzle when it differs from the identity ".xyzw".
+ * indreg is forwarded to dump_reg() for relatively addressed constants.
+ */
+static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   union {
+      struct sh_reg reg;
+      struct sh_srcreg srcreg;
+   } alias;
+   const char *prefix = NULL;
+   const char *suffix = NULL;
+   unsigned swz[4];
+   unsigned i;
+
+   /* Text that goes in front of the register name. */
+   switch (srcreg.modifier) {
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_BIASNEG:
+   case SVGA3DSRCMOD_SIGNNEG:
+   case SVGA3DSRCMOD_X2NEG:  prefix = "-";  break;
+   case SVGA3DSRCMOD_ABS:    prefix = "|";  break;
+   case SVGA3DSRCMOD_ABSNEG: prefix = "-|"; break;
+   case SVGA3DSRCMOD_COMP:   prefix = "1-"; break;
+   case SVGA3DSRCMOD_NOT:    prefix = "!";  break;
+   }
+   if (prefix != NULL)
+      debug_printf( "%s", prefix );
+
+   alias.srcreg = srcreg;
+   dump_reg( alias.reg, indreg, di );
+
+   /* Text that follows the register name. */
+   switch (srcreg.modifier) {
+   case SVGA3DSRCMOD_NONE:
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_COMP:
+   case SVGA3DSRCMOD_NOT:
+      break;
+   case SVGA3DSRCMOD_ABS:
+   case SVGA3DSRCMOD_ABSNEG:  suffix = "|";     break;
+   case SVGA3DSRCMOD_BIAS:
+   case SVGA3DSRCMOD_BIASNEG: suffix = "_bias"; break;
+   case SVGA3DSRCMOD_SIGN:
+   case SVGA3DSRCMOD_SIGNNEG: suffix = "_bx2";  break;
+   case SVGA3DSRCMOD_X2:
+   case SVGA3DSRCMOD_X2NEG:   suffix = "_x2";   break;
+   case SVGA3DSRCMOD_DZ:      suffix = "_dz";   break;
+   case SVGA3DSRCMOD_DW:      suffix = "_dw";   break;
+   default:
+      assert( 0 );
+   }
+   if (suffix != NULL)
+      debug_printf( "%s", suffix );
+
+   swz[0] = srcreg.swizzle_x;
+   swz[1] = srcreg.swizzle_y;
+   swz[2] = srcreg.swizzle_z;
+   swz[3] = srcreg.swizzle_w;
+
+   /* The identity swizzle is left out of the output. */
+   if (swz[0] == 0 && swz[1] == 1 && swz[2] == 2 && swz[3] == 3)
+      return;
+
+   debug_printf( "." );
+   if (swz[0] == swz[1] && swz[1] == swz[2] && swz[2] == swz[3]) {
+      debug_printf( "%c", "xyzw"[swz[0]] );
+      return;
+   }
+   for (i = 0; i < 4; i++)
+      debug_printf( "%c", "xyzw"[swz[i]] );
+}
+/**
+ * Disassemble an SVGA3D shader token stream to the debug output.
+ *
+ * assem points at dwords tokens, the first being the version token; a
+ * non-zero do_binary also prints every raw dword as hex beforehand.
+ * Dumping stops at the END token, at the end of the buffer, or at a token
+ * that cannot be decoded (COMMENT, or an opcode without table info).
+ * NOTE(review): only the first token of each instruction is bounds-checked;
+ * operand tokens of a truncated stream are still read past dwords.
+ */
+void
+sh_svga_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary )
+{
+   const unsigned *start = assem;
+   boolean finished = FALSE;
+   struct dump_info di;
+   unsigned i;
+
+   if (do_binary) {
+      for (i = 0; i < dwords; i++)
+         debug_printf("  0x%08x,\n", assem[i]);
+      debug_printf("\n\n");
+   }
+
+   /* An empty buffer does not even hold the version token. */
+   if (dwords == 0)
+      return;
+
+   di.version.value = *assem++;
+   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
+
+   debug_printf( "%s_%u_%u\n", di.is_ps ? "ps" : "vs",
+                 di.version.major, di.version.minor );
+
+   while (!finished) {
+      struct sh_op op;
+
+      /* Check before fetching the token so the read stays in bounds. */
+      if ((unsigned) (assem - start) >= dwords) {
+         debug_printf("... ran off end of buffer\n");
+         assert(0);
+         return;
+      }
+      op = *(const struct sh_op *) assem;
+
+      switch (op.opcode) {
+      case SVGA3DOP_DCL:
+         {
+            struct sh_dcl dcl = *(const struct sh_dcl *) assem;
+
+            debug_printf( "dcl" );
+            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
+               dump_sampleinfo( dcl.u.ps.sampleinfo );
+            else if (di.is_ps) {
+               if (di.version.major == 3 &&
+                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
+                  dump_usageinfo( dcl.u.vs.semantic );
+            }
+            else
+               dump_usageinfo( dcl.u.vs.semantic );
+            dump_dstreg( dcl.reg, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFB:
+         {
+            struct sh_defb defb = *(const struct sh_defb *) assem;
+
+            debug_printf( "defb " );
+            dump_reg( defb.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_bdata( defb.data );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFI:
+         {
+            struct sh_defi defi = *(const struct sh_defi *) assem;
+
+            debug_printf( "defi " );
+            dump_reg( defi.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_idata( defi.idata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_TEXCOORD:
+         /* Only the two-operand "dst, src" form is decoded. */
+         {
+            struct sh_unaryop unaryop = *(const struct sh_unaryop *) assem;
+
+            assert( di.is_ps );
+            dump_op( op, "texcoord" );
+            dump_dstreg( unaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( unaryop.src, NULL, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_TEX:
+         /* Only the three-operand "texld dst, src0, src1" form is decoded. */
+         {
+            struct sh_binaryop binaryop = *(const struct sh_binaryop *) assem;
+
+            assert( di.is_ps );
+            dump_op( op, "texld" );
+            dump_dstreg( binaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src0, NULL, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src1, NULL, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEF:
+         {
+            struct sh_def def = *(const struct sh_def *) assem;
+
+            debug_printf( "def " );
+            dump_reg( def.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_cdata( def.cdata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_def ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_PHASE:
+         debug_printf( "phase\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_COMMENT:
+         /* Not decoded; stop rather than spin on the same token forever. */
+         debug_printf( "... comment token not supported\n" );
+         assert( 0 );
+         return;
+
+      case SVGA3DOP_RET:
+         debug_printf( "ret\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_END:
+         debug_printf( "end\n" );
+         finished = TRUE;
+         break;
+
+      default:
+         {
+            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
+            uint src;
+            uint num_src;
+            boolean not_first_arg = FALSE;
+
+            /* Without table info the operand count is unknown: stop here. */
+            if (info == NULL) {
+               debug_printf( "... unknown opcode %u\n", (unsigned) op.opcode );
+               return;
+            }
+
+            assert( info->num_dst <= 1 );
+
+            num_src = info->num_src + op.predicated;
+            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
+               num_src += 2;
+
+            dump_comp_op( op, info->mnemonic );
+            assem += sizeof( struct sh_op ) / sizeof( unsigned );
+
+            if (info->num_dst > 0) {
+               struct sh_dstreg dstreg = *(const struct sh_dstreg *) assem;
+               dump_dstreg( dstreg, &di );
+               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
+               not_first_arg = TRUE;
+            }
+
+            for (src = 0; src < num_src; src++) {
+               struct sh_srcreg srcreg;
+               struct sh_srcreg indreg = { 0 };   /* stays zeroed unless an address token follows */
+
+               srcreg = *(const struct sh_srcreg *) assem;
+               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
+                  indreg = *(const struct sh_srcreg *) assem;
+                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               }
+
+               if (not_first_arg)
+                  debug_printf( ", " );
+               else
+                  debug_printf( " " );
+               dump_srcreg( srcreg, &indreg, &di );
+               not_first_arg = TRUE;
+            }
+
+            debug_printf( "\n" );
+         }
+      }
+   }
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
new file mode 100644
index 0000000000..af5549cdba
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_DUMP_H
+#define ST_SHADER_SVGA_DUMP_H
+/* Dump the dwords-long shader token stream at assem with debug_printf; a non-zero do_binary also prints the raw dwords. */
+void
+sh_svga_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary );
+
+#endif /* ST_SHADER_SVGA_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
new file mode 100644
index 0000000000..cecc22106b
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -0,0 +1,168 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "util/u_debug.h"
+#include "svga_shader_op.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+#define SVGA3DOP_INVALID SVGA3DOP_END   /* marks table rows that carry no operand info */
+#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST   /* NOTE(review): not referenced anywhere in this file */
+/* Indexed by opcode value: { mnemonic, num_dst, num_src, svga_opcode }; SVGA3DOP_INVALID rows have no usable info. */
+static struct sh_opcode_info opcode_info[] =
+{
+   { "nop",          0, 0, SVGA3DOP_NOP          },
+   { "mov",          1, 1, SVGA3DOP_MOV,         },
+   { "add",          1, 2, SVGA3DOP_ADD,         },
+   { "sub",          1, 2, SVGA3DOP_SUB,         },
+   { "mad",          1, 3, SVGA3DOP_MAD,         },
+   { "mul",          1, 2, SVGA3DOP_MUL,         },
+   { "rcp",          1, 1, SVGA3DOP_RCP,         },
+   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
+   { "dp3",          1, 2, SVGA3DOP_DP3,         },
+   { "dp4",          1, 2, SVGA3DOP_DP4,         },
+   { "min",          1, 2, SVGA3DOP_MIN,         },
+   { "max",          1, 2, SVGA3DOP_MAX,         },
+   { "slt",          1, 2, SVGA3DOP_SLT,         },
+   { "sge",          1, 2, SVGA3DOP_SGE,         },
+   { "exp",          1, 1, SVGA3DOP_EXP,         },
+   { "log",          1, 1, SVGA3DOP_LOG,         },
+   { "lit",          1, 1, SVGA3DOP_LIT,         },
+   { "dst",          1, 2, SVGA3DOP_DST,         },
+   { "lrp",          1, 3, SVGA3DOP_LRP,         },
+   { "frc",          1, 1, SVGA3DOP_FRC,         },
+   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
+   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
+   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
+   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
+   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
+   { "call",         0, 1, SVGA3DOP_CALL,        },
+   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
+   { "loop",         0, 2, SVGA3DOP_LOOP,        },
+   { "ret",          0, 0, SVGA3DOP_RET,         },
+   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
+   { "label",        0, 1, SVGA3DOP_LABEL,       },
+   { "dcl",          0, 0, SVGA3DOP_DCL,         },
+   { "pow",          1, 2, SVGA3DOP_POW,         },
+   { "crs",          1, 2, SVGA3DOP_CRS,         },
+   { "sgn",          1, 3, SVGA3DOP_SGN,         },
+   { "abs",          1, 1, SVGA3DOP_ABS,         },
+   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-component normalization */
+   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
+   { "rep",          0, 1, SVGA3DOP_REP,         },
+   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
+   { "if",           0, 1, SVGA3DOP_IF,          },
+   { "ifc",          0, 2, SVGA3DOP_IFC,         },
+   { "else",         0, 0, SVGA3DOP_ELSE,        },
+   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
+   { "break",        0, 0, SVGA3DOP_BREAK,       },
+   { "breakc",       0, 2, SVGA3DOP_BREAKC,      }, /* compares two sources, like ifc */
+   { "mova",         1, 1, SVGA3DOP_MOVA,        },
+   { "defb",         0, 0, SVGA3DOP_DEFB,        },
+   { "defi",         0, 0, SVGA3DOP_DEFI,        },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
+   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
+   { "tex",          0, 0, SVGA3DOP_TEX,         },
+   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
+   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
+   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
+   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
+   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
+   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
+   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
+   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
+   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
+   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
+   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
+   { "expp",         1, 1, SVGA3DOP_EXPP,        },
+   { "logp",         1, 1, SVGA3DOP_LOGP,        },
+   { "cnd",          1, 3, SVGA3DOP_CND,         },
+   { "def",          0, 0, SVGA3DOP_DEF,         },
+   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
+   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
+   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
+   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
+   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
+   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
+   { "cmp",          1, 3, SVGA3DOP_CMP,         },
+   { "bem",          1, 2, SVGA3DOP_BEM,         },
+   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
+   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
+   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
+   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
+   { "setp",         1, 2, SVGA3DOP_SETP,        },
+   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
+   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
+};
+/**
+ * Return the table entry (mnemonic and operand counts) for an opcode, or
+ * NULL, after tripping an assert, when the table has no usable entry.
+ */
+const struct sh_opcode_info *sh_svga_opcode_info( uint op )
+{
+   const unsigned count = sizeof( opcode_info ) / sizeof( opcode_info[0] );
+   const struct sh_opcode_info *entry;
+
+   /* PHASE, COMMENT, END and anything else past the table end up here. */
+   if (op >= count) {
+      assert( 0 );
+      return NULL;
+   }
+
+   entry = &opcode_info[op];
+
+   /* Placeholder row: the number of dst/src registers was never filled in. */
+   if (entry->svga_opcode == SVGA3DOP_INVALID) {
+      assert( 0 );
+      return NULL;
+   }
+
+   /* Sanity check: every row must sit at the index of its own opcode. */
+   assert( op == entry->svga_opcode );
+   return entry;
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
new file mode 100644
index 0000000000..01d39dca84
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_OP_H
+#define ST_SHADER_SVGA_OP_H
+
+struct sh_opcode_info
+{
+   const char *mnemonic;
+   unsigned num_dst:8;
+   unsigned num_src:8;
+   unsigned svga_opcode:16;
+};
+
+const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
+
+#endif /* ST_SHADER_SVGA_OP_H */
-- 
cgit v1.2.3


From d3f26a84204d589e69e82627395771ed7273315d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 24 Nov 2009 14:43:30 +0000
Subject: svga: Use consistent names for public symbol names of shader dumping
 facilities.

---
 src/gallium/drivers/svga/svgadump/svga_dump.c        | 2 +-
 src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 4 ++--
 src/gallium/drivers/svga/svgadump/svga_shader_dump.h | 8 ++++----
 src/gallium/drivers/svga/svgadump/svga_shader_op.c   | 2 +-
 src/gallium/drivers/svga/svgadump/svga_shader_op.h   | 8 ++++----
 5 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index c6c353f58e..910afa2528 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1627,7 +1627,7 @@ svga_dump_commands(const void *commands, uint32_t size)
                const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
                dump_SVGA3dCmdDefineShader(cmd);
                body = (const uint8_t *)&cmd[1];
-               sh_svga_dump((const uint32_t *)body, 
+               svga_shader_dump((const uint32_t *)body, 
                             (unsigned)(next - body)/sizeof(uint32_t),
                             FALSE );
                body = next;
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
index c654126d3a..7718bdf757 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -435,7 +435,7 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons
 }
 
 void
-sh_svga_dump(
+svga_shader_dump(
    const unsigned *assem,
    unsigned dwords,
    unsigned do_binary )
@@ -602,7 +602,7 @@ sh_svga_dump(
 
       default:
          {
-            const struct sh_opcode_info *info = sh_svga_opcode_info( op.opcode );
+            const struct sh_opcode_info *info = svga_opcode_info( op.opcode );
             uint i;
             uint num_src = info->num_src + op.predicated;
             boolean not_first_arg = FALSE;
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
index af5549cdba..a2657acb2f 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -30,13 +30,13 @@
  * @author Michal Krol <michal@vmware.com>
  */
 
-#ifndef ST_SHADER_SVGA_DUMP_H
-#define ST_SHADER_SVGA_DUMP_H
+#ifndef SVGA_SHADER_DUMP_H
+#define SVGA_SHADER_DUMP_H
 
 void
-sh_svga_dump(
+svga_shader_dump(
    const unsigned *assem,
    unsigned dwords,
    unsigned do_binary );
 
-#endif /* ST_SHADER_SVGA_DUMP_H */
+#endif /* SVGA_SHADER_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
index cecc22106b..8343bfdaab 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -140,7 +140,7 @@ static struct sh_opcode_info opcode_info[] =
    { "breakp",       1, 1, SVGA3DOP_INVALID,     },
 };
 
-const struct sh_opcode_info *sh_svga_opcode_info( uint op )
+const struct sh_opcode_info *svga_opcode_info( uint op )
 {
    struct sh_opcode_info *info;
 
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
index 01d39dca84..e558de02c5 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_op.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -30,8 +30,8 @@
  * @author Michal Krol <michal@vmware.com>
  */
 
-#ifndef ST_SHADER_SVGA_OP_H
-#define ST_SHADER_SVGA_OP_H
+#ifndef SVGA_SHADER_OP_H
+#define SVGA_SHADER_OP_H
 
 struct sh_opcode_info
 {
@@ -41,6 +41,6 @@ struct sh_opcode_info
    unsigned svga_opcode:16;
 };
 
-const struct sh_opcode_info *sh_svga_opcode_info( unsigned op );
+const struct sh_opcode_info *svga_opcode_info( unsigned op );
 
-#endif /* ST_SHADER_SVGA_OP_H */
+#endif /* SVGA_SHADER_OP_H */
-- 
cgit v1.2.3


From 135d7e12991312d7aff637565fbe67f666e4e39f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 15 Nov 2009 12:14:03 -0800
Subject: svga: Handle comment tokens when dumping.

---
 src/gallium/drivers/svga/svgadump/svga_shader.h      | 6 ++++++
 src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 7 ++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
index 2fc1796a90..9217af2dd9 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader.h
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -211,4 +211,10 @@ struct sh_trinaryop
    struct sh_srcreg src2;
 };
 
+struct sh_comment
+{
+   unsigned opcode:16;
+   unsigned size:16;
+};
+
 #endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
index 7718bdf757..b0e7fdf378 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -587,7 +587,12 @@ svga_shader_dump(
          break;
 
       case SVGA3DOP_COMMENT:
-         assert( 0 );
+         {
+            struct sh_comment comment = *(struct sh_comment *)assem;
+
+            /* Ignore comment contents. */
+            assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size;
+         }
          break;
 
       case SVGA3DOP_RET:
-- 
cgit v1.2.3


From 6dd9676a8fc43062a7017f2951e0f032889fac9e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 27 Nov 2009 13:59:37 +0000
Subject: svga: Re-add shader dumping.

---
 src/gallium/drivers/svga/svga_tgsi.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index 81eea1a145..b8ef137c01 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -222,6 +222,20 @@ svga_tgsi_translate( const struct svga_shader *shader,
    result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
    memcpy(&result->key, &key, sizeof key);
 
+   if (SVGA_DEBUG & DEBUG_TGSI) 
+   {
+      debug_printf( "#####################################\n" );
+      debug_printf( "Shader %u below\n", shader->id );
+      tgsi_dump( shader->tokens, 0 );
+      if (SVGA_DEBUG & DEBUG_TGSI) {
+         debug_printf( "Shader %u compiled below\n", shader->id );
+         svga_shader_dump( result->tokens,
+                           result->nr_tokens ,
+                           FALSE );
+      }
+      debug_printf( "#####################################\n" );
+   }
+
    return result;
 
 fail:
-- 
cgit v1.2.3


From b748a9f574361273df6b05c06c647ac4fd9b3e41 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Fri, 27 Nov 2009 17:40:24 +0100
Subject: r300g,llvmpipe: fix some more merge problems

---
 src/gallium/drivers/llvmpipe/lp_texture.c | 2 +-
 src/gallium/drivers/r300/r300_emit.c      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 0a0f31f8a3..65d62fd072 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -169,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index e6ab8e4af1..98a39390bf 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -145,8 +145,8 @@ static const float * get_shader_constant(
                  * normalized coords. Should only show up on non-r500. */
                 case RC_STATE_R300_TEXRECT_FACTOR:
                     tex = &r300->textures[constant->u.State[1]]->tex;
-                    vec[0] = 1.0 / tex->width[0];
-                    vec[1] = 1.0 / tex->height[0];
+                    vec[0] = 1.0 / tex->width0;
+                    vec[1] = 1.0 / tex->height0;
                     break;
 
                 default:
-- 
cgit v1.2.3


From 7fa1bcc05a237365e5ea09512453f29a91c7a141 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Fri, 27 Nov 2009 17:41:42 +0100
Subject: svga: fix for not using texture width/height/depth arrays

---
 src/gallium/drivers/svga/svga_screen_texture.c  | 61 ++++++++++++-------------
 src/gallium/drivers/svga/svga_state_constants.c |  4 +-
 2 files changed, 32 insertions(+), 33 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index 8472dea04d..fb11b80dcf 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -287,23 +287,20 @@ svga_texture_create(struct pipe_screen *screen,
    if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS)
       goto error2;
    
-   width = templat->width[0];
-   height = templat->height[0];
-   depth = templat->depth[0];
+   width = templat->width0;
+   height = templat->height0;
+   depth = templat->depth0;
    for(level = 0; level <= templat->last_level; ++level) {
-      tex->base.width[level] = width;
-      tex->base.height[level] = height;
-      tex->base.depth[level] = depth;
       tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width);  
       tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height);  
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
    
-   size.width = templat->width[0];
-   size.height = templat->height[0];
-   size.depth = templat->depth[0];
+   size.width = templat->width0;
+   size.height = templat->height0;
+   size.depth = templat->depth0;
    
    if(templat->target == PIPE_TEXTURE_CUBE) {
       flags |= SVGA3D_SURFACE_CUBEMAP;
@@ -367,7 +364,7 @@ svga_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -534,9 +531,9 @@ svga_texture_view_surface(struct pipe_context *pipe,
             "svga: Create surface view: face %d zslice %d mips %d..%d\n",
             face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
 
-   size.width = tex->base.width[start_mip];
-   size.height = tex->base.height[start_mip];
-   size.depth = zslice_pick < 0 ? tex->base.depth[start_mip] : 1;
+   size.width = u_minify(tex->base.width0, start_mip);
+   size.height = u_minify(tex->base.height0, start_mip);
+   size.depth = zslice_pick < 0 ? u_minify(tex->base.depth0, start_mip) : 1;
    assert(size.depth == 1);
    
    if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
@@ -565,11 +562,12 @@ svga_texture_view_surface(struct pipe_context *pipe,
    for (i = 0; i < num_mip; i++) {
       for (j = 0; j < numFaces; j++) {
          if(tex->defined[j + face_pick][i + start_mip]) {
-            unsigned depth = zslice_pick < 0 ? tex->base.depth[i + start_mip] : 1;
+            unsigned depth = zslice_pick < 0 ? u_minify(tex->base.depth0, i + start_mip) : 1;
             svga_texture_copy_handle(svga_context(pipe), ss,
                                      tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick,
                                      handle, 0, 0, 0, i, j,
-                                     tex->base.width[i + start_mip], tex->base.height[i + start_mip], depth);
+                                     u_minify(tex->base.width0, i + start_mip),
+                                     u_minify(tex->base.height0, i + start_mip), depth);
          }
       }
    }
@@ -599,8 +597,8 @@ svga_get_tex_surface(struct pipe_screen *screen,
    pipe_reference_init(&ps->reference, 1);
    pipe_texture_reference(&ps->texture, pt);
    ps->format = pt->format;
-   ps->width = pt->width[level];
-   ps->height = pt->height[level];
+   ps->width = u_minify(pt->width0, level);
+   ps->height = u_minify(pt->height0, level);
    ps->usage = flags;
    ps->level = level;
    ps->face = face;
@@ -723,7 +721,8 @@ svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
       svga_texture_copy_handle(svga_context(pipe), ss,
                                s->handle, 0, 0, 0, s->real_level, s->real_face,
                                tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
-                               tex->base.width[surf->level], tex->base.height[surf->level], 1);
+                               u_minify(tex->base.width0, surf->level),
+                               u_minify(tex->base.height0, surf->level), 1);
       tex->defined[surf->face][surf->level] = TRUE;
    }
 }
@@ -953,9 +952,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
                "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
                pt, min_lod, max_lod,
                max_lod - min_lod + 1,
-               pt->width[0],
-               pt->height[0],
-               pt->depth[0],
+               pt->width0,
+               pt->height0,
+               pt->depth0,
                pt->last_level);
       sws->surface_reference(sws, &sv->handle, tex->handle);
       return sv;
@@ -965,9 +964,9 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
             "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
             pt, min_lod, max_lod,
             max_lod - min_lod + 1,
-            pt->width[0],
-            pt->height[0],
-            pt->depth[0],
+            pt->width0,
+            pt->height0,
+            pt->depth0,
             pt->last_level);
 
    sv->age = tex->age;
@@ -1015,9 +1014,9 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
             svga_texture_copy_handle(svga, NULL,
                                      tex->handle, 0, 0, 0, i, k,
                                      v->handle, 0, 0, 0, i - v->min_lod, k,
-                                     tex->base.width[i],
-                                     tex->base.height[i],
-                                     tex->base.depth[i]);
+                                     u_minify(tex->base.width0, i),
+                                     u_minify(tex->base.height0, i),
+                                     u_minify(tex->base.depth0, i));
       }
    }
 
@@ -1047,7 +1046,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture,
        svga_translate_format(texture->format),
        stex->handle);
 
-   *stride = pf_get_nblocksx(&texture->block, texture->width[0]) *
+   *stride = pf_get_nblocksx(&texture->block, texture->width0) *
       texture->block.size;
 
    return *buffer != NULL;
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 18cce7dde1..209ed28245 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -140,8 +140,8 @@ static int emit_fs_consts( struct svga_context *svga,
             struct pipe_texture *tex = svga->curr.texture[i];
             float data[4];
 
-            data[0] = 1.0 / (float)tex->width[0];
-            data[1] = 1.0 / (float)tex->height[0];
+            data[0] = 1.0 / (float)tex->width0;
+            data[1] = 1.0 / (float)tex->height0;
             data[2] = 1.0;
             data[3] = 1.0;
 
-- 
cgit v1.2.3


From 510fd280b54fa33ed229ef297a1a77c78811c592 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 26 Nov 2009 16:59:39 +0100
Subject: nv50: bswap32 the polygon stipple pattern

The hardware wants the pattern the same way it is
passed to glPolygonStipple.
---
 src/gallium/drivers/nv50/nv50_state_validate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 799d2758fe..19b8ef07b5 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -285,7 +285,7 @@ nv50_state_validate(struct nv50_context *nv50)
 		so = so_new(33, 0);
 		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
 		for (i = 0; i < 32; i++)
-			so_data(so, nv50->stipple.stipple[i]);
+			so_data(so, util_bswap32(nv50->stipple.stipple[i]));
 		so_ref(so, &nv50->state.stipple);
 		so_ref(NULL, &so);
 	}
-- 
cgit v1.2.3


From cad14c2542698de144bb5434cefa02d7a00aaa74 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 27 Nov 2009 21:29:38 +0100
Subject: nv50: do conversion of last insn to 64 bit format first

This simplifies things, since the second-to-last instruction will then
be converted automatically by the subsequent pass that ensures
alignment.
---
 src/gallium/drivers/nv50/nv50_program.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bf50982dd1..855079f293 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2914,7 +2914,7 @@ nv50_fp_move_results(struct nv50_pc *pc)
 static void
 nv50_program_fixup_insns(struct nv50_pc *pc)
 {
-	struct nv50_program_exec *e, *prev = NULL, **bra_list;
+	struct nv50_program_exec *e, **bra_list;
 	unsigned i, n, pos;
 
 	bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *));
@@ -2926,6 +2926,16 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 		if (e->param.index >= 0 && !e->param.mask)
 			bra_list[n++] = e;
 
+	/* last instruction must be long so it can have the exit bit set */
+	if (!is_long(pc->p->exec_tail))
+		convert_to_long(pc, pc->p->exec_tail);
+	/* set exit bit */
+	pc->p->exec_tail->inst[1] |= 1;
+
+	/* !immd on exit insn simultaneously means !join */
+	assert(!is_immd(pc->p->exec_head));
+	assert(!is_immd(pc->p->exec_tail));
+
 	/* Make sure we don't have any single 32 bit instructions. */
 	for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
 		pos += is_long(e) ? 2 : 1;
@@ -2937,23 +2947,8 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 			convert_to_long(pc, e);
 			++pos;
 		}
-		if (e->next)
-			prev = e;
 	}
 
-	assert(!is_immd(pc->p->exec_head));
-	assert(!is_immd(pc->p->exec_tail));
-
-	/* last instruction must be long so it can have the end bit set */
-	if (!is_long(pc->p->exec_tail)) {
-		convert_to_long(pc, pc->p->exec_tail);
-		if (prev)
-			convert_to_long(pc, prev);
-	}
-	assert(!(pc->p->exec_tail->inst[1] & 2));
-	/* set the end-bit */
-	pc->p->exec_tail->inst[1] |= 1;
-
 	FREE(bra_list);
 }
 
-- 
cgit v1.2.3


From ca9b0e942c5f8f95383c637b05e3fb237f013688 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Sun, 29 Nov 2009 18:59:19 +1000
Subject: i965g: remove surface from views list before freeing

This fixes a crash with the xorg state tracker. However, it then locks
up the GPU once rendering is enabled, but at least it doesn't crash.

Signed-off-by: Dave Airlie <airlied@linux.ie>
---
 src/gallium/drivers/i965/brw_screen_surface.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 21a7382873..1e37c63d6c 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -245,6 +245,7 @@ static void brw_tex_surface_destroy( struct pipe_surface *surf )
 
    /* Unreference texture, shared buffer:
     */
+   remove_from_list(surface);
    bo_reference(&surface->bo, NULL);
    pipe_texture_reference( &surface->base.texture, NULL );
 
-- 
cgit v1.2.3


From 3f471c7948425a9c8ae23a563e0e816954a7589a Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 26 Nov 2009 17:03:00 +0100
Subject: nv50: don't permanently negate src in emit_ddy

---
 src/gallium/drivers/nv50/nv50_program.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 855079f293..f93c864c2a 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1440,19 +1440,25 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 static void
 emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
+	struct nv50_reg *r = src;
 	struct nv50_program_exec *e = exec(pc);
 
 	assert(src->type == P_TEMP);
 
-	if (!(src->mod & NV50_MOD_NEG)) /* ! double negation */
-		emit_neg(pc, src, src);
+	if (!(src->mod & NV50_MOD_NEG)) { /* ! double negation */
+		r = alloc_temp(pc, NULL);
+		emit_neg(pc, r, src);
+	}
 
 	e->inst[0] = 0xc0150000;
 	e->inst[1] = 0x8a400000;
 	set_long(pc, e);
 	set_dst(pc, dst, e);
-	set_src_0(pc, src, e);
-	set_src_2(pc, src, e);
+	set_src_0(pc, r, e);
+	set_src_2(pc, r, e);
+
+	if (r != src)
+		free_temp(pc, r);
 
 	emit(pc, e);
 }
-- 
cgit v1.2.3


From 7494b829052a87d7a8c56c68300a110b40e401e8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 29 Nov 2009 13:33:16 +0100
Subject: nv50: match VP outputs to FP inputs ourselves

For each FP input, don't assume that the VP output will be
at the same position, but scan the semantics instead, then
put the correct output reg indices into VP_RESULT_MAP.

Position is still assumed to be the first output/input.

See 07fafc7c9346aa260829603bf3188596481e9e62, which renders
previous assumptions incorrect.
---
 src/gallium/drivers/nv50/nv50_program.c | 70 ++++++++++++++++++---------------
 src/gallium/drivers/nv50/nv50_program.h |  3 +-
 2 files changed, 40 insertions(+), 33 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index f93c864c2a..bad0ace7e5 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2643,7 +2643,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
 		for (i = 0, rid = 0; i < pc->result_nr; ++i) {
 			p->cfg.io[i].hw = rid;
-			p->cfg.io[i].id_vp = i;
+			p->cfg.io[i].id = i;
 
 			for (c = 0; c < 4; ++c) {
 				int n = i * 4 + c;
@@ -2675,14 +2675,12 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 		 * the lower hardware IDs, so sort them:
 		 */
 		for (i = 0; i < pc->attr_nr; i++) {
-			if (pc->interp_mode[i] == INTERP_FLAT) {
-				p->cfg.io[m].id_vp = i + base;
-				p->cfg.io[m++].id_fp = i;
-			} else {
+			if (pc->interp_mode[i] == INTERP_FLAT)
+				p->cfg.io[m++].id = i;
+			else {
 				if (!(pc->interp_mode[i] & INTERP_PERSPECTIVE))
 					p->cfg.io[n].linear = TRUE;
-				p->cfg.io[n].id_vp = i + base;
-				p->cfg.io[n++].id_fp = i;
+				p->cfg.io[n++].id = i;
 			}
 		}
 
@@ -2694,7 +2692,7 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 
 		for (n = 0; n < pc->attr_nr; ++n) {
 			p->cfg.io[n].hw = rid = aid;
-			i = p->cfg.io[n].id_fp;
+			i = p->cfg.io[n].id;
 
 			if (p->info.input_semantic_name[n] ==
 			    TGSI_SEMANTIC_FACE) {
@@ -2734,8 +2732,8 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 		for (i = 0; i < pc->attr_nr; i++) {
 			ubyte si, sn;
 
-			sn = p->info.input_semantic_name[p->cfg.io[i].id_fp];
-			si = p->info.input_semantic_index[p->cfg.io[i].id_fp];
+			sn = p->info.input_semantic_name[p->cfg.io[i].id];
+			si = p->info.input_semantic_index[p->cfg.io[i].id];
 
 			if (sn == TGSI_SEMANTIC_COLOR) {
 				p->cfg.two_side[si] = p->cfg.io[i];
@@ -3237,15 +3235,15 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
 	struct nv50_program *vp = nv50->vertprog;
 	unsigned i, c, m = base;
 
-	/* XXX: This can't work correctly in all cases yet, we either
-	 * have to create TGSI_SEMANTIC_PNTC or sprite_coord_mode has
-	 * to be per FP input instead of per VP output
+	/* XXX: this might not work correctly in all cases yet - we'll
+	 * just assume that an FP generic input that is not written in
+	 * the VP is PointCoord.
 	 */
 	memset(pntc, 0, 8 * sizeof(uint32_t));
 
 	for (i = 0; i < fp->cfg.io_nr; i++) {
 		uint8_t sn, si;
-		uint8_t j = fp->cfg.io[i].id_vp, k = fp->cfg.io[i].id_fp;
+		uint8_t j, k = fp->cfg.io[i].id;
 		unsigned n = popcnt4(fp->cfg.io[i].mask);
 
 		if (fp->info.input_semantic_name[k] != TGSI_SEMANTIC_GENERIC) {
@@ -3253,10 +3251,16 @@ nv50_pntc_replace(struct nv50_context *nv50, uint32_t pntc[8], unsigned base)
 			continue;
 		}
 
-		sn = vp->info.input_semantic_name[j];
-		si = vp->info.input_semantic_index[j];
+		for (j = 0; j < vp->info.num_outputs; ++j) {
+			sn = vp->info.output_semantic_name[j];
+			si = vp->info.output_semantic_index[j];
 
-		if (j < fp->cfg.io_nr && sn == TGSI_SEMANTIC_GENERIC) {
+			if (sn == fp->info.input_semantic_name[k] &&
+			    si == fp->info.input_semantic_index[k])
+				break;
+		}
+
+		if (j < vp->info.num_outputs) {
 			ubyte mode =
 				nv50->rasterizer->pipe.sprite_coord_mode[si];
 
@@ -3344,20 +3348,24 @@ nv50_linkage_validate(struct nv50_context *nv50)
 	reg[0] += m - 4; /* adjust FFC0 id */
 	reg[4] |= m << 8; /* set mid where 'normal' FP inputs start */
 
-	i = 0;
-	if (fp->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION)
-		i = 1;
-	for (; i < fp->cfg.io_nr; i++) {
-		ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id_fp];
-		ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id_fp];
-
-		n = fp->cfg.io[i].id_vp;
-		if (n >= vp->cfg.io_nr ||
-		    vp->info.output_semantic_name[n] != sn ||
-		    vp->info.output_semantic_index[n] != si)
-			vpo = &dummy;
-		else
-			vpo = &vp->cfg.io[n];
+	for (i = 0; i < fp->cfg.io_nr; i++) {
+		ubyte sn = fp->info.input_semantic_name[fp->cfg.io[i].id];
+		ubyte si = fp->info.input_semantic_index[fp->cfg.io[i].id];
+
+		/* position must be mapped first */
+		assert(i == 0 || sn != TGSI_SEMANTIC_POSITION);
+
+		/* maybe even remove these from cfg.io */
+		if (sn == TGSI_SEMANTIC_POSITION || sn == TGSI_SEMANTIC_FACE)
+			continue;
+
+		/* VP outputs and vp->cfg.io are in the same order */
+		for (n = 0; n < vp->info.num_outputs; ++n) {
+			if (vp->info.output_semantic_name[n] == sn &&
+			    vp->info.output_semantic_index[n] == si)
+				break;
+		}
+		vpo = (n < vp->info.num_outputs) ? &vp->cfg.io[n] : &dummy;
 
 		m = nv50_sreg4_map(map, m, lin, &fp->cfg.io[i], vpo);
 	}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index d78dee083f..255c7c737e 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -17,8 +17,7 @@ struct nv50_program_exec {
 
 struct nv50_sreg4 {
 	uint8_t hw;
-	uint8_t id_vp;
-	uint8_t id_fp;
+	uint8_t id; /* tgsi index, nv50 needs them sorted: flat ones last */
 
 	uint8_t mask;
 	boolean linear;
-- 
cgit v1.2.3


From c332525ad3cf8e946e60c3f9b96af525ca4cb71c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sat, 28 Nov 2009 13:57:38 +0100
Subject: nv50: update linkage on rasterizer change

We need to update VP_RESULT_MAP and/or COORD_REPLACE_MAP
when light_twoside and/or point_sprite are changed.
---
 src/gallium/drivers/nv50/nv50_state_validate.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 19b8ef07b5..c871acaab8 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -201,7 +201,8 @@ nv50_state_emit(struct nv50_context *nv50)
 		so_emit(chan, nv50->state.vertprog);
 	if (nv50->state.dirty & NV50_NEW_FRAGPROG)
 		so_emit(chan, nv50->state.fragprog);
-	if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+	if (nv50->state.dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+				 NV50_NEW_RASTERIZER))
 		so_emit(chan, nv50->state.programs);
 	if (nv50->state.dirty & NV50_NEW_RASTERIZER)
 		so_emit(chan, nv50->state.rast);
@@ -264,7 +265,8 @@ nv50_state_validate(struct nv50_context *nv50)
 	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB))
 		nv50_fragprog_validate(nv50);
 
-	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG))
+	if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_VERTPROG |
+			   NV50_NEW_RASTERIZER))
 		nv50_linkage_validate(nv50);
 
 	if (nv50->dirty & NV50_NEW_RASTERIZER)
-- 
cgit v1.2.3


From 4490122d0cae360d1552cea7d7d860de352f13f6 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 25 Nov 2009 23:02:46 +0000
Subject: i965g: remove redundant nr_attrs member

---
 src/gallium/drivers/i965/brw_clip.c      | 8 ++------
 src/gallium/drivers/i965/brw_clip.h      | 4 ++--
 src/gallium/drivers/i965/brw_clip_tri.c  | 6 +++---
 src/gallium/drivers/i965/brw_clip_util.c | 2 +-
 4 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 4ec7b823e8..58d9e56df2 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -81,10 +81,6 @@ compile_clip_prog( struct brw_context *brw,
    else
        delta = REG_SIZE;
 
-   /* XXX: c.nr_attrs is very redundant:
-    */
-   c.nr_attrs = c.key.nr_attrs;
-
    c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
 
    if (c.key.output_color0)
@@ -103,9 +99,9 @@ compile_clip_prog( struct brw_context *brw,
       c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
    
    if (BRW_IS_IGDNG(brw))
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
+       c.nr_regs = (c.key.nr_attrs + 1) / 2 + 3;  /* are vertices packed, or reg-aligned? */
    else
-       c.nr_regs = (c.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
+       c.nr_regs = (c.key.nr_attrs + 1) / 2 + 1;  /* are vertices packed, or reg-aligned? */
 
    c.nr_bytes = c.nr_regs * REG_SIZE;
 
diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h
index 8729efa47b..80e3a11a37 100644
--- a/src/gallium/drivers/i965/brw_clip.h
+++ b/src/gallium/drivers/i965/brw_clip.h
@@ -115,9 +115,9 @@ struct brw_clip_compile {
       struct brw_reg ff_sync;
    } reg;
 
-   /* 3 different ways of expressing vertex size:
+   /* 3 different ways of expressing vertex size, including
+    * key.nr_attrs.
     */
-   GLuint nr_attrs;
    GLuint nr_regs;
    GLuint nr_bytes;
 
diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c
index fa00f6044f..4cde7294ea 100644
--- a/src/gallium/drivers/i965/brw_clip_tri.c
+++ b/src/gallium/drivers/i965/brw_clip_tri.c
@@ -66,12 +66,12 @@ void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
       i += c->nr_regs;
    }
 
-   if (c->nr_attrs & 1) {
+   if (c->key.nr_attrs & 1) {
       for (j = 0; j < 3; j++) {
-	 GLuint delta = c->nr_attrs*16 + 32;
+	 GLuint delta = c->key.nr_attrs*16 + 32;
 
          if (c->chipset.is_igdng)
-             delta = c->nr_attrs * 16 + 32 * 3;
+             delta = c->key.nr_attrs * 16 + 32 * 3;
 
 	 brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0));
       }
diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c
index 872042c9a9..97a5710310 100644
--- a/src/gallium/drivers/i965/brw_clip_util.c
+++ b/src/gallium/drivers/i965/brw_clip_util.c
@@ -140,7 +140,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
       
    /* Iterate over each attribute (could be done in pairs?)
     */
-   for (i = 0; i < c->nr_attrs; i++) {
+   for (i = 0; i < c->key.nr_attrs; i++) {
       GLuint delta = i*16 + 32;
 
       if (c->chipset.is_igdng)
-- 
cgit v1.2.3


From cddc7e3a9cd321247c2298ef1b94cced1122a8e5 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 30 Nov 2009 13:39:21 +0000
Subject: brw: add dumping to gem winsys

---
 progs/demos/gears.c                               |   7 ++
 src/gallium/drivers/i965/Makefile                 |   1 +
 src/gallium/drivers/i965/brw_batchbuffer.c        |   2 -
 src/gallium/drivers/i965/brw_context.h            |   5 -
 src/gallium/drivers/i965/brw_debug.h              |   1 +
 src/gallium/drivers/i965/brw_disasm.c             |   4 +-
 src/gallium/drivers/i965/brw_eu_emit.c            |   1 +
 src/gallium/drivers/i965/brw_screen.c             |  11 ++
 src/gallium/drivers/i965/brw_vs_emit.c            |   1 +
 src/gallium/drivers/i965/brw_winsys.h             |  21 ++++
 src/gallium/drivers/i965/brw_winsys_debug.c       |  87 ++++++++++++++
 src/gallium/drivers/i965/brw_wm_emit.c            |   1 +
 src/gallium/winsys/drm/i965/gem/i965_drm_api.c    |  21 ++++
 src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c | 133 +++++++++++++++++++---
 src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h |   2 +
 src/gallium/winsys/drm/i965/xlib/xlib_i965.c      |  91 +--------------
 16 files changed, 280 insertions(+), 109 deletions(-)
 create mode 100644 src/gallium/drivers/i965/brw_winsys_debug.c

(limited to 'src/gallium/drivers')

diff --git a/progs/demos/gears.c b/progs/demos/gears.c
index 6016162d6f..cf2c0a5443 100644
--- a/progs/demos/gears.c
+++ b/progs/demos/gears.c
@@ -92,6 +92,7 @@ gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width,
 
   glNormal3f(0.0, 0.0, -1.0);
 
+#if 0
   /* draw back face */
   glBegin(GL_QUAD_STRIP);
   for (i = 0; i <= teeth; i++) {
@@ -160,6 +161,7 @@ gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width,
     glVertex3f(r0 * cos(angle), r0 * sin(angle), width * 0.5);
   }
   glEnd();
+#endif
 
 }
 
@@ -195,6 +197,7 @@ draw(void)
       glCallList(gear1);
     glPopMatrix();
 
+#if 0
     glPushMatrix();
       glTranslatef(3.1, -2.0, 0.0);
       glRotatef(-2.0 * angle - 9.0, 0.0, 0.0, 1.0);
@@ -206,6 +209,7 @@ draw(void)
       glRotatef(-2.0 * angle - 25.0, 0.0, 0.0, 1.0);
       glCallList(gear3);
     glPopMatrix();
+#endif
 
   glPopMatrix();
 
@@ -213,6 +217,9 @@ draw(void)
 
   Frames++;
 
+  if (Frames == 2)
+     exit(0);
+
   {
     GLint t = glutGet(GLUT_ELAPSED_TIME);
     if (t - T0 >= 5000) {
diff --git a/src/gallium/drivers/i965/Makefile b/src/gallium/drivers/i965/Makefile
index 8df07d1c10..95fd3cd69b 100644
--- a/src/gallium/drivers/i965/Makefile
+++ b/src/gallium/drivers/i965/Makefile
@@ -68,6 +68,7 @@ C_SOURCES = \
 	brw_screen_texture.c \
 	brw_screen_surface.c \
 	brw_batchbuffer.c \
+	brw_winsys_debug.c \
 	intel_decode.c
 
 include ../../Makefile.template
diff --git a/src/gallium/drivers/i965/brw_batchbuffer.c b/src/gallium/drivers/i965/brw_batchbuffer.c
index d725e8b27e..22607dc608 100644
--- a/src/gallium/drivers/i965/brw_batchbuffer.c
+++ b/src/gallium/drivers/i965/brw_batchbuffer.c
@@ -28,12 +28,10 @@
 #include "util/u_memory.h"
 
 #include "brw_batchbuffer.h"
-//#include "brw_decode.h"
 #include "brw_reg.h"
 #include "brw_winsys.h"
 #include "brw_debug.h"
 #include "brw_structs.h"
-#include "intel_decode.h"
 
 #define ALWAYS_EMIT_MI_FLUSH 1
 
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 598e747fe0..b7330f00f4 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -832,11 +832,6 @@ int brw_upload_urb_fence(struct brw_context *brw);
  */
 int brw_upload_cs_urb_state(struct brw_context *brw);
 
-/* brw_disasm.c */
-int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
-int brw_disasm (FILE *file, 
-                const struct brw_instruction *inst,
-                unsigned count);
 
 /*======================================================================
  * Inline conversion functions.  These are better-typed than the
diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
index 0deddbf977..98407a06ed 100644
--- a/src/gallium/drivers/i965/brw_debug.h
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -39,4 +39,5 @@ extern int BRW_DEBUG;
 #endif
 
 
+
 #endif
diff --git a/src/gallium/drivers/i965/brw_disasm.c b/src/gallium/drivers/i965/brw_disasm.c
index 4100f11d48..65db27248b 100644
--- a/src/gallium/drivers/i965/brw_disasm.c
+++ b/src/gallium/drivers/i965/brw_disasm.c
@@ -27,7 +27,9 @@
 #include <unistd.h>
 #include <stdarg.h>
 
-#include "brw_context.h"
+#include "brw_disasm.h"
+#include "brw_structs.h"
+#include "brw_reg.h"
 #include "brw_defines.h"
 
 struct {
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 7776b4f965..3ee50899fb 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -34,6 +34,7 @@
 #include "brw_defines.h"
 #include "brw_eu.h"
 #include "brw_debug.h"
+#include "brw_disasm.h"
 
 
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 05da72ebb2..70e2d9c47a 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -65,7 +65,16 @@ static const struct debug_named_value debug_names[] = {
    { NULL,    0 }
 };
 
+static const struct debug_named_value dump_names[] = {
+   { "asm",   DUMP_ASM},
+   { "state", DUMP_STATE},
+   { "batch", DUMP_BATCH},
+   { NULL, 0 }
+};
+
 int BRW_DEBUG = 0;
+int BRW_DUMP = 0;
+
 #endif
 
 
@@ -327,6 +336,8 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
    BRW_DEBUG = debug_get_flags_option("BRW_DEBUG", debug_names, 0);
    BRW_DEBUG |= debug_get_flags_option("INTEL_DEBUG", debug_names, 0);
    BRW_DEBUG |= DEBUG_STATS | DEBUG_MIN_URB | DEBUG_WM;
+
+   BRW_DUMP = debug_get_flags_option("BRW_DUMP", dump_names, 0);
 #endif
 
    memset(&chipset, 0, sizeof chipset);
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 00f0af2d07..20cec0f59b 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -41,6 +41,7 @@
 #include "brw_context.h"
 #include "brw_vs.h"
 #include "brw_debug.h"
+#include "brw_disasm.h"
 
 /* Choose one of the 4 vec4's which can be packed into each 16-wide reg.
  */
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index a723244960..9e86a1256e 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -111,6 +111,7 @@ enum brw_buffer_data_type {
 };
 
 
+
 /* Relocations to be applied with subdata in a call to sws->bo_subdata, below.
  *
  * Effectively this encodes:
@@ -274,6 +275,26 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
                                   struct brw_winsys_buffer *buffer);
 
 
+/*************************************************************************
+ * Cooperative dumping between winsys and driver.  TODO: make this
+ * driver-only by wrapping calls to winsys->bo_subdata().
+ */
+
+#ifdef DEBUG
+extern int BRW_DUMP;
+#else
+#define BRW_DUMP 0
+#endif 
+
+#define DUMP_ASM	        0x1
+#define DUMP_STATE	        0x2
+#define DUMP_BATCH	        0x4
+
+void brw_dump_data( unsigned pci_id,
+		    enum brw_buffer_data_type data_type,
+		    unsigned offset,
+		    const void *data,
+		    size_t size );
 
 
 #endif
diff --git a/src/gallium/drivers/i965/brw_winsys_debug.c b/src/gallium/drivers/i965/brw_winsys_debug.c
new file mode 100644
index 0000000000..f8f6a539bc
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_winsys_debug.c
@@ -0,0 +1,87 @@
+#include "brw_winsys.h"
+#include "brw_disasm.h"
+#include "brw_structs_dump.h"
+#include "brw_structs.h"
+#include "intel_decode.h"
+/* Debug-dump one block of buffer data, as selected by the BRW_DUMP bits:
+ * DUMP_ASM -> brw_disasm(), DUMP_STATE -> brw_dump_*(), DUMP_BATCH -> intel_decode(). */
+void brw_dump_data( unsigned pci_id,
+		    enum brw_buffer_data_type data_type,
+		    unsigned offset,
+		    const void *data,
+		    size_t size )
+{
+   if (BRW_DUMP & DUMP_ASM) {  /* program data: disassemble to stderr */
+      switch (data_type) {
+      case BRW_DATA_GS_WM_PROG:
+      case BRW_DATA_GS_SF_PROG:
+      case BRW_DATA_GS_VS_PROG:
+      case BRW_DATA_GS_GS_PROG:
+      case BRW_DATA_GS_CLIP_PROG:  /* all five program types share one disassembler call */
+         brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
+         break;
+      default:  /* not a program type: nothing to disassemble */
+         break;
+      }
+   }
+
+   if (BRW_DUMP & DUMP_STATE) {  /* state data: one brw_dump_*() helper per data type */
+      switch (data_type) {
+      case BRW_DATA_GS_CC_VP:
+         brw_dump_cc_viewport( data );
+         break;
+      case BRW_DATA_GS_CC_UNIT:
+         brw_dump_cc_unit_state( data );
+         break;
+      case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR:
+         brw_dump_sampler_default_color( data );
+         break;
+      case BRW_DATA_GS_SAMPLER:
+         brw_dump_sampler_state( data );
+         break;
+      case BRW_DATA_GS_WM_UNIT:
+         brw_dump_wm_unit_state( data );
+         break;
+      case BRW_DATA_GS_SF_VP:
+         brw_dump_sf_viewport( data );
+         break;
+      case BRW_DATA_GS_SF_UNIT:
+         brw_dump_sf_unit_state( data );
+         break;
+      case BRW_DATA_GS_VS_UNIT:
+         brw_dump_vs_unit_state( data );
+         break;
+      case BRW_DATA_GS_GS_UNIT:
+         brw_dump_gs_unit_state( data );
+         break;
+      case BRW_DATA_GS_CLIP_VP:
+         brw_dump_clipper_viewport( data );
+         break;
+      case BRW_DATA_GS_CLIP_UNIT:
+         brw_dump_clip_unit_state( data );
+         break;
+      case BRW_DATA_SS_SURFACE:
+         brw_dump_surface_state( data );
+         break;
+      case BRW_DATA_SS_SURF_BIND:  /* listed explicitly, but no dump helper is called */
+         break;
+      case BRW_DATA_OTHER:  /* listed explicitly, but no dump helper is called */
+         break;
+      case BRW_DATA_CONSTANT_BUFFER:  /* listed explicitly, but no dump helper is called */
+         break;
+      default:
+         break;
+      }
+   }
+
+   if (BRW_DUMP & DUMP_BATCH) {  /* only BRW_DATA_BATCH_BUFFER is decoded */
+      switch (data_type) {
+      case BRW_DATA_BATCH_BUFFER:
+         intel_decode(data, size / 4, offset, pci_id);  /* size / 4: presumably a dword count -- confirm */
+         break;
+      default:
+         break;
+      }
+   }
+}
+
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 3250db1848..0b82f4e156 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -35,6 +35,7 @@
 #include "brw_context.h"
 #include "brw_wm.h"
 #include "brw_debug.h"
+#include "brw_disasm.h"
 
 /* Not quite sure how correct this is - need to understand horiz
  * vs. vertical strides a little better.
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
index 191a733c36..5d5dfdae46 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
@@ -44,6 +44,9 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws,
    struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer);
    uint32_t tile = 0, swizzle = 0;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (!buf)
       return NULL;
    pipe_reference_init(&buf->base.reference, 1);
@@ -89,6 +92,9 @@ i965_libdrm_texture_from_shared_handle(struct drm_api *api,
    struct i965_libdrm_winsys *idws = i965_libdrm_winsys(brw_screen(screen)->sws);
    struct i965_libdrm_buffer *buffer;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    buffer = i965_libdrm_buffer_from_handle(idws, name, handle);
    if (!buffer)
       return NULL;
@@ -106,6 +112,10 @@ i965_libdrm_shared_handle_from_texture(struct drm_api *api,
 {
    struct i965_libdrm_buffer *buf = NULL;
    struct brw_winsys_buffer *buffer = NULL;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch))
       return FALSE;
 
@@ -129,6 +139,10 @@ i965_libdrm_local_handle_from_texture(struct drm_api *api,
                                       unsigned *handle)
 {
    struct brw_winsys_buffer *buffer = NULL;
+
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (!brw_texture_get_winsys_buffer(texture, &buffer, pitch))
       return FALSE;
 
@@ -142,6 +156,9 @@ i965_libdrm_winsys_destroy(struct brw_winsys_screen *iws)
 {
    struct i965_libdrm_winsys *idws = i965_libdrm_winsys(iws);
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    drm_intel_bufmgr_destroy(idws->gem);
 
    FREE(idws);
@@ -154,6 +171,8 @@ i965_libdrm_create_screen(struct drm_api *api, int drmFD,
    struct i965_libdrm_winsys *idws;
    unsigned int deviceID;
 
+   debug_printf("%s\n", __FUNCTION__);
+
    if (arg != NULL) {
       switch(arg->mode) {
       case DRM_CREATE_NORMAL:
@@ -194,6 +213,8 @@ i965_libdrm_create_context(struct drm_api *api, struct pipe_screen *screen)
 static void
 destroy(struct drm_api *api)
 {
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
 
 }
 
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
index 1f3f19ab72..d4a0c97262 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_buffer.c
@@ -5,16 +5,59 @@
 #include "i915_drm.h"
 #include "intel_bufmgr.h"
 
+
+
 const char *names[BRW_BUFFER_TYPE_MAX] = {
-   "texture",
-   "scanout",
-   "vertex",
-   "curbe",
-   "query",
-   "shader_constants",
-   "wm_scratch",
-   "batch",
-   "state_cache",
+   "TEXTURE",
+   "SCANOUT",
+   "VERTEX",
+   "CURBE",
+   "QUERY",
+   "SHADER_CONSTANTS",
+   "WM_SCRATCH",
+   "BATCH",
+   "GENERAL_STATE",
+   "SURFACE_STATE",
+   "PIXEL",
+   "GENERIC",
+};
+
+const char *usages[BRW_USAGE_MAX] = {
+   "STATE",
+   "QUERY_RESULT",
+   "RENDER_TARGET",
+   "DEPTH_BUFFER",
+   "BLIT_SOURCE",
+   "BLIT_DEST",
+   "SAMPLER",
+   "VERTEX",
+   "SCRATCH"
+};
+
+
+const char *data_types[BRW_DATA_MAX] =
+{
+   "GS: CC_VP",
+   "GS: CC_UNIT",
+   "GS: WM_PROG",
+   "GS: SAMPLER_DEFAULT_COLOR",
+   "GS: SAMPLER",
+   "GS: WM_UNIT",
+   "GS: SF_PROG",
+   "GS: SF_VP",
+   "GS: SF_UNIT",
+   "GS: VS_UNIT",
+   "GS: VS_PROG",
+   "GS: GS_UNIT",
+   "GS: GS_PROG",
+   "GS: CLIP_VP",
+   "GS: CLIP_UNIT",
+   "GS: CLIP_PROG",
+   "SS: SURFACE",
+   "SS: SURF_BIND",
+   "CONSTANT DATA",
+   "BATCH DATA",
+   "(untyped)"
 };
 
 static enum pipe_error 
@@ -27,6 +70,9 @@ i965_libdrm_bo_alloc(struct brw_winsys_screen *sws,
    struct i965_libdrm_winsys *idws = i965_libdrm_winsys(sws);
    struct i965_libdrm_buffer *buf;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    buf = CALLOC_STRUCT(i965_libdrm_buffer);
    if (!buf)
       return PIPE_ERROR_OUT_OF_MEMORY;
@@ -79,6 +125,9 @@ i965_libdrm_bo_destroy(struct brw_winsys_buffer *buffer)
 {
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    drm_intel_bo_unreference(buf->bo);
    FREE(buffer);
 }
@@ -95,6 +144,12 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer,
    int read, write;
    int ret;
 
+   if (BRW_DUMP)
+      debug_printf("%s buf %p offset %x delta %x buf2 %p/%s/%s\n",
+		   __FUNCTION__, (void *)buffer, 
+		   offset, delta,
+		   (void *)buffer2, names[buf2->data_type], usages[usage]);
+
    switch (usage) {
    case BRW_USAGE_STATE:
       read = I915_GEM_DOMAIN_INSTRUCTION;
@@ -104,7 +159,11 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer,
       read = I915_GEM_DOMAIN_INSTRUCTION;
       write = I915_GEM_DOMAIN_INSTRUCTION;
       break;
-   case BRW_USAGE_BLIT_DEST:
+   case BRW_USAGE_RENDER_TARGET:
+      read = I915_GEM_DOMAIN_RENDER;
+      write = 0;
+      break;
+   case BRW_USAGE_DEPTH_BUFFER:
       read = I915_GEM_DOMAIN_RENDER;
       write = I915_GEM_DOMAIN_RENDER;
       break;
@@ -112,11 +171,7 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer,
       read = 0;
       write = I915_GEM_DOMAIN_RENDER;
       break;
-   case BRW_USAGE_RENDER_TARGET:
-      read = I915_GEM_DOMAIN_RENDER;
-      write = 0;
-      break;
-   case BRW_USAGE_DEPTH_BUFFER:
+   case BRW_USAGE_BLIT_DEST:
       read = I915_GEM_DOMAIN_RENDER;
       write = I915_GEM_DOMAIN_RENDER;
       break;
@@ -137,6 +192,11 @@ i965_libdrm_bo_emit_reloc(struct brw_winsys_buffer *buffer,
       return -1;
    }
 
+   /* Needed??
+   ((uint32_t *)buf->bo->virtual)[offset/4] = (delta +
+					       buf2->bo->offset);
+    */
+
    ret = dri_bo_emit_reloc( buf->bo, read, write, delta, offset, buf2->bo );
    if (ret)
       return -1;
@@ -152,6 +212,9 @@ i965_libdrm_bo_exec(struct brw_winsys_buffer *buffer,
    struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws);
    int ret;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (idws->send_cmd) {
       ret = dri_bo_exec(buf->bo, bytes_used, NULL, 0, 0);
       if (ret)
@@ -171,10 +234,20 @@ i965_libdrm_bo_subdata(struct brw_winsys_buffer *buffer,
                        unsigned nr_reloc)
 {
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws);
    int ret, i;
 
    (void)data_type;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
+   if (BRW_DUMP)
+      brw_dump_data( idws->id,
+		     data_type,
+		     buf->bo->offset + offset, 
+		     data, size );
+
    /* XXX: use bo_map_gtt/memcpy/unmap_gtt under some circumstances???
     */
    ret = drm_intel_bo_subdata(buf->bo, offset, size, (void*)data);
@@ -194,6 +267,9 @@ i965_libdrm_bo_is_busy(struct brw_winsys_buffer *buffer)
 {
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    return drm_intel_bo_busy(buf->bo);
 }
 
@@ -204,6 +280,9 @@ i965_libdrm_bo_references(struct brw_winsys_buffer *a,
    struct i965_libdrm_buffer *bufa = i965_libdrm_buffer(a);
    struct i965_libdrm_buffer *bufb = i965_libdrm_buffer(b);
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    /* XXX: can't find this func:
     */
    return drm_intel_bo_references(bufa->bo, bufb->bo);
@@ -220,6 +299,9 @@ i965_libdrm_check_aperture_space(struct brw_winsys_screen *iws,
    static drm_intel_bo *bos[128];
    int i;
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (count > Elements(bos)) {
       assert(0);
       return FALSE;
@@ -243,6 +325,12 @@ i965_libdrm_bo_map(struct brw_winsys_buffer *buffer,
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
    int ret;
 
+
+   if (BRW_DUMP)
+      debug_printf("%s %p %s %s\n", __FUNCTION__, (void *)buffer, 
+		   write ? "read/write" : "read",
+		   write ? data_types[data_type] : "");
+
    if (!buf->map_count) {
       if (buf->map_gtt) {
          ret = drm_intel_gem_bo_map_gtt(buf->bo);
@@ -256,6 +344,7 @@ i965_libdrm_bo_map(struct brw_winsys_buffer *buffer,
       }
    }
 
+   buf->data_type = data_type;
    buf->map_count++;
    return buf->bo->virtual;
 }
@@ -265,7 +354,18 @@ i965_libdrm_bo_flush_range(struct brw_winsys_buffer *buffer,
                            unsigned offset,
                            unsigned length)
 {
+   struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
+   struct i965_libdrm_winsys *idws = i965_libdrm_winsys(buffer->sws);
 
+   if (BRW_DUMP)
+      debug_printf("%s offset %d len %d\n", __FUNCTION__, offset, length);
+
+   if (BRW_DUMP)
+      brw_dump_data( idws->id,
+		     buf->data_type,
+		     buf->bo->offset + offset, 
+		     buf->bo->virtual + offset, 
+		     length );
 }
 
 static void 
@@ -273,6 +373,9 @@ i965_libdrm_bo_unmap(struct brw_winsys_buffer *buffer)
 {
    struct i965_libdrm_buffer *buf = i965_libdrm_buffer(buffer);
 
+   if (BRW_DUMP)
+      debug_printf("%s\n", __FUNCTION__);
+
    if (--buf->map_count > 0)
       return;
 
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
index 7945711263..5b556b18f0 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
@@ -52,6 +52,8 @@ struct i965_libdrm_buffer {
    boolean flinked;
    unsigned flink;
 
+   unsigned data_type;		/* valid while mapped */
+
    unsigned cheesy_refcount;
 };
 
diff --git a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
index 9d2bfae090..e712de6307 100644
--- a/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
+++ b/src/gallium/winsys/drm/i965/xlib/xlib_i965.c
@@ -228,89 +228,7 @@ xlib_brw_bo_exec( struct brw_winsys_buffer *buffer,
    return 0;
 }
 
-static void dump_data( struct xlib_brw_winsys *xbw,
-                       enum brw_buffer_data_type data_type,
-                       unsigned offset,
-                       const void *data,
-                       size_t size )
-{
-   static int DUMP_ASM = 0;
-   static int DUMP_STATE = 0;
-   static int DUMP_BATCH = 1;
-
-   if (DUMP_ASM) {
-      switch (data_type) {
-      case BRW_DATA_GS_WM_PROG:
-      case BRW_DATA_GS_SF_PROG:
-      case BRW_DATA_GS_VS_PROG:
-      case BRW_DATA_GS_GS_PROG:
-      case BRW_DATA_GS_CLIP_PROG:
-         brw_disasm( stderr, data, size / sizeof(struct brw_instruction) );
-         break;
-      default:
-         break;
-      }
-   }
-
-   if (DUMP_STATE) {
-      switch (data_type) {
-      case BRW_DATA_GS_CC_VP:
-         brw_dump_cc_viewport( data );
-         break;
-      case BRW_DATA_GS_CC_UNIT:
-         brw_dump_cc_unit_state( data );
-         break;
-      case BRW_DATA_GS_SAMPLER_DEFAULT_COLOR:
-         brw_dump_sampler_default_color( data );
-         break;
-      case BRW_DATA_GS_SAMPLER:
-         brw_dump_sampler_state( data );
-         break;
-      case BRW_DATA_GS_WM_UNIT:
-         brw_dump_wm_unit_state( data );
-         break;
-      case BRW_DATA_GS_SF_VP:
-         brw_dump_sf_viewport( data );
-         break;
-      case BRW_DATA_GS_SF_UNIT:
-         brw_dump_sf_unit_state( data );
-         break;
-      case BRW_DATA_GS_VS_UNIT:
-         brw_dump_vs_unit_state( data );
-         break;
-      case BRW_DATA_GS_GS_UNIT:
-         brw_dump_gs_unit_state( data );
-         break;
-      case BRW_DATA_GS_CLIP_VP:
-         brw_dump_clipper_viewport( data );
-         break;
-      case BRW_DATA_GS_CLIP_UNIT:
-         brw_dump_clip_unit_state( data );
-         break;
-      case BRW_DATA_SS_SURFACE:
-         brw_dump_surface_state( data );
-         break;
-      case BRW_DATA_SS_SURF_BIND:
-         break;
-      case BRW_DATA_OTHER:
-         break;
-      case BRW_DATA_CONSTANT_BUFFER:
-         break;
-      default:
-         break;
-      }
-   }
 
-   if (DUMP_BATCH) {
-      switch (data_type) {
-      case BRW_DATA_BATCH_BUFFER:
-         intel_decode(data, size / 4, offset, xbw->chipset.pci_id);
-         break;
-      default:
-         break;
-      }
-   }
-}
 
 
 static int
@@ -346,10 +264,11 @@ xlib_brw_bo_subdata(struct brw_winsys_buffer *buffer,
          xlib_brw_buffer(reloc[i].bo)->offset + reloc[i].delta;
    }
 
-   if (1)
-      dump_data( xbw, data_type,
-                 buf->offset + offset, 
-                 buf->virtual + offset, size );
+   if (BRW_DUMP)
+      brw_dump_data( xbw->chipset.pci_id,
+		     data_type,
+		     buf->offset + offset, 
+		     buf->virtual + offset, size );
 
 
    return 0;
-- 
cgit v1.2.3


From 6781f624af8b06061673f3fd6f19ffb6a56c3e8c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 30 Nov 2009 15:35:58 +0000
Subject: i965g: pass backbuffer tiling information to driver

The gem winsys already obtains this information, so it needs to pass it on to the driver.
---
 src/gallium/drivers/i965/brw_screen.h             |  3 ---
 src/gallium/drivers/i965/brw_screen_tex_layout.c  |  1 +
 src/gallium/drivers/i965/brw_screen_texture.c     | 12 +++---------
 src/gallium/drivers/i965/brw_winsys.h             |  9 ++++++++-
 src/gallium/winsys/drm/i965/gem/i965_drm_api.c    | 10 ++++++----
 src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h |  8 +++-----
 6 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index 301b20d549..ab811e48fc 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -72,9 +72,6 @@ struct brw_buffer
    void *ptr;
 };
 
-#define BRW_TILING_NONE  0
-#define BRW_TILING_Y     1
-#define BRW_TILING_X     2
 
 union brw_surface_id {
    struct {
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
index f793fa8859..71a8890f83 100644
--- a/src/gallium/drivers/i965/brw_screen_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -32,6 +32,7 @@
 
 #include "brw_screen.h"
 #include "brw_debug.h"
+#include "brw_winsys.h"
 
 /* Code to layout images in a mipmap tree for i965.
  */
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 666ec70d42..650cac240b 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -472,7 +472,8 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
 struct pipe_texture * 
 brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
                                   const struct pipe_texture *templ,
-                                  const unsigned pitch,
+                                  unsigned pitch,
+				  unsigned tiling,
                                   struct brw_winsys_buffer *buffer)
 {
    struct brw_screen *bscreen = brw_screen(screen);
@@ -495,18 +496,11 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
    tex->base.screen = screen;
 
    tex->cpp = pf_get_size(tex->base.format);
+   tex->tiling = tiling;
 
    make_empty_list(&tex->views[0]);
    make_empty_list(&tex->views[1]);
 
-   if (1)
-      tex->tiling = BRW_TILING_NONE;
-   else if (bscreen->chipset.is_965 &&
-            pf_is_depth_or_stencil(templ->format))
-      tex->tiling = BRW_TILING_Y;
-   else
-      tex->tiling = BRW_TILING_X;
-
    if (!brw_texture_layout(bscreen, tex))
       goto fail;
 
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 9e86a1256e..af506a283d 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -111,6 +111,12 @@ enum brw_buffer_data_type {
 };
 
 
+/* Matches the i915_drm definitions:
+ */
+#define BRW_TILING_NONE  0
+#define BRW_TILING_X     1
+#define BRW_TILING_Y     2
+
 
 /* Relocations to be applied with subdata in a call to sws->bo_subdata, below.
  *
@@ -271,7 +277,8 @@ boolean brw_texture_get_winsys_buffer(struct pipe_texture *texture,
 struct pipe_texture * 
 brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
                                   const struct pipe_texture *template,
-                                  const unsigned pitch,
+                                  unsigned pitch,
+				  unsigned tiling,
                                   struct brw_winsys_buffer *buffer);
 
 
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
index 5172b5410b..fc9678d2b6 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_api.c
@@ -42,7 +42,7 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws,
                                const char* name, unsigned handle)
 {
    struct i965_libdrm_buffer *buf = CALLOC_STRUCT(i965_libdrm_buffer);
-   uint32_t tile = 0, swizzle = 0;
+   uint32_t swizzle = 0;
 
    if (BRW_DUMP)
       debug_printf("%s\n", __FUNCTION__);
@@ -60,8 +60,8 @@ i965_libdrm_buffer_from_handle(struct i965_libdrm_winsys *idws,
    if (!buf->bo)
       goto err;
 
-   drm_intel_bo_get_tiling(buf->bo, &tile, &swizzle);
-   if (tile != 0)
+   drm_intel_bo_get_tiling(buf->bo, &buf->tiling, &swizzle);
+   if (buf->tiling != 0)
       buf->map_gtt = TRUE;
 
    return buf;
@@ -100,7 +100,9 @@ i965_libdrm_texture_from_shared_handle(struct drm_api *api,
    if (!buffer)
       return NULL;
 
-   return brw_texture_blanket_winsys_buffer(screen, template, pitch, &buffer->base);
+   return brw_texture_blanket_winsys_buffer(screen, template, pitch,
+					    buffer->tiling,
+					    &buffer->base);
 }
 
 
diff --git a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
index 235eaf68fa..c6a7d4a8c5 100644
--- a/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
+++ b/src/gallium/winsys/drm/i965/gem/i965_drm_winsys.h
@@ -46,14 +46,12 @@ struct i965_libdrm_buffer {
 
    void *ptr;
    unsigned map_count;
-   boolean map_gtt;
+   unsigned data_type;		/* valid while mapped */
+   unsigned tiling;
 
+   boolean map_gtt;
    boolean flinked;
    unsigned flink;
-
-   unsigned data_type;		/* valid while mapped */
-
-   unsigned cheesy_refcount;
 };
 
 static INLINE struct i965_libdrm_buffer *
-- 
cgit v1.2.3


From bb1cde755bce0ea29cc4c1a29ad3841e3b304309 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 30 Nov 2009 16:16:30 +0000
Subject: i965g: turn on texture tiling by default

---
 src/gallium/drivers/i965/brw_screen.c         |  3 +++
 src/gallium/drivers/i965/brw_screen.h         |  1 +
 src/gallium/drivers/i965/brw_screen_texture.c | 11 ++++-------
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 70e2d9c47a..1855e4fd45 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -396,5 +396,8 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
    brw_screen_tex_surface_init(bscreen);
    brw_screen_buffer_init(bscreen);
 
+   bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE);
+   
+   
    return &bscreen->base;
 }
diff --git a/src/gallium/drivers/i965/brw_screen.h b/src/gallium/drivers/i965/brw_screen.h
index ab811e48fc..7226d9228b 100644
--- a/src/gallium/drivers/i965/brw_screen.h
+++ b/src/gallium/drivers/i965/brw_screen.h
@@ -45,6 +45,7 @@ struct brw_screen
    struct pipe_screen base;
    struct brw_chipset chipset;
    struct brw_winsys_screen *sws;
+   boolean no_tiling;
 };
 
 /**
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index 650cac240b..f4c20f31a5 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -209,14 +209,11 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
 
    /* XXX: No tiling with compressed textures??
     */
-   if (tex->compressed == 0 
-       /* && bscreen->use_texture_tiling */
-       /* && bscreen->kernel_exec_fencing */) 
+   if (tex->compressed == 0 &&
+       !bscreen->no_tiling) 
    {
-      if (1)
-         tex->tiling = BRW_TILING_NONE;
-      else if (bscreen->chipset.is_965 &&
-               pf_is_depth_or_stencil(templ->format))
+      if (bscreen->chipset.is_965 &&
+	  pf_is_depth_or_stencil(templ->format))
 	 tex->tiling = BRW_TILING_Y;
       else
 	 tex->tiling = BRW_TILING_X;
-- 
cgit v1.2.3


From ac400ffce62be47fc77e8d10cabcd39b92b6c627 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 30 Nov 2009 20:29:18 +0100
Subject: gallium: interface cleanups, remove nblocksx/y from pipe_texture and
 more

This patch removes nblocksx, nblocksy arrays from pipe_texture (can be
recalculated if needed). Furthermore, pipe_format_block struct is gone
completely (again, contains just derived state).
nblocksx, nblocksy, block are also removed from pipe_transfer, together with
the format enum (can be obtained from the texture associated with the transfer).
---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c    |   1 -
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c  |   1 -
 src/gallium/auxiliary/util/u_blit.c              |   1 -
 src/gallium/auxiliary/util/u_debug.c             |   8 +-
 src/gallium/auxiliary/util/u_format.h            |   2 +-
 src/gallium/auxiliary/util/u_gen_mipmap.c        |  12 +--
 src/gallium/auxiliary/util/u_linear.c            |   2 +-
 src/gallium/auxiliary/util/u_linear.h            |  19 +++-
 src/gallium/auxiliary/util/u_rect.c              |  71 +++++++------
 src/gallium/auxiliary/util/u_rect.h              |   4 +-
 src/gallium/auxiliary/util/u_surface.c           |   1 -
 src/gallium/auxiliary/util/u_tile.c              |  29 +++---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c |   1 -
 src/gallium/drivers/softpipe/sp_texture.c        |  33 +++---
 src/gallium/drivers/softpipe/sp_tile_cache.c     |  10 +-
 src/gallium/include/pipe/p_format.h              | 123 ++++++++++-------------
 src/gallium/include/pipe/p_state.h               |   8 --
 src/mesa/state_tracker/st_cb_drawpixels.c        |   8 +-
 src/mesa/state_tracker/st_cb_fbo.c               |   7 +-
 src/mesa/state_tracker/st_cb_readpixels.c        |  14 +--
 src/mesa/state_tracker/st_cb_texture.c           |  33 +++---
 src/mesa/state_tracker/st_gen_mipmap.c           |   4 +-
 src/mesa/state_tracker/st_texture.c              |   4 +-
 23 files changed, 191 insertions(+), 205 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 31de84b272..8c631a01af 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -401,7 +401,6 @@ aaline_create_texture(struct aaline_stage *aaline)
    texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
    texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
    texTemp.depth0 = 1;
-   pf_get_block(texTemp.format, &texTemp.block);
 
    aaline->texture = screen->texture_create(screen, &texTemp);
    if (!aaline->texture)
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 27d89721b1..7803946baa 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -430,7 +430,6 @@ pstip_create_texture(struct pstip_stage *pstip)
    texTemp.width0 = 32;
    texTemp.height0 = 32;
    texTemp.depth0 = 1;
-   pf_get_block(texTemp.format, &texTemp.block);
 
    pstip->texture = screen->texture_create(screen, &texTemp);
    if (pstip->texture == NULL)
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 5372df5735..abe1de3302 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -357,7 +357,6 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       texTemp.width0 = srcW;
       texTemp.height0 = srcH;
       texTemp.depth0 = 1;
-      pf_get_block(src->format, &texTemp.block);
 
       tex = screen->texture_create(screen, &texTemp);
       if (!tex)
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 96d400c839..40633574b0 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -669,10 +669,10 @@ void debug_dump_surface(const char *prefix,
       goto error;
    
    debug_dump_image(prefix, 
-                    transfer->format,
-                    transfer->block.size, 
-                    transfer->nblocksx,
-                    transfer->nblocksy,
+                    texture->format,
+                    pf_get_blocksize(texture->format), 
+                    pf_get_nblocksx(texture->format, transfer->width),
+                    pf_get_nblocksy(texture->format, transfer->height),
                     transfer->stride,
                     data);
    
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 7b5b7fcda5..6740683a61 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -50,7 +50,7 @@ struct util_format_block
    /** Block height in pixels */
    unsigned height;
 
-   /** Block size in bytes */
+   /** Block size in bits */
    unsigned bits;
 };
 
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index f67f1e458d..83263d9fe6 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -996,7 +996,7 @@ reduce_2d(enum pipe_format pformat,
 {
    enum dtype datatype;
    uint comps;
-   const int bpt = pf_get_size(pformat);
+   const int bpt = pf_get_blocksize(pformat);
    const ubyte *srcA, *srcB;
    ubyte *dst;
    int row;
@@ -1035,7 +1035,7 @@ reduce_3d(enum pipe_format pformat,
           int dstWidth, int dstHeight, int dstDepth,
           int dstRowStride, ubyte *dstPtr)
 {
-   const int bpt = pf_get_size(pformat);
+   const int bpt = pf_get_blocksize(pformat);
    const int border = 0;
    int img, row;
    int bytesPerSrcImage, bytesPerDstImage;
@@ -1159,8 +1159,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
    const uint zslice = 0;
    uint dstLevel;
    
-   assert(pt->block.width == 1);
-   assert(pt->block.height == 1);
+   assert(pf_get_blockwidth(pt->format) == 1);
+   assert(pf_get_blockheight(pt->format) == 1);
 
    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
       const uint srcLevel = dstLevel - 1;
@@ -1204,8 +1204,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
    struct pipe_screen *screen = pipe->screen;
    uint dstLevel, zslice = 0;
 
-   assert(pt->block.width == 1);
-   assert(pt->block.height == 1);
+   assert(pf_get_blockwidth(pt->format) == 1);
+   assert(pf_get_blockheight(pt->format) == 1);
 
    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
       const uint srcLevel = dstLevel - 1;
diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c
index a1dce3f5cf..f1aef21677 100644
--- a/src/gallium/auxiliary/util/u_linear.c
+++ b/src/gallium/auxiliary/util/u_linear.c
@@ -82,7 +82,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
 
 void
 pipe_linear_fill_info(struct pipe_tile_info *t,
-		      const struct pipe_format_block *block,
+		      const struct u_linear_format_block *block,
 		      unsigned tile_width, unsigned tile_height,
 		      unsigned tiles_x, unsigned tiles_y)
 {
diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h
index b74308ffa3..42c40b2aa7 100644
--- a/src/gallium/auxiliary/util/u_linear.h
+++ b/src/gallium/auxiliary/util/u_linear.h
@@ -35,6 +35,19 @@
 
 #include "pipe/p_format.h"
 
+struct u_linear_format_block
+{
+   /** Block size in bytes */
+   unsigned size;
+   
+   /** Block width in pixels */
+   unsigned width;
+   
+   /** Block height in pixels */
+   unsigned height;
+};
+
+
 struct pipe_tile_info
 {
    unsigned size;
@@ -49,10 +62,10 @@ struct pipe_tile_info
    unsigned rows;
 
    /* Describe the tile in pixels */
-   struct pipe_format_block tile;
+   struct u_linear_format_block tile;
 
    /* Describe each block within the tile */
-   struct pipe_format_block block;
+   struct u_linear_format_block block;
 };
 
 void pipe_linear_to_tile(size_t src_stride, const void *src_ptr,
@@ -71,7 +84,7 @@ void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr,
  * @tiles_y number of tiles in y axis
  */
 void pipe_linear_fill_info(struct pipe_tile_info *t,
-			   const struct pipe_format_block *block,
+			   const struct u_linear_format_block *block,
 			   unsigned tile_width, unsigned tile_height,
 			   unsigned tiles_x, unsigned tiles_y);
 
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 9866b6fc8a..72725b59d2 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -44,7 +44,7 @@
  */
 void
 util_copy_rect(ubyte * dst,
-               const struct pipe_format_block *block,
+               enum pipe_format format,
                unsigned dst_stride,
                unsigned dst_x,
                unsigned dst_y,
@@ -57,27 +57,30 @@ util_copy_rect(ubyte * dst,
 {
    unsigned i;
    int src_stride_pos = src_stride < 0 ? -src_stride : src_stride;
+   int blocksize = pf_get_blocksize(format);
+   int blockwidth = pf_get_blockwidth(format);
+   int blockheight = pf_get_blockheight(format);
 
-   assert(block->size > 0);
-   assert(block->width > 0);
-   assert(block->height > 0);
+   assert(blocksize > 0);
+   assert(blockwidth > 0);
+   assert(blockheight > 0);
    assert(src_x >= 0);
    assert(src_y >= 0);
    assert(dst_x >= 0);
    assert(dst_y >= 0);
 
-   dst_x /= block->width;
-   dst_y /= block->height;
-   width = (width + block->width - 1)/block->width;
-   height = (height + block->height - 1)/block->height;
-   src_x /= block->width;
-   src_y /= block->height;
+   dst_x /= blockwidth;
+   dst_y /= blockheight;
+   width = (width + blockwidth - 1)/blockwidth;
+   height = (height + blockheight - 1)/blockheight;
+   src_x /= blockwidth;
+   src_y /= blockheight;
    
-   dst += dst_x * block->size;
-   src += src_x * block->size;
+   dst += dst_x * blocksize;
+   src += src_x * blocksize;
    dst += dst_y * dst_stride;
    src += src_y * src_stride_pos;
-   width *= block->size;
+   width *= blocksize;
 
    if (width == dst_stride && width == src_stride)
       memcpy(dst, src, height * width);
@@ -92,7 +95,7 @@ util_copy_rect(ubyte * dst,
 
 void
 util_fill_rect(ubyte * dst,
-               const struct pipe_format_block *block,
+               enum pipe_format format,
                unsigned dst_stride,
                unsigned dst_x,
                unsigned dst_y,
@@ -102,23 +105,26 @@ util_fill_rect(ubyte * dst,
 {
    unsigned i, j;
    unsigned width_size;
+   int blocksize = pf_get_blocksize(format);
+   int blockwidth = pf_get_blockwidth(format);
+   int blockheight = pf_get_blockheight(format);
 
-   assert(block->size > 0);
-   assert(block->width > 0);
-   assert(block->height > 0);
+   assert(blocksize > 0);
+   assert(blockwidth > 0);
+   assert(blockheight > 0);
    assert(dst_x >= 0);
    assert(dst_y >= 0);
 
-   dst_x /= block->width;
-   dst_y /= block->height;
-   width = (width + block->width - 1)/block->width;
-   height = (height + block->height - 1)/block->height;
+   dst_x /= blockwidth;
+   dst_y /= blockheight;
+   width = (width + blockwidth - 1)/blockwidth;
+   height = (height + blockheight - 1)/blockheight;
    
-   dst += dst_x * block->size;
+   dst += dst_x * blocksize;
    dst += dst_y * dst_stride;
-   width_size = width * block->size;
+   width_size = width * blocksize;
    
-   switch (block->size) {
+   switch (blocksize) {
    case 1:
       if(dst_stride == width_size)
 	 memset(dst, (ubyte) value, height * width_size);
@@ -172,10 +178,15 @@ util_surface_copy(struct pipe_context *pipe,
    struct pipe_transfer *src_trans, *dst_trans;
    void *dst_map;
    const void *src_map;
+   enum pipe_format src_format, dst_format;
 
    assert(src->texture && dst->texture);
    if (!src->texture || !dst->texture)
       return;
+
+   src_format = src->texture->format;
+   dst_format = dst->texture->format;
+
    src_trans = screen->get_tex_transfer(screen,
                                         src->texture,
                                         src->face,
@@ -192,9 +203,9 @@ util_surface_copy(struct pipe_context *pipe,
                                         PIPE_TRANSFER_WRITE,
                                         dst_x, dst_y, w, h);
 
-   assert(dst_trans->block.size == src_trans->block.size);
-   assert(dst_trans->block.width == src_trans->block.width);
-   assert(dst_trans->block.height == src_trans->block.height);
+   assert(pf_get_blocksize(dst_format) == pf_get_blocksize(src_format));
+   assert(pf_get_blockwidth(dst_format) == pf_get_blockwidth(src_format));
+   assert(pf_get_blockheight(dst_format) == pf_get_blockheight(src_format));
 
    src_map = pipe->screen->transfer_map(screen, src_trans);
    dst_map = pipe->screen->transfer_map(screen, dst_trans);
@@ -205,7 +216,7 @@ util_surface_copy(struct pipe_context *pipe,
    if (src_map && dst_map) {
       /* If do_flip, invert src_y position and pass negative src stride */
       util_copy_rect(dst_map,
-                     &dst_trans->block,
+                     dst_format,
                      dst_trans->stride,
                      0, 0,
                      w, h,
@@ -259,11 +270,11 @@ util_surface_fill(struct pipe_context *pipe,
    if (dst_map) {
       assert(dst_trans->stride > 0);
 
-      switch (dst_trans->block.size) {
+      switch (pf_get_blocksize(dst_trans->texture->format)) {
       case 1:
       case 2:
       case 4:
-         util_fill_rect(dst_map, &dst_trans->block, dst_trans->stride,
+         util_fill_rect(dst_map, dst_trans->texture->format, dst_trans->stride,
                         0, 0, width, height, value);
          break;
       case 8:
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index daa50834d3..5e444ffae2 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -42,13 +42,13 @@ struct pipe_surface;
 
 
 extern void
-util_copy_rect(ubyte * dst, const struct pipe_format_block *block,
+util_copy_rect(ubyte * dst, enum pipe_format format,
                unsigned dst_stride, unsigned dst_x, unsigned dst_y,
                unsigned width, unsigned height, const ubyte * src,
                int src_stride, unsigned src_x, int src_y);
 
 extern void
-util_fill_rect(ubyte * dst, const struct pipe_format_block *block,
+util_fill_rect(ubyte * dst, enum pipe_format format,
                unsigned dst_stride, unsigned dst_x, unsigned dst_y,
                unsigned width, unsigned height, uint32_t value);
 
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index de8c266db8..f828908f0b 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -82,7 +82,6 @@ util_create_rgba_surface(struct pipe_screen *screen,
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
-   pf_get_block(format, &templ.block);
    templ.tex_usage = usage;
 
    *textureOut = screen->texture_create(screen, &templ);
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 8a22f584be..4f34f8a1a6 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -52,7 +52,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
    const void *src;
 
    if (dst_stride == 0)
-      dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size;
+      dst_stride = pf_get_stride(pt->texture->format, w);
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -62,7 +62,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
    if(!src)
       return;
 
-   util_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
+   util_copy_rect(dst, pt->texture->format, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
 
    screen->transfer_unmap(screen, pt);
 }
@@ -78,9 +78,10 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
 {
    struct pipe_screen *screen = pt->texture->screen;
    void *dst;
+   enum pipe_format format = pt->texture->format;
 
    if (src_stride == 0)
-      src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size;
+      src_stride = pf_get_stride(format, w);
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -90,7 +91,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
    if(!dst)
       return;
 
-   util_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0);
+   util_copy_rect(dst, format, pt->stride, x, y, w, h, src, src_stride, 0, 0);
 
    screen->transfer_unmap(screen, pt);
 }
@@ -1219,21 +1220,22 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
 {
    unsigned dst_stride = w * 4;
    void *packed;
+   enum pipe_format format = pt->texture->format;
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
 
-   packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
+   packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format));
 
    if (!packed)
       return;
 
-   if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV)
+   if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV)
       assert((x & 1) == 0);
 
    pipe_get_tile_raw(pt, x, y, w, h, packed, 0);
 
-   pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride);
+   pipe_tile_raw_to_rgba(format, packed, w, h, p, dst_stride);
 
    FREE(packed);
 }
@@ -1246,16 +1248,17 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
 {
    unsigned src_stride = w * 4;
    void *packed;
+   enum pipe_format format = pt->texture->format;
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
 
-   packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size);
+   packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format));
 
    if (!packed)
       return;
 
-   switch (pt->format) {
+   switch (format) {
    case PIPE_FORMAT_A8R8G8B8_UNORM:
       a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);
       break;
@@ -1322,7 +1325,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
       /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/
       break;
    default:
-      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format));
+      debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(format));
    }
 
    pipe_put_tile_raw(pt, x, y, w, h, packed, 0);
@@ -1344,6 +1347,7 @@ pipe_get_tile_z(struct pipe_transfer *pt,
    ubyte *map;
    uint *pDest = z;
    uint i, j;
+   enum pipe_format format = pt->texture->format;
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -1354,7 +1358,7 @@ pipe_get_tile_z(struct pipe_transfer *pt,
       return;
    }
 
-   switch (pt->format) {
+   switch (format) {
    case PIPE_FORMAT_Z32_UNORM:
       {
          const uint *ptrc
@@ -1428,6 +1432,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
    const uint *ptrc = zSrc;
    ubyte *map;
    uint i, j;
+   enum pipe_format format = pt->texture->format;
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -1438,7 +1443,7 @@ pipe_put_tile_z(struct pipe_transfer *pt,
       return;
    }
 
-   switch (pt->format) {
+   switch (format) {
    case PIPE_FORMAT_Z32_UNORM:
       {
          uint *pDest = (uint *) (map + y * pt->stride + x*4);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 1934965995..8b4c0dc3a2 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -840,7 +840,6 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    template.height0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_height) : r->picture_height;
    template.depth0 = 1;
-   pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
 
    r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index ac5f61e46f..bd653216c0 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -63,13 +63,11 @@ softpipe_texture_layout(struct pipe_screen *screen,
    pt->depth0 = depth;
 
    for (level = 0; level <= pt->last_level; level++) {
-      pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
-      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);  
-      spt->stride[level] = pt->nblocksx[level]*pt->block.size;
+      spt->stride[level] = pf_get_stride(pt->format, width);
 
       spt->level_offset[level] = buffer_size;
 
-      buffer_size += (pt->nblocksy[level] *
+      buffer_size += (pf_get_nblocksy(pt->format, u_minify(height, level)) *
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       spt->stride[level]);
 
@@ -97,9 +95,6 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
                      PIPE_BUFFER_USAGE_GPU_READ_WRITE);
    unsigned tex_usage = spt->base.tex_usage;
 
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
-
    spt->buffer = screen->surface_buffer_create( screen, 
                                                 spt->base.width0, 
                                                 spt->base.height0,
@@ -175,8 +170,6 @@ softpipe_texture_blanket(struct pipe_screen * screen,
    spt->base = *base;
    pipe_reference_init(&spt->base.reference, 1);
    spt->base.screen = screen;
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
    spt->stride[0] = stride[0];
 
    pipe_buffer_reference(&spt->buffer, buffer);
@@ -244,10 +237,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
       ps->zslice = zslice;
 
       if (pt->target == PIPE_TEXTURE_CUBE) {
-         ps->offset += face * pt->nblocksy[level] * spt->stride[level];
+         ps->offset += face * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+                       spt->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
-         ps->offset += zslice * pt->nblocksy[level] * spt->stride[level];
+         ps->offset += zslice * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+                       spt->stride[level];
       }
       else {
          assert(face == 0);
@@ -302,15 +297,12 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
    spt = CALLOC_STRUCT(softpipe_transfer);
    if (spt) {
       struct pipe_transfer *pt = &spt->base;
+      int nblocksy = pf_get_nblocksy(texture->format, u_minify(texture->height0, level));
       pipe_texture_reference(&pt->texture, texture);
-      pt->format = texture->format;
-      pt->block = texture->block;
       pt->x = x;
       pt->y = y;
       pt->width = w;
       pt->height = h;
-      pt->nblocksx = texture->nblocksx[level];
-      pt->nblocksy = texture->nblocksy[level];
       pt->stride = sptex->stride[level];
       pt->usage = usage;
       pt->face = face;
@@ -320,10 +312,10 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
       spt->offset = sptex->level_offset[level];
 
       if (texture->target == PIPE_TEXTURE_CUBE) {
-         spt->offset += face * pt->nblocksy * pt->stride;
+         spt->offset += face * nblocksy * pt->stride;
       }
       else if (texture->target == PIPE_TEXTURE_3D) {
-         spt->offset += zslice * pt->nblocksy * pt->stride;
+         spt->offset += zslice * nblocksy * pt->stride;
       }
       else {
          assert(face == 0);
@@ -361,9 +353,11 @@ softpipe_transfer_map( struct pipe_screen *screen,
 {
    ubyte *map, *xfer_map;
    struct softpipe_texture *spt;
+   enum pipe_format format;
 
    assert(transfer->texture);
    spt = softpipe_texture(transfer->texture);
+   format = transfer->texture->format;
 
    map = pipe_buffer_map(screen, spt->buffer, pipe_transfer_buffer_flags(transfer));
    if (map == NULL)
@@ -380,8 +374,8 @@ softpipe_transfer_map( struct pipe_screen *screen,
    }
 
    xfer_map = map + softpipe_transfer(transfer)->offset +
-      transfer->y / transfer->block.height * transfer->stride +
-      transfer->x / transfer->block.width * transfer->block.size;
+      transfer->y / pf_get_blockheight(format) * transfer->stride +
+      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
    /*printf("map = %p  xfer map = %p\n", map, xfer_map);*/
    return xfer_map;
 }
@@ -438,7 +432,6 @@ softpipe_video_surface_create(struct pipe_screen *screen,
    template.width0 = util_next_power_of_two(width);
    template.height0 = util_next_power_of_two(height);
    template.depth0 = 1;
-   pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
    sp_vsfc->tex = screen->texture_create(screen, &template);
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 65872cecc4..04f61d16c4 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -238,7 +238,7 @@ clear_tile(struct softpipe_cached_tile *tile,
 {
    uint i, j;
 
-   switch (pf_get_size(format)) {
+   switch (pf_get_blocksize(format)) {
    case 1:
       memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE);
       break;
@@ -284,8 +284,9 @@ sp_tile_cache_flush_clear(struct softpipe_tile_cache *tc)
    uint x, y;
    uint numCleared = 0;
 
+   assert(pt->texture);
    /* clear the scratch tile to the clear value */
-   clear_tile(&tc->tile, pt->format, tc->clear_val);
+   clear_tile(&tc->tile, pt->texture->format, tc->clear_val);
 
    /* push the tile to all positions marked as clear */
    for (y = 0; y < h; y += TILE_SIZE) {
@@ -372,6 +373,7 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
 
    if (addr.value != tile->addr.value) {
 
+      assert(pt->texture);
       if (tile->addr.bits.invalid == 0) {
          /* put dirty tile back in framebuffer */
          if (tc->depth_stencil) {
@@ -395,10 +397,10 @@ sp_find_cached_tile(struct softpipe_tile_cache *tc,
       if (is_clear_flag_set(tc->clear_flags, addr)) {
          /* don't get tile from framebuffer, just clear it */
          if (tc->depth_stencil) {
-            clear_tile(tile, pt->format, tc->clear_val);
+            clear_tile(tile, pt->texture->format, tc->clear_val);
          }
          else {
-            clear_tile_rgba(tile, pt->format, tc->clear_color);
+            clear_tile_rgba(tile, pt->texture->format, tc->clear_color);
          }
          clear_clear_flag(tc->clear_flags, addr);
       }
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index af23080920..e6bba777d3 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -422,10 +422,11 @@ static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp )
    return size << (pf_mixed_scale8( format ) * 3);
 }
 
+
 /**
- * Return total bits needed for the pixel format.
+ * Return the total number of bits in one block of the given pixel format.
  */
-static INLINE uint pf_get_bits( enum pipe_format format )
+static INLINE uint pf_get_blocksizebits( enum pipe_format format )
 {
    switch (pf_layout(format)) {
    case PIPE_FORMAT_LAYOUT_RGBAZS:
@@ -441,8 +442,24 @@ static INLINE uint pf_get_bits( enum pipe_format format )
          pf_get_component_bits( format, PIPE_FORMAT_COMP_S );
    case PIPE_FORMAT_LAYOUT_YCBCR:
       assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV );
-      /* return effective bits per pixel */
-      return 16; 
+      return 32;
+   case PIPE_FORMAT_LAYOUT_DXT:
+        switch(format) {
+        case PIPE_FORMAT_DXT1_RGBA:
+        case PIPE_FORMAT_DXT1_RGB:
+        case PIPE_FORMAT_DXT1_SRGBA:
+        case PIPE_FORMAT_DXT1_SRGB:
+           return 64;
+        case PIPE_FORMAT_DXT3_RGBA:
+        case PIPE_FORMAT_DXT5_RGBA:
+        case PIPE_FORMAT_DXT3_SRGBA:
+        case PIPE_FORMAT_DXT5_SRGBA:
+           return 128;
+        default:
+           assert( 0 );
+           return 0;
+        }
+
    default:
       assert( 0 );
       return 0;
@@ -450,102 +467,66 @@ static INLINE uint pf_get_bits( enum pipe_format format )
 }
 
 /**
- * Return bytes per pixel for the given format.
+ * Return bytes per block for the given format.
  */
-static INLINE uint pf_get_size( enum pipe_format format )
+static INLINE uint pf_get_blocksize( enum pipe_format format )
 {
-   assert(pf_get_bits(format) % 8 == 0);
-   return pf_get_bits(format) / 8;
+   assert(pf_get_blocksizebits(format) % 8 == 0);
+   return pf_get_blocksizebits(format) / 8;
 }
 
-/**
- * Describe accurately the pixel format.
- * 
- * The chars-per-pixel concept falls apart with compressed and yuv images, where
- * more than one pixel are coded in a single data block. This structure 
- * describes that block.
- * 
- * Simple pixel formats are effectively a 1x1xcpp block.
- */
-struct pipe_format_block
+static INLINE uint pf_get_blockwidth( enum pipe_format format )
 {
-   /** Block size in bytes */
-   unsigned size;
-   
-   /** Block width in pixels */
-   unsigned width;
-   
-   /** Block height in pixels */
-   unsigned height;
-};
+   switch (pf_layout(format)) {
+   case PIPE_FORMAT_LAYOUT_YCBCR:
+      return 2;
+   case PIPE_FORMAT_LAYOUT_DXT:
+      return 4;
+   default:
+      return 1;
+   }
+}
 
-/**
- * Describe pixel format's block.   
- * 
- * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx
- */
-static INLINE void 
-pf_get_block(enum pipe_format format, struct pipe_format_block *block)
+static INLINE uint pf_get_blockheight( enum pipe_format format )
 {
-   switch(format) {
-   case PIPE_FORMAT_DXT1_RGBA:
-   case PIPE_FORMAT_DXT1_RGB:
-   case PIPE_FORMAT_DXT1_SRGBA:
-   case PIPE_FORMAT_DXT1_SRGB:
-      block->size = 8;
-      block->width = 4;
-      block->height = 4;
-      break;
-   case PIPE_FORMAT_DXT3_RGBA:
-   case PIPE_FORMAT_DXT5_RGBA:
-   case PIPE_FORMAT_DXT3_SRGBA:
-   case PIPE_FORMAT_DXT5_SRGBA:
-      block->size = 16;
-      block->width = 4;
-      block->height = 4;
-      break;
-   case PIPE_FORMAT_YCBCR:
-   case PIPE_FORMAT_YCBCR_REV:
-      block->size = 4; /* 2*cpp */
-      block->width = 2;
-      block->height = 1;
-      break;
+   switch (pf_layout(format)) {
+   case PIPE_FORMAT_LAYOUT_DXT:
+      return 4;
    default:
-      block->size = pf_get_size(format);
-      block->width = 1;
-      block->height = 1;
-      break;
+      return 1;
    }
 }
 
 static INLINE unsigned
-pf_get_nblocksx(const struct pipe_format_block *block, unsigned x)
+pf_get_nblocksx(enum pipe_format format, unsigned x)
 {
-   return (x + block->width - 1)/block->width;
+   unsigned blockwidth = pf_get_blockwidth(format);
+   return (x + blockwidth - 1) / blockwidth;
 }
 
 static INLINE unsigned
-pf_get_nblocksy(const struct pipe_format_block *block, unsigned y)
+pf_get_nblocksy(enum pipe_format format, unsigned y)
 {
-   return (y + block->height - 1)/block->height;
+   unsigned blockheight = pf_get_blockheight(format);
+   return (y + blockheight - 1) / blockheight;
 }
 
 static INLINE unsigned
-pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height)
+pf_get_nblocks(enum pipe_format format, unsigned width, unsigned height)
 {
-   return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height);
+   return pf_get_nblocksx(format, width) * pf_get_nblocksy(format, height);
 }
 
 static INLINE size_t
-pf_get_stride(const struct pipe_format_block *block, unsigned width)
+pf_get_stride(enum pipe_format format, unsigned width)
 {
-   return pf_get_nblocksx(block, width)*block->size;
+   return pf_get_nblocksx(format, width) * pf_get_blocksize(format);
 }
 
 static INLINE size_t
-pf_get_2d_size(const struct pipe_format_block *block, size_t stride, unsigned height)
+pf_get_2d_size(enum pipe_format format, size_t stride, unsigned height)
 {
-   return pf_get_nblocksy(block, height)*stride;
+   return pf_get_nblocksy(format, height) * stride;
 }
 
 static INLINE boolean 
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 9766e86620..db83c8e157 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -315,14 +315,10 @@ struct pipe_surface
  */
 struct pipe_transfer
 {
-   enum pipe_format format;      /**< PIPE_FORMAT_x */
    unsigned x;                   /**< x offset from start of texture image */
    unsigned y;                   /**< y offset from start of texture image */
    unsigned width;               /**< logical width in pixels */
    unsigned height;              /**< logical height in pixels */
-   struct pipe_format_block block;
-   unsigned nblocksx;            /**< allocated width in blocks */
-   unsigned nblocksy;            /**< allocated height in blocks */
    unsigned stride;              /**< stride in bytes between rows of blocks */
    enum pipe_transfer_usage usage; /**< PIPE_TRANSFER_*  */
 
@@ -347,10 +343,6 @@ struct pipe_texture
    unsigned height0;
    unsigned depth0;
 
-   struct pipe_format_block block;
-   unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */
-   unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated height in blocks */
-
    unsigned last_level:8;    /**< Index of last mipmap level present/defined */
 
    unsigned nr_samples:8;    /**< for multisampled surfaces, nr of samples */
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index a68a29e126..a15043da78 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -701,7 +701,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
             }
 
             /* now pack the stencil (and Z) values in the dest format */
-            switch (pt->format) {
+            switch (pt->texture->format) {
             case PIPE_FORMAT_S8_UNORM:
                {
                   ubyte *dest = stmap + spanY * pt->stride + spanX;
@@ -856,8 +856,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
 					   usage, dstx, dsty,
 					   width, height);
 
-   assert(ptDraw->block.width == 1);
-   assert(ptDraw->block.height == 1);
+   assert(pf_get_blockwidth(ptDraw->texture->format) == 1);
+   assert(pf_get_blockheight(ptDraw->texture->format) == 1);
 
    /* map the stencil buffer */
    drawMap = screen->transfer_map(screen, ptDraw);
@@ -878,7 +878,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
       dst = drawMap + y * ptDraw->stride;
       src = buffer + i * width;
 
-      switch (ptDraw->format) {
+      switch (ptDraw->texture->format) {
       case PIPE_FORMAT_S8Z24_UNORM:
          {
             uint *dst4 = (uint *) dst;
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 659a6c9193..ead8e22888 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -98,16 +98,14 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
    strb->defined = GL_FALSE;  /* undefined contents now */
 
    if(strb->software) {
-      struct pipe_format_block block;
       size_t size;
       
       _mesa_free(strb->data);
 
       assert(strb->format != PIPE_FORMAT_NONE);
-      pf_get_block(strb->format, &block);
       
-      strb->stride = pf_get_stride(&block, width);
-      size = pf_get_2d_size(&block, strb->stride, height);
+      strb->stride = pf_get_stride(strb->format, width);
+      size = pf_get_2d_size(strb->format, strb->stride, height);
       
       strb->data = _mesa_malloc(size);
       
@@ -127,7 +125,6 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       memset(&template, 0, sizeof(template));
       template.target = PIPE_TEXTURE_2D;
       template.format = format;
-      pf_get_block(format, &template.block);
       template.width0 = width;
       template.height0 = height;
       template.depth0 = 1;
diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c
index 103861d6f9..6fa7bb64f2 100644
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -103,7 +103,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
       }
 
       /* get stencil (and Z) values */
-      switch (pt->format) {
+      switch (pt->texture->format) {
       case PIPE_FORMAT_S8_UNORM:
          {
             const ubyte *src = stmap + srcY * pt->stride;
@@ -431,8 +431,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
       const GLint dstStride = _mesa_image_row_stride(&clippedPacking, width,
                                                      format, type);
 
-      if (trans->format == PIPE_FORMAT_S8Z24_UNORM ||
-          trans->format == PIPE_FORMAT_X8Z24_UNORM) {
+      if (trans->texture->format == PIPE_FORMAT_S8Z24_UNORM ||
+          trans->texture->format == PIPE_FORMAT_X8Z24_UNORM) {
          if (format == GL_DEPTH_COMPONENT) {
             for (i = 0; i < height; i++) {
                GLuint ztemp[MAX_WIDTH];
@@ -463,8 +463,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
             }
          }
       }
-      else if (trans->format == PIPE_FORMAT_Z24S8_UNORM ||
-               trans->format == PIPE_FORMAT_Z24X8_UNORM) {
+      else if (trans->texture->format == PIPE_FORMAT_Z24S8_UNORM ||
+               trans->texture->format == PIPE_FORMAT_Z24X8_UNORM) {
          if (format == GL_DEPTH_COMPONENT) {
             for (i = 0; i < height; i++) {
                GLuint ztemp[MAX_WIDTH];
@@ -490,7 +490,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
             }
          }
       }
-      else if (trans->format == PIPE_FORMAT_Z16_UNORM) {
+      else if (trans->texture->format == PIPE_FORMAT_Z16_UNORM) {
          for (i = 0; i < height; i++) {
             GLushort ztemp[MAX_WIDTH];
             GLfloat zfloat[MAX_WIDTH];
@@ -505,7 +505,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
             dst += dstStride;
          }
       }
-      else if (trans->format == PIPE_FORMAT_Z32_UNORM) {
+      else if (trans->texture->format == PIPE_FORMAT_Z32_UNORM) {
          for (i = 0; i < height; i++) {
             GLuint ztemp[MAX_WIDTH];
             GLfloat zfloat[MAX_WIDTH];
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 3a2337802f..6d136f5abf 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -405,7 +405,6 @@ compress_with_blit(GLcontext * ctx,
    memset(&templ, 0, sizeof(templ));
    templ.target = PIPE_TEXTURE_2D;
    templ.format = st_mesa_format_to_pipe_format(mesa_format);
-   pf_get_block(templ.format, &templ.block);
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
@@ -833,7 +832,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
    /* copy/pack data into user buffer */
    if (st_equal_formats(stImage->pt->format, format, type)) {
       /* memcpy */
-      const uint bytesPerRow = width * pf_get_size(stImage->pt->format);
+      const uint bytesPerRow = width * pf_get_blocksize(stImage->pt->format);
       ubyte *map = screen->transfer_map(screen, tex_xfer);
       GLuint row;
       for (row = 0; row < height; row++) {
@@ -915,7 +914,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
                                             PIPE_TRANSFER_READ, 0, 0,
                                             stImage->base.Width,
                                             stImage->base.Height);
-      texImage->RowStride = stImage->transfer->stride / stImage->pt->block.size;
+      texImage->RowStride = stImage->transfer->stride / pf_get_blocksize(stImage->pt->format);
    }
    else {
       /* Otherwise, the image should actually be stored in
@@ -1163,10 +1162,10 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
                            struct gl_texture_image *texImage)
 {
    struct st_texture_image *stImage = st_texture_image(texImage);
-   struct pipe_format_block block;
    int srcBlockStride;
    int dstBlockStride;
    int y;
+   enum pipe_format pformat = stImage->pt ? stImage->pt->format : PIPE_FORMAT_NONE;
 
    if (stImage->pt) {
       unsigned face = _mesa_tex_target_to_face(target);
@@ -1178,8 +1177,7 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
                                             xoffset, yoffset,
                                             width, height);
       
-      block = stImage->pt->block;
-      srcBlockStride = pf_get_stride(&block, width);
+      srcBlockStride = pf_get_stride(pformat, width);
       dstBlockStride = stImage->transfer->stride;
    } else {
       assert(stImage->pt);
@@ -1193,16 +1191,16 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
       return;
    }
 
-   assert(xoffset % block.width == 0);
-   assert(yoffset % block.height == 0);
-   assert(width % block.width == 0);
-   assert(height % block.height == 0);
+   assert(xoffset % pf_get_blockwidth(pformat) == 0);
+   assert(yoffset % pf_get_blockheight(pformat) == 0);
+   assert(width % pf_get_blockwidth(pformat) == 0);
+   assert(height % pf_get_blockheight(pformat) == 0);
 
-   for (y = 0; y < height; y += block.height) {
+   for (y = 0; y < height; y += pf_get_blockheight(pformat)) {
       /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */
-      const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(&block, y);
-      char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(&block, y);
-      memcpy(dst, src, pf_get_stride(&block, width));
+      const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(pformat, y);
+      char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(pformat, y);
+      memcpy(dst, src, pf_get_stride(pformat, width));
    }
 
    if (stImage->pt) {
@@ -1692,10 +1690,10 @@ copy_image_data_to_texture(struct st_context *st,
                             dstLevel,
                             stImage->base.Data,
                             stImage->base.RowStride * 
-                            stObj->pt->block.size,
+                            pf_get_blocksize(stObj->pt->format),
                             stImage->base.RowStride *
                             stImage->base.Height *
-                            stObj->pt->block.size);
+                            pf_get_blocksize(stObj->pt->format));
       _mesa_align_free(stImage->base.Data);
       stImage->base.Data = NULL;
    }
@@ -1763,8 +1761,7 @@ st_finalize_texture(GLcontext *ctx,
           stObj->pt->last_level < stObj->lastLevel ||
           stObj->pt->width0 != firstImage->base.Width2 ||
           stObj->pt->height0 != firstImage->base.Height2 ||
-          stObj->pt->depth0 != firstImage->base.Depth2 ||
-          stObj->pt->block.size != blockSize)
+          stObj->pt->depth0 != firstImage->base.Depth2)
       {
          pipe_texture_reference(&stObj->pt, NULL);
          ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER;
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index f8068fa12b..7700551830 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -146,8 +146,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
       srcData = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstData = (ubyte *) screen->transfer_map(screen, dstTrans);
 
-      srcStride = srcTrans->stride / srcTrans->block.size;
-      dstStride = dstTrans->stride / dstTrans->block.size;
+      srcStride = srcTrans->stride / pf_get_blocksize(srcTrans->texture->format);
+      dstStride = dstTrans->stride / pf_get_blocksize(dstTrans->texture->format);
 
       _mesa_generate_mipmap_level(target, datatype, comps,
                                   0 /*border*/,
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index dbccee86c1..3035d78b61 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -104,7 +104,6 @@ st_texture_create(struct st_context *st,
    pt.width0 = width0;
    pt.height0 = height0;
    pt.depth0 = depth0;
-   pf_get_block(format, &pt.block);
    pt.tex_usage = usage;
 
    newtex = screen->texture_create(screen, &pt);
@@ -242,8 +241,9 @@ st_surface_data(struct pipe_context *pipe,
    struct pipe_screen *screen = pipe->screen;
    void *map = screen->transfer_map(screen, dst);
 
+   assert(dst->texture);
    util_copy_rect(map,
-                  &dst->block,
+                  dst->texture->format,
                   dst->stride,
                   dstx, dsty, 
                   width, height, 
-- 
cgit v1.2.3


From decf6ed810eae473d043a4a399a5a84f1378a725 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 30 Nov 2009 23:02:49 +0100
Subject: fixups for interface changes (mostly state trackers)

---
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |  4 +--
 src/gallium/drivers/trace/tr_dump_state.c          | 24 ---------------
 src/gallium/drivers/trace/tr_dump_state.h          |  3 --
 src/gallium/drivers/trace/tr_rbug.c                | 13 +++++---
 src/gallium/drivers/trace/tr_screen.c              |  3 +-
 src/gallium/state_trackers/dri/dri_drawable.c      |  1 -
 src/gallium/state_trackers/egl/egl_surface.c       |  1 -
 src/gallium/state_trackers/python/gallium.i        |  1 -
 src/gallium/state_trackers/python/p_device.i       |  1 -
 src/gallium/state_trackers/python/p_format.i       |  8 -----
 src/gallium/state_trackers/python/p_texture.i      | 32 ++++----------------
 .../state_trackers/python/retrace/interpreter.py   |  3 +-
 src/gallium/state_trackers/python/st_device.c      |  3 --
 src/gallium/state_trackers/python/st_sample.c      | 35 ++++++++++++----------
 src/gallium/state_trackers/python/st_sample.h      |  1 -
 .../state_trackers/python/st_softpipe_winsys.c     | 19 ++----------
 .../state_trackers/python/tests/surface_copy.py    |  7 +++--
 .../python/tests/texture_transfer.py               |  5 ++--
 src/gallium/state_trackers/vega/api_filters.c      |  1 -
 src/gallium/state_trackers/vega/image.c            |  1 -
 src/gallium/state_trackers/vega/mask.c             |  1 -
 src/gallium/state_trackers/vega/paint.c            |  1 -
 src/gallium/state_trackers/vega/renderer.c         |  1 -
 src/gallium/state_trackers/vega/vg_tracker.c       |  1 -
 src/gallium/state_trackers/xorg/xorg_crtc.c        |  3 +-
 src/gallium/state_trackers/xorg/xorg_dri2.c        |  1 -
 src/gallium/state_trackers/xorg/xorg_exa.c         |  6 ++--
 src/gallium/state_trackers/xorg/xorg_renderer.c    |  1 -
 src/gallium/state_trackers/xorg/xorg_xv.c          |  1 -
 .../winsys/drm/nouveau/drm/nouveau_drm_api.c       |  3 +-
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 17 ++++-------
 src/gallium/winsys/egl_xlib/sw_winsys.c            | 19 ++----------
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c         |  9 ++----
 src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c       | 17 ++---------
 src/gallium/winsys/gdi/gdi_softpipe_winsys.c       | 23 ++++----------
 src/gallium/winsys/xlib/xlib_cell.c                | 20 +++----------
 src/gallium/winsys/xlib/xlib_llvmpipe.c            | 15 ++++------
 src/gallium/winsys/xlib/xlib_softpipe.c            | 15 ++++------
 38 files changed, 87 insertions(+), 233 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 8b4c0dc3a2..bffc018848 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1449,7 +1449,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    assert(r);
    assert(blocks);
 
-   tex_pitch = r->tex_transfer[0]->stride / r->tex_transfer[0]->block.size;
+   tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format);
    texels = r->texels[0] + mbpy * tex_pitch + mbpx;
 
    for (y = 0; y < 2; ++y) {
@@ -1488,7 +1488,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    mbpy /= 2;
 
    for (tb = 0; tb < 2; ++tb) {
-      tex_pitch = r->tex_transfer[tb + 1]->stride / r->tex_transfer[tb + 1]->block.size;
+      tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
       texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
 
       if ((cbp >> (1 - tb)) & 1) {
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 6d58209294..0102cc1876 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -43,19 +43,6 @@ void trace_dump_format(enum pipe_format format)
 }
 
 
-void trace_dump_block(const struct pipe_format_block *block)
-{
-   if (!trace_dumping_enabled_locked())
-      return;
-
-   trace_dump_struct_begin("pipe_format_block");
-   trace_dump_member(uint, block, size);
-   trace_dump_member(uint, block, width);
-   trace_dump_member(uint, block, height);
-   trace_dump_struct_end();
-}
-
-
 static void trace_dump_reference(const struct pipe_reference *reference)
 {
    if (!trace_dumping_enabled_locked())
@@ -94,10 +81,6 @@ void trace_dump_template(const struct pipe_texture *templat)
    trace_dump_uint(templat->depth0);
    trace_dump_member_end();
 
-   trace_dump_member_begin("block");
-   trace_dump_block(&templat->block);
-   trace_dump_member_end();
-
    trace_dump_member(uint, templat, last_level);
    trace_dump_member(uint, templat, tex_usage);
 
@@ -483,16 +466,9 @@ void trace_dump_transfer(const struct pipe_transfer *state)
 
    trace_dump_struct_begin("pipe_transfer");
 
-   trace_dump_member(format, state, format);
    trace_dump_member(uint, state, width);
    trace_dump_member(uint, state, height);
 
-   trace_dump_member_begin("block");
-   trace_dump_block(&state->block);
-   trace_dump_member_end();
-
-   trace_dump_member(uint, state, nblocksx);
-   trace_dump_member(uint, state, nblocksy);
    trace_dump_member(uint, state, stride);
    trace_dump_member(uint, state, usage);
 
diff --git a/src/gallium/drivers/trace/tr_dump_state.h b/src/gallium/drivers/trace/tr_dump_state.h
index 05b821adb6..07ad6fbb20 100644
--- a/src/gallium/drivers/trace/tr_dump_state.h
+++ b/src/gallium/drivers/trace/tr_dump_state.h
@@ -35,11 +35,8 @@
 
 void trace_dump_format(enum pipe_format format);
 
-void trace_dump_block(const struct pipe_format_block *block);
-
 void trace_dump_template(const struct pipe_texture *templat);
 
-
 void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state);
 
 void trace_dump_poly_stipple(const struct pipe_poly_stipple *state);
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index b59458c0e3..af1d7f3224 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -203,7 +203,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header,
                                &t->width0, 1,
                                &t->height0, 1,
                                &t->depth0, 1,
-                               t->block.width, t->block.height, t->block.size,
+                               pf_get_blockwidth(t->format),
+                               pf_get_blockheight(t->format),
+                               pf_get_blocksize(t->format),
                                t->last_level,
                                t->nr_samples,
                                t->tex_usage,
@@ -251,9 +253,12 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
    map = screen->transfer_map(screen, t);
 
    rbug_send_texture_read_reply(tr_rbug->con, serial,
-                                t->format,
-                                t->block.width, t->block.height, t->block.size,
-                                (uint8_t*)map, t->stride * t->nblocksy,
+                                t->texture->format,
+                                pf_get_blockwidth(t->texture->format),
+                                pf_get_blockheight(t->texture->format),
+                                pf_get_blocksize(t->texture->format),
+                                (uint8_t*)map,
+                                t->stride * pf_get_nblocksy(t->texture->format, t->height),
                                 t->stride,
                                 NULL);
 
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index 7da9bd3866..f69f7da000 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -35,6 +35,7 @@
 #include "tr_screen.h"
 
 #include "pipe/p_inlines.h"
+#include "pipe/p_format.h"
 
 
 static boolean trace = FALSE;
@@ -424,7 +425,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
    struct pipe_transfer *transfer = tr_trans->transfer;
 
    if(tr_trans->map) {
-      size_t size = transfer->nblocksy * transfer->stride;
+      size_t size = pf_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
 
       trace_dump_call_begin("pipe_screen", "transfer_write");
 
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index 45a6059ea8..099cf1e064 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -66,7 +66,6 @@ dri_surface_from_handle(struct drm_api *api,
    templat.format = format;
    templat.width0 = width;
    templat.height0 = height;
-   pf_get_block(templat.format, &templat.block);
 
    texture = api->texture_from_shared_handle(api, screen, &templat,
                                              "dri2 buffer", pitch, handle);
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index ddd9b04cd4..737bdfdd34 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -118,7 +118,6 @@ drm_create_texture(_EGLDisplay *dpy,
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	templat.width0 = w;
 	templat.height0 = h;
-	pf_get_block(templat.format, &templat.block);
 
 	texture = screen->texture_create(dev->screen,
 	                                 &templat);
diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i
index 3f79cc1a3d..8e323f4896 100644
--- a/src/gallium/state_trackers/python/gallium.i
+++ b/src/gallium/state_trackers/python/gallium.i
@@ -80,7 +80,6 @@
 %rename(Stencil) pipe_stencil_state;
 %rename(Alpha) pipe_alpha_state;
 %rename(DepthStencilAlpha) pipe_depth_stencil_alpha_state;
-%rename(FormatBlock) pipe_format_block;
 %rename(Framebuffer) pipe_framebuffer_state;
 %rename(PolyStipple) pipe_poly_stipple;
 %rename(Rasterizer) pipe_rasterizer_state;
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index a83bcc71a1..2dc995adb0 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -112,7 +112,6 @@ struct st_device {
       struct pipe_texture templat;
       memset(&templat, 0, sizeof(templat));
       templat.format = format;
-      pf_get_block(templat.format, &templat.block);
       templat.width0 = width;
       templat.height0 = height;
       templat.depth0 = depth;
diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i
index 26fb12b387..68df009331 100644
--- a/src/gallium/state_trackers/python/p_format.i
+++ b/src/gallium/state_trackers/python/p_format.i
@@ -152,11 +152,3 @@ enum pipe_format {
    PIPE_FORMAT_DXT5_SRGBA,
 };
 
-
-struct pipe_format_block
-{
-   unsigned size;
-   unsigned width;
-   unsigned height;
-};
-
diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i
index 5416b872f5..1de7f86a3c 100644
--- a/src/gallium/state_trackers/python/p_texture.i
+++ b/src/gallium/state_trackers/python/p_texture.i
@@ -69,15 +69,7 @@
    unsigned get_depth(unsigned level=0) {
       return u_minify($self->depth0, level);
    }
-   
-   unsigned get_nblocksx(unsigned level=0) {
-      return $self->nblocksx[level];
-   }
-   
-   unsigned get_nblocksy(unsigned level=0) {
-      return $self->nblocksy[level];
-   }
-   
+  
    /** Get a surface which is a "view" into a texture */
    struct st_surface *
    get_surface(unsigned face=0, unsigned level=0, unsigned zslice=0)
@@ -126,8 +118,6 @@ struct st_surface
    unsigned format;
    unsigned width;
    unsigned height;
-   unsigned nblocksx;
-   unsigned nblocksy;
    
    ~st_surface() {
       pipe_texture_reference(&$self->texture, NULL);
@@ -142,8 +132,8 @@ struct st_surface
       struct pipe_transfer *transfer;
       unsigned stride;
 
-      stride = pf_get_nblocksx(&texture->block, w) * texture->block.size;
-      *LENGTH = pf_get_nblocksy(&texture->block, h) * stride;
+      stride = pf_get_stride(texture->format, w);
+      *LENGTH = pf_get_nblocksy(texture->format, h) * stride;
       *STRING = (char *) malloc(*LENGTH);
       if(!*STRING)
          return;
@@ -169,9 +159,9 @@ struct st_surface
       struct pipe_transfer *transfer;
      
       if(stride == 0)
-         stride = pf_get_nblocksx(&texture->block, w) * texture->block.size;
+         stride = pf_get_stride(texture->format, w);
       
-      if(LENGTH < pf_get_nblocksy(&texture->block, h) * stride)
+      if(LENGTH < pf_get_nblocksy(texture->format, h) * stride)
          SWIG_exception(SWIG_ValueError, "offset must be smaller than buffer size");
          
       transfer = screen->get_tex_transfer(screen,
@@ -383,18 +373,6 @@ struct st_surface
    {
       return u_minify(surface->texture->height0, surface->level);
    }
-
-   static unsigned
-   st_surface_nblocksx_get(struct st_surface *surface)
-   {
-      return surface->texture->nblocksx[surface->level];
-   }
-   
-   static unsigned
-   st_surface_nblocksy_get(struct st_surface *surface)
-   {
-      return surface->texture->nblocksy[surface->level];
-   }
 %}
 
 /* Avoid naming conflict with p_inlines.h's pipe_buffer_read/write */ 
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index d0bcb690a9..3251046c79 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -99,7 +99,6 @@ struct_factories = {
     "pipe_stencil_state": gallium.Stencil,
     "pipe_alpha_state": gallium.Alpha,
     "pipe_depth_stencil_alpha_state": gallium.DepthStencilAlpha,
-    "pipe_format_block": gallium.FormatBlock,
     #"pipe_framebuffer_state": gallium.Framebuffer,
     "pipe_poly_stipple": gallium.PolyStipple,
     "pipe_rasterizer_state": gallium.Rasterizer,
@@ -307,7 +306,7 @@ class Screen(Object):
     def surface_write(self, surface, data, stride, size):
         if surface is None:
             return
-        assert surface.nblocksy * stride == size 
+#        assert surface.nblocksy * stride == size 
         surface.put_tile_raw(0, 0, surface.width, surface.height, data, stride)
 
     def get_tex_transfer(self, texture, face, level, zslice, usage, x, y, w, h):
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index a791113aba..2966b24cdc 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -249,9 +249,6 @@ st_context_create(struct st_device *st_dev)
       memset( &templat, 0, sizeof( templat ) );
       templat.target = PIPE_TEXTURE_2D;
       templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-      templat.block.size = 4;
-      templat.block.width = 1;
-      templat.block.height = 1;
       templat.width0 = 1;
       templat.height0 = 1;
       templat.depth0 = 1;
diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c
index 6fee90afda..97ca2afc54 100644
--- a/src/gallium/state_trackers/python/st_sample.c
+++ b/src/gallium/state_trackers/python/st_sample.c
@@ -423,7 +423,6 @@ dxt5_rgba_data[] = {
 
 static INLINE void 
 st_sample_dxt_pixel_block(enum pipe_format format, 
-                          const struct pipe_format_block *block,
                           uint8_t *raw,
                           float *rgba, unsigned rgba_stride, 
                           unsigned w, unsigned h)
@@ -462,21 +461,21 @@ st_sample_dxt_pixel_block(enum pipe_format format,
          for(ch = 0; ch < 4; ++ch)
             rgba[y*rgba_stride + x*4 + ch] = (float)(data[i].rgba[y*4*4 + x*4 + ch])/255.0f;
    
-   memcpy(raw, data[i].raw, block->size);
+   memcpy(raw, data[i].raw, pf_get_blocksize(format));
 }
 
 
 static INLINE void 
 st_sample_generic_pixel_block(enum pipe_format format, 
-                              const struct pipe_format_block *block,
                               uint8_t *raw,
                               float *rgba, unsigned rgba_stride,
                               unsigned w, unsigned h)
 {
    unsigned i;
    unsigned x, y, ch;
+   unsigned blocksize = pf_get_blocksize(format);
    
-   for(i = 0; i < block->size; ++i)
+   for(i = 0; i < blocksize; ++i)
       raw[i] = (uint8_t)st_random();
    
    
@@ -503,7 +502,6 @@ st_sample_generic_pixel_block(enum pipe_format format,
  */
 void 
 st_sample_pixel_block(enum pipe_format format,
-                      const struct pipe_format_block *block,
                       void *raw,
                       float *rgba, unsigned rgba_stride,
                       unsigned w, unsigned h)
@@ -513,11 +511,11 @@ st_sample_pixel_block(enum pipe_format format,
    case PIPE_FORMAT_DXT1_RGBA:
    case PIPE_FORMAT_DXT3_RGBA:
    case PIPE_FORMAT_DXT5_RGBA:
-      st_sample_dxt_pixel_block(format, block, raw, rgba, rgba_stride, w, h);
+      st_sample_dxt_pixel_block(format, raw, rgba, rgba_stride, w, h);
       break;
 
    default:
-      st_sample_generic_pixel_block(format, block, raw, rgba, rgba_stride, w, h);
+      st_sample_generic_pixel_block(format, raw, rgba, rgba_stride, w, h);
       break;
    }
 }
@@ -548,18 +546,23 @@ st_sample_surface(struct st_surface *surface, float *rgba)
 
    raw = screen->transfer_map(screen, transfer);
    if (raw) {
-      const struct pipe_format_block *block = &texture->block;
+      enum pipe_format format = texture->format;
       uint x, y;
+      unsigned nblocksx = pf_get_nblocksx(format, width);
+      unsigned nblocksy = pf_get_nblocksy(format, height);
+      unsigned blockwidth = pf_get_blockwidth(format);
+      unsigned blockheight = pf_get_blockheight(format);
+      unsigned blocksize = pf_get_blocksize(format);
 
-      for (y = 0; y < transfer->nblocksy; ++y) {
-         for (x = 0; x < transfer->nblocksx; ++x) {
-            st_sample_pixel_block(texture->format,
-                                  block,
-                                  (uint8_t *) raw + y * transfer->stride + x * block->size,
-                                  rgba + y * block->height * rgba_stride + x * block->width * 4,
+
+      for (y = 0; y < nblocksy; ++y) {
+         for (x = 0; x < nblocksx; ++x) {
+            st_sample_pixel_block(format,
+                                  (uint8_t *) raw + y * transfer->stride + x * blocksize,
+                                  rgba + y * blockheight * rgba_stride + x * blockwidth * 4,
                                   rgba_stride,
-                                  MIN2(block->width, width - x*block->width),
-                                  MIN2(block->height, height - y*block->height));
+                                  MIN2(blockwidth, width - x*blockwidth),
+                                  MIN2(blockheight, height - y*blockheight));
          }
       }
 
diff --git a/src/gallium/state_trackers/python/st_sample.h b/src/gallium/state_trackers/python/st_sample.h
index 0a27083549..888114d302 100644
--- a/src/gallium/state_trackers/python/st_sample.h
+++ b/src/gallium/state_trackers/python/st_sample.h
@@ -35,7 +35,6 @@
 
 void 
 st_sample_pixel_block(enum pipe_format format,
-                      const struct pipe_format_block *block,
                       void *raw,
                       float *rgba, unsigned rgba_stride,
                       unsigned w, unsigned h);
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index f0abd12e3d..43c61af1ff 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -157,16 +157,6 @@ st_softpipe_user_buffer_create(struct pipe_winsys *winsys,
 }
 
 
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
 static struct pipe_buffer *
 st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
                                   unsigned width, unsigned height,
@@ -176,13 +166,10 @@ st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
                                   unsigned *stride)
 {
    const unsigned alignment = 64;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy;
+   unsigned nblocksy;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = round_up(nblocksx * block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
diff --git a/src/gallium/state_trackers/python/tests/surface_copy.py b/src/gallium/state_trackers/python/tests/surface_copy.py
index 3ceecbbd3a..df5babb78a 100755
--- a/src/gallium/state_trackers/python/tests/surface_copy.py
+++ b/src/gallium/state_trackers/python/tests/surface_copy.py
@@ -98,9 +98,10 @@ class TextureTest(TestCase):
         y = 0
         w = dst_surface.width
         h = dst_surface.height
-    
-        stride = dst_surface.nblocksx * dst_texture.block.size
-        size = dst_surface.nblocksy * stride
+
+        # ???
+        stride = pf_get_stride(texture->format, w)
+        size = pf_get_nblocksy(texture->format) * stride
         src_raw = os.urandom(size)
 
         src_surface.put_tile_raw(0, 0, w, h, src_raw, stride)
diff --git a/src/gallium/state_trackers/python/tests/texture_transfer.py b/src/gallium/state_trackers/python/tests/texture_transfer.py
index e65b425adf..35daca9e49 100755
--- a/src/gallium/state_trackers/python/tests/texture_transfer.py
+++ b/src/gallium/state_trackers/python/tests/texture_transfer.py
@@ -86,8 +86,9 @@ class TextureTest(TestCase):
         
         surface = texture.get_surface(face, level, zslice)
         
-        stride = surface.nblocksx * texture.block.size
-        size = surface.nblocksy * stride
+        # ???
+        stride = pf_get_stride(texture->format, w)
+        size = pf_get_nblocksy(texture->format) * stride
 
         in_raw = os.urandom(size)
 
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index faf396d087..eb135c1ff4 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -71,7 +71,6 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
    templ.width0 = color_data_len;
    templ.height0 = 1;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 4684a5727d..172311851e 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -270,7 +270,6 @@ struct vg_image * image_create(VGImageFormat format,
    memset(&pt, 0, sizeof(pt));
    pt.target = PIPE_TEXTURE_2D;
    pt.format = pformat;
-   pf_get_block(pformat, &pt.block);
    pt.last_level = 0;
    pt.width0 = width;
    pt.height0 = height;
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index b84103fdba..868c28239a 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -491,7 +491,6 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height)
       memset(&pt, 0, sizeof(pt));
       pt.target = PIPE_TEXTURE_2D;
       pt.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-      pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block);
       pt.last_level = 0;
       pt.width0 = width;
       pt.height0 = height;
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index e8ca7d9e89..785c982943 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -154,7 +154,6 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p)
    templ.width0 = 1024;
    templ.height0 = 1;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 9085ed1bfe..c85dae0282 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -448,7 +448,6 @@ void renderer_copy_surface(struct renderer *ctx,
    texTemp.width0 = srcW;
    texTemp.height0 = srcH;
    texTemp.depth0 = 1;
-   pf_get_block(src->format, &texTemp.block);
 
    tex = screen->texture_create(screen, &texTemp);
    if (!tex)
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index d28463dd1b..ed18dd6075 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -50,7 +50,6 @@ create_texture(struct pipe_context *pipe, enum pipe_format format,
    }
 
    templ.target = PIPE_TEXTURE_2D;
-   pf_get_block(templ.format, &templ.block);
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index c4751724c9..0d1844b53c 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -191,7 +191,6 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image)
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	templat.width0 = 64;
 	templat.height0 = 64;
-	pf_get_block(templat.format, &templat.block);
 
 	crtcp->cursor_tex = ms->screen->texture_create(ms->screen,
 						       &templat);
@@ -207,7 +206,7 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image)
 					    PIPE_TRANSFER_WRITE,
 					    0, 0, 64, 64);
     ptr = ms->screen->transfer_map(ms->screen, transfer);
-    util_copy_rect(ptr, &crtcp->cursor_tex->block,
+    util_copy_rect(ptr, crtcp->cursor_tex->format,
 		   transfer->stride, 0, 0,
 		   64, 64, (void*)image, 64 * 4, 0, 0);
     ms->screen->transfer_unmap(ms->screen, transfer);
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 406e0afff4..d3bb381333 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -108,7 +108,6 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
 	    else
 		template.format = ms->ds_depth_bits_last ?
 		    PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM;
-	    pf_get_block(template.format, &template.block);
 	    template.width0 = pDraw->width;
 	    template.height0 = pDraw->height;
 	    template.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index a68a626fa4..c02ed39ca1 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -206,7 +206,7 @@ ExaDownloadFromScreen(PixmapPtr pPix, int x,  int y, int w,  int h, char *dst,
                  x, y, w, h, dst_pitch);
 #endif
 
-    util_copy_rect((unsigned char*)dst, &priv->tex->block, dst_pitch, 0, 0,
+    util_copy_rect((unsigned char*)dst, priv->tex->format, dst_pitch, 0, 0,
 		   w, h, exa->scrn->transfer_map(exa->scrn, transfer),
 		   transfer->stride, 0, 0);
 
@@ -246,7 +246,7 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src,
 #endif
 
     util_copy_rect(exa->scrn->transfer_map(exa->scrn, transfer),
-		   &priv->tex->block, transfer->stride, 0, 0, w, h,
+		   priv->tex->format, transfer->stride, 0, 0, w, h,
 		   (unsigned char*)src, src_pitch, 0, 0);
 
     exa->scrn->transfer_unmap(exa->scrn, transfer);
@@ -761,7 +761,6 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	memset(&template, 0, sizeof(template));
 	template.target = PIPE_TEXTURE_2D;
 	exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format);
-	pf_get_block(template.format, &template.block);
 	template.width0 = width;
 	template.height0 = height;
 	template.depth0 = 1;
@@ -840,7 +839,6 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn,
     memset(&template, 0, sizeof(template));
     template.target = PIPE_TEXTURE_2D;
     exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy);
-    pf_get_block(template.format, &template.block);
     template.width0 = width;
     template.height0 = height;
     template.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 418a8dd88b..5c34e71b1b 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -733,7 +733,6 @@ create_sampler_texture(struct xorg_renderer *r,
    templ.width0 = src->width0;
    templ.height0 = src->height0;
    templ.depth0 = 1;
-   pf_get_block(format, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    pt = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index bb515a0f49..459ab3c64e 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -170,7 +170,6 @@ create_component_texture(struct pipe_context *pipe,
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index d497861324..8d95826c9a 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -28,7 +28,6 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 	tmpl.format = format;
 	tmpl.width0 = width;
 	tmpl.height0 = height;
-	pf_get_block(tmpl.format, &tmpl.block);
 
 	pt = api->texture_from_shared_handle(api, pscreen, &tmpl,
 					     "front buffer", pitch, handle);
@@ -247,7 +246,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen,
 		return false;
 
 	*handle = mt->bo->handle;
-	*stride = mt->base.nblocksx[0] * mt->base.block.size;
+	*stride = pf_get_stride(mt->base.format, mt->base.width0);
 	return true;
 }
 
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 74afffc9cf..65f7babff2 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -113,17 +113,13 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws,
                                                         unsigned tex_usage,
                                                         unsigned *stride)
 {
-    struct pipe_format_block block;
-    unsigned nblocksx, nblocksy, size;
-
-    pf_get_block(format, &block);
-
-    nblocksx = pf_get_nblocksx(&block, width);
-    nblocksy = pf_get_nblocksy(&block, height);
-
     /* Radeons enjoy things in multiples of 32. */
     /* XXX this can be 32 when POT */
-    *stride = (nblocksx * block.size + 63) & ~63;
+    const unsigned alignment = 64;
+    unsigned nblocksy, size;
+
+    nblocksy = pf_get_nblocksy(format, height);
+    *stride = align(pf_get_stride(format, width), alignment);
     size = *stride * nblocksy;
 
     return radeon_buffer_create(ws, 64, usage, size);
@@ -321,9 +317,6 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co
     tmpl.height0 = h;
     tmpl.depth0 = 1;
     tmpl.format = format;
-    pf_get_block(tmpl.format, &tmpl.block);
-    tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
-    tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h);
 
     pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb);
     if (pt == NULL) {
diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c
index 79ff2cc985..d5644c161f 100644
--- a/src/gallium/winsys/egl_xlib/sw_winsys.c
+++ b/src/gallium/winsys/egl_xlib/sw_winsys.c
@@ -71,16 +71,6 @@ sw_pipe_buffer(struct pipe_buffer *b)
 }
 
 
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
 static const char *
 get_name(struct pipe_winsys *pws)
 {
@@ -170,13 +160,10 @@ surface_buffer_create(struct pipe_winsys *winsys,
                       unsigned *stride)
 {
    const unsigned alignment = 64;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy;
+   unsigned nblocksy;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = round_up(nblocksx * block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 08067aad64..b8c8502d7b 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -138,13 +138,10 @@ static struct pipe_buffer* xsp_surface_buffer_create
 )
 {
    const unsigned int ALIGNMENT = 1;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy;
+   unsigned nblocksy;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = align(nblocksx * block.size, ALIGNMENT);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), ALIGNMENT);
 
    return pws->buffer_create(pws, ALIGNMENT, usage,
                              *stride * nblocksy);
diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
index e8bc0f55ac..81c46c0a96 100644
--- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
@@ -49,7 +49,6 @@
 struct gdi_llvmpipe_displaytarget
 {
    enum pipe_format format;
-   struct pipe_format_block block;
    unsigned width;
    unsigned height;
    unsigned stride;
@@ -118,16 +117,6 @@ gdi_llvmpipe_displaytarget_destroy(struct llvmpipe_winsys *winsys,
 }
 
 
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
 static struct llvmpipe_displaytarget *
 gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys,
                                   enum pipe_format format,
@@ -147,10 +136,10 @@ gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys,
    gdt->width = width;
    gdt->height = height;
 
-   bpp = pf_get_bits(format);
-   cpp = pf_get_size(format);
+   bpp = pf_get_blocksizebits(format);
+   cpp = pf_get_blocksize(format);
    
-   gdt->stride = round_up(width * cpp, alignment);
+   gdt->stride = align(width * cpp, alignment);
    gdt->size = gdt->stride * height;
    
    gdt->data = align_malloc(gdt->size, alignment);
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index 5e0ccf32f4..173fa5b6fe 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -151,16 +151,6 @@ gdi_softpipe_user_buffer_create(struct pipe_winsys *winsys,
 }
 
 
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
 static struct pipe_buffer *
 gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
                                    unsigned width, unsigned height,
@@ -170,13 +160,10 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
                                    unsigned *stride)
 {
    const unsigned alignment = 64;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy;
+   unsigned nblocksy;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = round_up(nblocksx * block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
@@ -283,10 +270,10 @@ gdi_softpipe_present(struct pipe_screen *screen,
 
     memset(&bmi, 0, sizeof(BITMAPINFO));
     bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
-    bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format);
+    bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_blocksize(surface->format);
     bmi.bmiHeader.biHeight= -(long)surface->height;
     bmi.bmiHeader.biPlanes = 1;
-    bmi.bmiHeader.biBitCount = pf_get_bits(surface->format);
+    bmi.bmiHeader.biBitCount = pf_get_blocksizebits(surface->format);
     bmi.bmiHeader.biCompression = BI_RGB;
     bmi.bmiHeader.biSizeImage = 0;
     bmi.bmiHeader.biXPelsPerMeter = 0;
diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c
index 13e609f58f..6e984ebe3c 100644
--- a/src/gallium/winsys/xlib/xlib_cell.c
+++ b/src/gallium/winsys/xlib/xlib_cell.c
@@ -277,15 +277,6 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes)
 
 
-/**
- * Round n up to next multiple.
- */
-static INLINE unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
 static struct pipe_buffer *
 xm_surface_buffer_create(struct pipe_winsys *winsys,
                          unsigned width, unsigned height,
@@ -294,18 +285,15 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
                          unsigned *stride)
 {
    const unsigned alignment = 64;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy;
+   unsigned nblocksy;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = round_up(nblocksx * block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
                                 /* XXX a bit of a hack */
-                                *stride * round_up(nblocksy, TILE_SIZE));
+                                *stride * align(nblocksy, TILE_SIZE));
 }
 
 
diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c
index 3dd15e099b..41f3e248e8 100644
--- a/src/gallium/winsys/xlib/xlib_llvmpipe.c
+++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c
@@ -58,7 +58,6 @@
 struct xm_displaytarget
 {
    enum pipe_format format;
-   struct pipe_format_block block;
    unsigned width;
    unsigned height;
    unsigned stride;
@@ -262,10 +261,10 @@ xm_llvmpipe_display(struct xmesa_buffer *xm_buffer,
    {
       if (xm_dt->tempImage == NULL)
       {
-         assert(xm_dt->block.width == 1);
-         assert(xm_dt->block.height == 1);
+         assert(pf_get_blockwidth(xm_dt->format) == 1);
+         assert(pf_get_blockheight(xm_dt->format) == 1);
          alloc_shm_ximage(xm_dt, xm_buffer,
-                          xm_dt->stride / xm_dt->block.size,
+                          xm_dt->stride / pf_get_blocksize(xm_dt->format),
                           xm_dt->height);
       }
 
@@ -321,7 +320,7 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys,
                         unsigned *stride)
 {
    struct xm_displaytarget *xm_dt = CALLOC_STRUCT(xm_displaytarget);
-   unsigned nblocksx, nblocksy, size;
+   unsigned nblocksy, size;
 
    xm_dt = CALLOC_STRUCT(xm_displaytarget);
    if(!xm_dt)
@@ -331,10 +330,8 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys,
    xm_dt->width = width;
    xm_dt->height = height;
 
-   pf_get_block(format, &xm_dt->block);
-   nblocksx = pf_get_nblocksx(&xm_dt->block, width);
-   nblocksy = pf_get_nblocksy(&xm_dt->block, height);
-   xm_dt->stride = align(nblocksx * xm_dt->block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   xm_dt->stride = align(pf_get_stride(format, width), alignment);
    size = xm_dt->stride * nblocksy;
 
 #ifdef USE_XSHM
diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index 260b39e2a0..69a5dcc2b7 100644
--- a/src/gallium/winsys/xlib/xlib_softpipe.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -254,10 +254,10 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b,
    {
       if (xm_buf->tempImage == NULL) 
       {
-         assert(surf->texture->block.width == 1);
-         assert(surf->texture->block.height == 1);
+         assert(pf_get_blockwidth(surf->texture->format) == 1);
+         assert(pf_get_blockheight(surf->texture->format) == 1);
          alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] /
-                          surf->texture->block.size, surf->height);
+                          pf_get_blocksize(surf->texture->format), surf->height);
       }
 
       ximage = xm_buf->tempImage;
@@ -360,13 +360,10 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
                          unsigned *stride)
 {
    const unsigned alignment = 64;
-   struct pipe_format_block block;
-   unsigned nblocksx, nblocksy, size;
+   unsigned nblocksy, size;
 
-   pf_get_block(format, &block);
-   nblocksx = pf_get_nblocksx(&block, width);
-   nblocksy = pf_get_nblocksy(&block, height);
-   *stride = align(nblocksx * block.size, alignment);
+   nblocksy = pf_get_nblocksy(format, height);
+   *stride = align(pf_get_stride(format, width), alignment);
    size = *stride * nblocksy;
 
 #ifdef USE_XSHM
-- 
cgit v1.2.3


From cd3409ce059e46b4b675d2ad6f1f3b75939aa2ab Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:38:32 +0100
Subject: cell: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/cell/ppu/cell_pipe_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
index ccd0fef6e8..c18a5d0635 100644
--- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c
@@ -383,10 +383,10 @@ cell_init_state_functions(struct cell_context *cell)
    cell->pipe.delete_blend_state = cell_delete_blend_state;
 
    cell->pipe.create_sampler_state = cell_create_sampler_state;
-   cell->pipe.bind_sampler_states = cell_bind_sampler_states;
+   cell->pipe.bind_fragment_sampler_states = cell_bind_sampler_states;
    cell->pipe.delete_sampler_state = cell_delete_sampler_state;
 
-   cell->pipe.set_sampler_textures = cell_set_sampler_textures;
+   cell->pipe.set_fragment_sampler_textures = cell_set_sampler_textures;
 
    cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
    cell->pipe.bind_depth_stencil_alpha_state   = cell_bind_depth_stencil_alpha_state;
-- 
cgit v1.2.3


From e04324b8f93919d75f224644a160a32405740860 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:39:07 +0100
Subject: fo: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/failover/fo_state.c      | 16 ++++++++--------
 src/gallium/drivers/failover/fo_state_emit.c |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index c8eb926299..fca6caa227 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -339,10 +339,10 @@ failover_bind_sampler_states(struct pipe_context *pipe,
    }
    failover->dirty |= FO_NEW_SAMPLER;
    failover->num_samplers = num;
-   failover->sw->bind_sampler_states(failover->sw, num,
-                                     failover->sw_sampler_state);
-   failover->hw->bind_sampler_states(failover->hw, num,
-                                     failover->hw_sampler_state);
+   failover->sw->bind_fragment_sampler_states(failover->sw, num,
+                                              failover->sw_sampler_state);
+   failover->hw->bind_fragment_sampler_states(failover->hw, num,
+                                              failover->hw_sampler_state);
 }
 
 static void
@@ -381,8 +381,8 @@ failover_set_sampler_textures(struct pipe_context *pipe,
                              NULL);
    failover->dirty |= FO_NEW_TEXTURE;
    failover->num_textures = num;
-   failover->sw->set_sampler_textures( failover->sw, num, texture );
-   failover->hw->set_sampler_textures( failover->hw, num, texture );
+   failover->sw->set_fragment_sampler_textures( failover->sw, num, texture );
+   failover->hw->set_fragment_sampler_textures( failover->hw, num, texture );
 }
 
 
@@ -453,7 +453,7 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.bind_blend_state   = failover_bind_blend_state;
    failover->pipe.delete_blend_state = failover_delete_blend_state;
    failover->pipe.create_sampler_state = failover_create_sampler_state;
-   failover->pipe.bind_sampler_states  = failover_bind_sampler_states;
+   failover->pipe.bind_fragment_sampler_states  = failover_bind_sampler_states;
    failover->pipe.delete_sampler_state = failover_delete_sampler_state;
    failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state;
    failover->pipe.bind_depth_stencil_alpha_state   = failover_bind_depth_stencil_state;
@@ -473,7 +473,7 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.set_framebuffer_state = failover_set_framebuffer_state;
    failover->pipe.set_polygon_stipple = failover_set_polygon_stipple;
    failover->pipe.set_scissor_state = failover_set_scissor_state;
-   failover->pipe.set_sampler_textures = failover_set_sampler_textures;
+   failover->pipe.set_fragment_sampler_textures = failover_set_sampler_textures;
    failover->pipe.set_viewport_state = failover_set_viewport_state;
    failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
    failover->pipe.set_vertex_elements = failover_set_vertex_elements;
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
index bd4fce9d20..b4b1067924 100644
--- a/src/gallium/drivers/failover/fo_state_emit.c
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -92,13 +92,13 @@ failover_state_emit( struct failover_context *failover )
       failover->sw->set_viewport_state( failover->sw, &failover->viewport );
 
    if (failover->dirty & FO_NEW_SAMPLER) {
-      failover->sw->bind_sampler_states( failover->sw, failover->num_samplers,
-                                         failover->sw_sampler_state );
+      failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers,
+                                                  failover->sw_sampler_state );
    }
 
    if (failover->dirty & FO_NEW_TEXTURE) {
-      failover->sw->set_sampler_textures( failover->sw, failover->num_textures, 
-                                          failover->texture );
+      failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, 
+                                                   failover->texture );
    }
 
    if (failover->dirty & FO_NEW_VERTEX_BUFFER) {
-- 
cgit v1.2.3


From 25bb04a1ee9b3f28bfa6e60d7ce71ff23726c5b6 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:39:19 +0100
Subject: i915: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/i915/i915_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 71f00bc346..9103847f1c 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -767,7 +767,7 @@ i915_init_state_functions( struct i915_context *i915 )
    i915->base.delete_blend_state = i915_delete_blend_state;
 
    i915->base.create_sampler_state = i915_create_sampler_state;
-   i915->base.bind_sampler_states = i915_bind_sampler_states;
+   i915->base.bind_fragment_sampler_states = i915_bind_sampler_states;
    i915->base.delete_sampler_state = i915_delete_sampler_state;
 
    i915->base.create_depth_stencil_alpha_state = i915_create_depth_stencil_state;
@@ -791,7 +791,7 @@ i915_init_state_functions( struct i915_context *i915 )
 
    i915->base.set_polygon_stipple = i915_set_polygon_stipple;
    i915->base.set_scissor_state = i915_set_scissor_state;
-   i915->base.set_sampler_textures = i915_set_sampler_textures;
+   i915->base.set_fragment_sampler_textures = i915_set_sampler_textures;
    i915->base.set_viewport_state = i915_set_viewport_state;
    i915->base.set_vertex_buffers = i915_set_vertex_buffers;
    i915->base.set_vertex_elements = i915_set_vertex_elements;
-- 
cgit v1.2.3


From f0d3abf3834d3ae6107e66b61d8660e6c09a0a99 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:39:49 +0100
Subject: id: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/identity/id_context.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 4e700089e3..2f5b38ea15 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -228,9 +228,9 @@ identity_bind_sampler_states(struct pipe_context *_pipe,
    struct identity_context *id_pipe = identity_context(_pipe);
    struct pipe_context *pipe = id_pipe->pipe;
 
-   pipe->bind_sampler_states(pipe,
-                             num,
-                             samplers);
+   pipe->bind_fragment_sampler_states(pipe,
+                                      num,
+                                      samplers);
 }
 
 static void
@@ -499,9 +499,9 @@ identity_set_sampler_textures(struct pipe_context *_pipe,
       textures = unwrapped_textures;
    }
 
-   pipe->set_sampler_textures(pipe,
-                              num_textures,
-                              textures);
+   pipe->set_fragment_sampler_textures(pipe,
+                                       num_textures,
+                                       textures);
 }
 
 static void
@@ -682,7 +682,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.bind_blend_state = identity_bind_blend_state;
    id_pipe->base.delete_blend_state = identity_delete_blend_state;
    id_pipe->base.create_sampler_state = identity_create_sampler_state;
-   id_pipe->base.bind_sampler_states = identity_bind_sampler_states;
+   id_pipe->base.bind_fragment_sampler_states = identity_bind_sampler_states;
    id_pipe->base.delete_sampler_state = identity_delete_sampler_state;
    id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state;
    id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state;
@@ -703,7 +703,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple;
    id_pipe->base.set_scissor_state = identity_set_scissor_state;
    id_pipe->base.set_viewport_state = identity_set_viewport_state;
-   id_pipe->base.set_sampler_textures = identity_set_sampler_textures;
+   id_pipe->base.set_fragment_sampler_textures = identity_set_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
    id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
    id_pipe->base.surface_copy = identity_surface_copy;
-- 
cgit v1.2.3


From 551b2db82b5e5093dc19bde130785aceb92868a6 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:40:04 +0100
Subject: lp: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/llvmpipe/lp_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 57e71f3e98..c081f6de03 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -180,7 +180,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.delete_blend_state = llvmpipe_delete_blend_state;
 
    llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
-   llvmpipe->pipe.bind_sampler_states  = llvmpipe_bind_sampler_states;
+   llvmpipe->pipe.bind_fragment_sampler_states  = llvmpipe_bind_sampler_states;
    llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
 
    llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
@@ -205,7 +205,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
    llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
    llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
-   llvmpipe->pipe.set_sampler_textures = llvmpipe_set_sampler_textures;
+   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
-- 
cgit v1.2.3


From d15bb1cba3fd2d36c48e33e14cc3c548cf40d555 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:40:21 +0100
Subject: nv: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/nv04/nv04_state.c | 4 ++--
 src/gallium/drivers/nv10/nv10_state.c | 4 ++--
 src/gallium/drivers/nv20/nv20_state.c | 4 ++--
 src/gallium/drivers/nv30/nv30_state.c | 4 ++--
 src/gallium/drivers/nv40/nv40_state.c | 4 ++--
 src/gallium/drivers/nv50/nv50_state.c | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index d356ebd8b3..ef3005db5f 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -425,9 +425,9 @@ nv04_init_state_functions(struct nv04_context *nv04)
 	nv04->pipe.delete_blend_state = nv04_blend_state_delete;
 
 	nv04->pipe.create_sampler_state = nv04_sampler_state_create;
-	nv04->pipe.bind_sampler_states = nv04_sampler_state_bind;
+	nv04->pipe.bind_fragment_sampler_states = nv04_sampler_state_bind;
 	nv04->pipe.delete_sampler_state = nv04_sampler_state_delete;
-	nv04->pipe.set_sampler_textures = nv04_set_sampler_texture;
+	nv04->pipe.set_fragment_sampler_textures = nv04_set_sampler_texture;
 
 	nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create;
 	nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c
index 9b38219b99..ffc6be3c40 100644
--- a/src/gallium/drivers/nv10/nv10_state.c
+++ b/src/gallium/drivers/nv10/nv10_state.c
@@ -553,9 +553,9 @@ nv10_init_state_functions(struct nv10_context *nv10)
 	nv10->pipe.delete_blend_state = nv10_blend_state_delete;
 
 	nv10->pipe.create_sampler_state = nv10_sampler_state_create;
-	nv10->pipe.bind_sampler_states = nv10_sampler_state_bind;
+	nv10->pipe.bind_fragment_sampler_states = nv10_sampler_state_bind;
 	nv10->pipe.delete_sampler_state = nv10_sampler_state_delete;
-	nv10->pipe.set_sampler_textures = nv10_set_sampler_texture;
+	nv10->pipe.set_fragment_sampler_textures = nv10_set_sampler_texture;
 
 	nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create;
 	nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c
index ed4084980f..3a82e63423 100644
--- a/src/gallium/drivers/nv20/nv20_state.c
+++ b/src/gallium/drivers/nv20/nv20_state.c
@@ -546,9 +546,9 @@ nv20_init_state_functions(struct nv20_context *nv20)
 	nv20->pipe.delete_blend_state = nv20_blend_state_delete;
 
 	nv20->pipe.create_sampler_state = nv20_sampler_state_create;
-	nv20->pipe.bind_sampler_states = nv20_sampler_state_bind;
+	nv20->pipe.bind_fragment_sampler_states = nv20_sampler_state_bind;
 	nv20->pipe.delete_sampler_state = nv20_sampler_state_delete;
-	nv20->pipe.set_sampler_textures = nv20_set_sampler_texture;
+	nv20->pipe.set_fragment_sampler_textures = nv20_set_sampler_texture;
 
 	nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create;
 	nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index b91e972c12..3f802d9241 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -690,9 +690,9 @@ nv30_init_state_functions(struct nv30_context *nv30)
 	nv30->pipe.delete_blend_state = nv30_blend_state_delete;
 
 	nv30->pipe.create_sampler_state = nv30_sampler_state_create;
-	nv30->pipe.bind_sampler_states = nv30_sampler_state_bind;
+	nv30->pipe.bind_fragment_sampler_states = nv30_sampler_state_bind;
 	nv30->pipe.delete_sampler_state = nv30_sampler_state_delete;
-	nv30->pipe.set_sampler_textures = nv30_set_sampler_texture;
+	nv30->pipe.set_fragment_sampler_textures = nv30_set_sampler_texture;
 
 	nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create;
 	nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index c3ee4d2345..bc34e32a4b 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -705,9 +705,9 @@ nv40_init_state_functions(struct nv40_context *nv40)
 	nv40->pipe.delete_blend_state = nv40_blend_state_delete;
 
 	nv40->pipe.create_sampler_state = nv40_sampler_state_create;
-	nv40->pipe.bind_sampler_states = nv40_sampler_state_bind;
+	nv40->pipe.bind_fragment_sampler_states = nv40_sampler_state_bind;
 	nv40->pipe.delete_sampler_state = nv40_sampler_state_delete;
-	nv40->pipe.set_sampler_textures = nv40_set_sampler_texture;
+	nv40->pipe.set_fragment_sampler_textures = nv40_set_sampler_texture;
 
 	nv40->pipe.create_rasterizer_state = nv40_rasterizer_state_create;
 	nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index ffaa5e29d1..07318f2394 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -648,9 +648,9 @@ nv50_init_state_functions(struct nv50_context *nv50)
 	nv50->pipe.delete_blend_state = nv50_blend_state_delete;
 
 	nv50->pipe.create_sampler_state = nv50_sampler_state_create;
-	nv50->pipe.bind_sampler_states = nv50_sampler_state_bind;
+	nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
 	nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
-	nv50->pipe.set_sampler_textures = nv50_set_sampler_texture;
+	nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
 
 	nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
 	nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
-- 
cgit v1.2.3


From c1bcedc4ce48031c9e5d2a2430d27c7a9aaa8b37 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:40:31 +0100
Subject: r300: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/r300/r300_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index a88d66db24..7505353953 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -822,10 +822,10 @@ void r300_init_state_functions(struct r300_context* r300)
     r300->context.delete_rasterizer_state = r300_delete_rs_state;
 
     r300->context.create_sampler_state = r300_create_sampler_state;
-    r300->context.bind_sampler_states = r300_bind_sampler_states;
+    r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
     r300->context.delete_sampler_state = r300_delete_sampler_state;
 
-    r300->context.set_sampler_textures = r300_set_sampler_textures;
+    r300->context.set_fragment_sampler_textures = r300_set_sampler_textures;
 
     r300->context.set_scissor_state = r300_set_scissor_state;
 
-- 
cgit v1.2.3


From 8eecd3bafb759df3f1853490cf149d053c8fcbce Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:40:53 +0100
Subject: svga: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/svga/svga_pipe_sampler.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 3eeca6b784..b4e57c5d15 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -234,9 +234,9 @@ static void svga_set_sampler_textures(struct pipe_context *pipe,
 void svga_init_sampler_functions( struct svga_context *svga )
 {
    svga->pipe.create_sampler_state = svga_create_sampler_state;
-   svga->pipe.bind_sampler_states = svga_bind_sampler_states;
+   svga->pipe.bind_fragment_sampler_states = svga_bind_sampler_states;
    svga->pipe.delete_sampler_state = svga_delete_sampler_state;
-   svga->pipe.set_sampler_textures = svga_set_sampler_textures;
+   svga->pipe.set_fragment_sampler_textures = svga_set_sampler_textures;
 }
 
 
-- 
cgit v1.2.3


From eeb8dd12b48c6ad3f466cf0ea88472fca576ebd4 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:43:51 +0100
Subject: trace: Update for renamed sampler/texture state setters.

---
 src/gallium/drivers/trace/tr_context.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index bf470b46ae..26c01c9b84 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -486,13 +486,13 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
    struct trace_context *tr_ctx = trace_context(_pipe);
    struct pipe_context *pipe = tr_ctx->pipe;
 
-   trace_dump_call_begin("pipe_context", "bind_sampler_states");
+   trace_dump_call_begin("pipe_context", "bind_fragment_sampler_states");
 
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(uint, num_states);
    trace_dump_arg_array(ptr, states, num_states);
 
-   pipe->bind_sampler_states(pipe, num_states, states);;
+   pipe->bind_fragment_sampler_states(pipe, num_states, states);
 
    trace_dump_call_end();
 }
@@ -959,13 +959,13 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe,
    }
    textures = unwrapped_textures;
 
-   trace_dump_call_begin("pipe_context", "set_sampler_textures");
+   trace_dump_call_begin("pipe_context", "set_fragment_sampler_textures");
 
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(uint, num_textures);
    trace_dump_arg_array(ptr, textures, num_textures);
 
-   pipe->set_sampler_textures(pipe, num_textures, textures);;
+   pipe->set_fragment_sampler_textures(pipe, num_textures, textures);
 
    trace_dump_call_end();
 }
@@ -1253,7 +1253,7 @@ trace_context_create(struct pipe_screen *_screen,
    tr_ctx->base.bind_blend_state = trace_context_bind_blend_state;
    tr_ctx->base.delete_blend_state = trace_context_delete_blend_state;
    tr_ctx->base.create_sampler_state = trace_context_create_sampler_state;
-   tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states;
+   tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_sampler_states;
    tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state;
    tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state;
    tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state;
@@ -1274,7 +1274,7 @@ trace_context_create(struct pipe_screen *_screen,
    tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple;
    tr_ctx->base.set_scissor_state = trace_context_set_scissor_state;
    tr_ctx->base.set_viewport_state = trace_context_set_viewport_state;
-   tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures;
+   tr_ctx->base.set_fragment_sampler_textures = trace_context_set_sampler_textures;
    tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
    tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
    if (pipe->surface_copy)
-- 
cgit v1.2.3


From 8a619e62bffa6f21330df747940e322909937806 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:51:20 +0100
Subject: sp: Implement separate vertex sampler state.

---
 src/gallium/drivers/softpipe/sp_context.c       | 22 ++++++--
 src/gallium/drivers/softpipe/sp_context.h       |  7 ++-
 src/gallium/drivers/softpipe/sp_flush.c         |  3 ++
 src/gallium/drivers/softpipe/sp_screen.c        |  4 +-
 src/gallium/drivers/softpipe/sp_state.h         |  9 ++++
 src/gallium/drivers/softpipe/sp_state_derived.c | 13 +++++
 src/gallium/drivers/softpipe/sp_state_sampler.c | 69 +++++++++++++++++++++++--
 7 files changed, 116 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index bdbb7fa9b9..f8bf3e9974 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -107,6 +107,11 @@ softpipe_destroy( struct pipe_context *pipe )
       pipe_texture_reference(&softpipe->texture[i], NULL);
    }
 
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      sp_destroy_tex_tile_cache(softpipe->vertex_tex_cache[i]);
+      pipe_texture_reference(&softpipe->vertex_textures[i], NULL);
+   }
+
    for (i = 0; i < Elements(softpipe->constants); i++) {
       if (softpipe->constants[i].buffer) {
          pipe_buffer_reference(&softpipe->constants[i].buffer, NULL);
@@ -153,6 +158,11 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
           softpipe->tex_cache[i]->texture == texture)
          return PIPE_REFERENCED_FOR_READ;
    }
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      if (softpipe->vertex_tex_cache[i] &&
+          softpipe->vertex_tex_cache[i]->texture == texture)
+         return PIPE_REFERENCED_FOR_READ;
+   }
    
    return PIPE_UNREFERENCED;
 }
@@ -192,7 +202,8 @@ softpipe_create( struct pipe_screen *screen )
    softpipe->pipe.delete_blend_state = softpipe_delete_blend_state;
 
    softpipe->pipe.create_sampler_state = softpipe_create_sampler_state;
-   softpipe->pipe.bind_sampler_states  = softpipe_bind_sampler_states;
+   softpipe->pipe.bind_fragment_sampler_states  = softpipe_bind_sampler_states;
+   softpipe->pipe.bind_vertex_sampler_states = softpipe_bind_vertex_sampler_states;
    softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state;
 
    softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state;
@@ -217,7 +228,8 @@ softpipe_create( struct pipe_screen *screen )
    softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state;
    softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple;
    softpipe->pipe.set_scissor_state = softpipe_set_scissor_state;
-   softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures;
+   softpipe->pipe.set_fragment_sampler_textures = softpipe_set_sampler_textures;
+   softpipe->pipe.set_vertex_sampler_textures = softpipe_set_vertex_sampler_textures;
    softpipe->pipe.set_viewport_state = softpipe_set_viewport_state;
 
    softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers;
@@ -247,7 +259,9 @@ softpipe_create( struct pipe_screen *screen )
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       softpipe->tex_cache[i] = sp_create_tex_tile_cache( screen );
-
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      softpipe->vertex_tex_cache[i] = sp_create_tex_tile_cache(screen);
+   }
 
    /* setup quad rendering stages */
    softpipe->quad.shade = sp_quad_shade_stage(softpipe);
@@ -263,7 +277,7 @@ softpipe_create( struct pipe_screen *screen )
       goto fail;
 
    draw_texture_samplers(softpipe->draw,
-                         PIPE_MAX_SAMPLERS,
+                         PIPE_MAX_VERTEX_SAMPLERS,
                          (struct tgsi_sampler **)
                             softpipe->tgsi.vert_samplers_list);
 
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index a735573d6f..8ce20c5744 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -53,6 +53,7 @@ struct softpipe_context {
    /** Constant state objects */
    struct pipe_blend_state *blend;
    struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_depth_stencil_alpha_state *depth_stencil;
    struct pipe_rasterizer_state *rasterizer;
    struct sp_fragment_shader *fs;
@@ -66,12 +67,15 @@ struct softpipe_context {
    struct pipe_poly_stipple poly_stipple;
    struct pipe_scissor_state scissor;
    struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+   struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
+   unsigned num_vertex_samplers;
+   unsigned num_vertex_textures;
    unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
@@ -121,7 +125,7 @@ struct softpipe_context {
 
    /** TGSI exec things */
    struct {
-      struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_SAMPLERS];
+      struct sp_sampler_varient *vert_samplers_list[PIPE_MAX_VERTEX_SAMPLERS];
       struct sp_sampler_varient *frag_samplers_list[PIPE_MAX_SAMPLERS];
    } tgsi;
 
@@ -139,6 +143,7 @@ struct softpipe_context {
 
    unsigned tex_timestamp;
    struct softpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+   struct softpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
 
    unsigned use_sse : 1;
    unsigned dump_fs : 1;
diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c
index e38b767cf2..75dac810a1 100644
--- a/src/gallium/drivers/softpipe/sp_flush.c
+++ b/src/gallium/drivers/softpipe/sp_flush.c
@@ -55,6 +55,9 @@ softpipe_flush( struct pipe_context *pipe,
       for (i = 0; i < softpipe->num_textures; i++) {
          sp_flush_tex_tile_cache(softpipe->tex_cache[i]);
       }
+      for (i = 0; i < softpipe->num_vertex_textures; i++) {
+         sp_flush_tex_tile_cache(softpipe->vertex_tex_cache[i]);
+      }
    }
 
    if (flags & PIPE_FLUSH_SWAPBUFFERS) {
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 81fb7aa20c..68e4ef4137 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -58,7 +58,9 @@ softpipe_get_param(struct pipe_screen *screen, int param)
    case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
       return PIPE_MAX_SAMPLERS;
    case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-      return PIPE_MAX_SAMPLERS;
+      return PIPE_MAX_VERTEX_SAMPLERS;
+   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+      return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
    case PIPE_CAP_NPOT_TEXTURES:
       return 1;
    case PIPE_CAP_TWO_SIDED_STENCIL:
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 77ee3c1136..d488fb8710 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -104,6 +104,10 @@ void *
 softpipe_create_sampler_state(struct pipe_context *,
                               const struct pipe_sampler_state *);
 void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void
+softpipe_bind_vertex_sampler_states(struct pipe_context *,
+                                    unsigned num_samplers,
+                                    void **samplers);
 void softpipe_delete_sampler_state(struct pipe_context *, void *);
 
 void *
@@ -150,6 +154,11 @@ void softpipe_set_sampler_textures( struct pipe_context *,
                                     unsigned num,
                                     struct pipe_texture ** );
 
+void
+softpipe_set_vertex_sampler_textures(struct pipe_context *,
+                                     unsigned num_textures,
+                                     struct pipe_texture **);
+
 void softpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index 3bc96b9538..c24a737d07 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -213,6 +213,19 @@ update_tgsi_samplers( struct softpipe_context *softpipe )
          }
       }
    }
+
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      struct softpipe_tex_tile_cache *tc = softpipe->vertex_tex_cache[i];
+
+      if (tc->texture) {
+         struct softpipe_texture *spt = softpipe_texture(tc->texture);
+
+         if (spt->timestamp != tc->timestamp) {
+	    sp_tex_tile_cache_validate_texture(tc);
+            tc->timestamp = spt->timestamp;
+         }
+      }
+   }
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c
index db0b8ab76b..ceb4e338f1 100644
--- a/src/gallium/drivers/softpipe/sp_state_sampler.c
+++ b/src/gallium/drivers/softpipe/sp_state_sampler.c
@@ -93,6 +93,34 @@ softpipe_bind_sampler_states(struct pipe_context *pipe,
 }
 
 
+void
+softpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+                                    unsigned num_samplers,
+                                    void **samplers)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   unsigned i;
+
+   assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_samplers == softpipe->num_vertex_samplers &&
+       !memcmp(softpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+      return;
+
+   draw_flush(softpipe->draw);
+
+   for (i = 0; i < num_samplers; ++i)
+      softpipe->vertex_samplers[i] = samplers[i];
+   for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+      softpipe->vertex_samplers[i] = NULL;
+
+   softpipe->num_vertex_samplers = num_samplers;
+
+   softpipe->dirty |= SP_NEW_SAMPLER;
+}
+
+
 void
 softpipe_set_sampler_textures(struct pipe_context *pipe,
                               unsigned num, struct pipe_texture **texture)
@@ -122,6 +150,37 @@ softpipe_set_sampler_textures(struct pipe_context *pipe,
 }
 
 
+void
+softpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                     unsigned num_textures,
+                                     struct pipe_texture **textures)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   uint i;
+
+   assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_textures == softpipe->num_vertex_textures &&
+       !memcmp(softpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+      return;
+   }
+
+   draw_flush(softpipe->draw);
+
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      struct pipe_texture *tex = i < num_textures ? textures[i] : NULL;
+
+      pipe_texture_reference(&softpipe->vertex_textures[i], tex);
+      sp_tex_tile_cache_set_texture(softpipe->vertex_tex_cache[i], tex);
+   }
+
+   softpipe->num_vertex_textures = num_textures;
+
+   softpipe->dirty |= SP_NEW_TEXTURE;
+}
+
+
 /**
  * Find/create an sp_sampler_varient object for sampling the given texture,
  * sampler and tex unit.
@@ -185,16 +244,16 @@ softpipe_reset_sampler_varients(struct softpipe_context *softpipe)
     * fragment programs.
     */
    for (i = 0; i <= softpipe->vs->max_sampler; i++) {
-      if (softpipe->sampler[i]) {
+      if (softpipe->vertex_samplers[i]) {
          softpipe->tgsi.vert_samplers_list[i] = 
             get_sampler_varient( i,
-                                 sp_sampler(softpipe->sampler[i]),
-                                 softpipe->texture[i],
+                                sp_sampler(softpipe->vertex_samplers[i]),
+                                softpipe->vertex_textures[i],
                                  TGSI_PROCESSOR_VERTEX );
 
          sp_sampler_varient_bind_texture( softpipe->tgsi.vert_samplers_list[i], 
-                                          softpipe->tex_cache[i],
-                                          softpipe->texture[i] );
+                                         softpipe->vertex_tex_cache[i],
+                                         softpipe->vertex_textures[i] );
       }
    }
 
-- 
cgit v1.2.3


From 0f884ed993500171ad91fc9f2552574face9ee17 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:52:37 +0100
Subject: sp: Do not falsely advertise support for some SNORM formats.

---
 src/gallium/drivers/softpipe/sp_screen.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 68e4ef4137..6bf3df8e6a 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -145,6 +145,10 @@ softpipe_is_format_supported( struct pipe_screen *screen,
    case PIPE_FORMAT_DXT3_RGBA:
    case PIPE_FORMAT_DXT5_RGBA:
    case PIPE_FORMAT_Z32_FLOAT:
+   case PIPE_FORMAT_R8G8_SNORM:
+   case PIPE_FORMAT_B6UG5SR5S_NORM:
+   case PIPE_FORMAT_X8UB8UG8SR8S_NORM:
+   case PIPE_FORMAT_A8B8G8R8_SNORM:
       return FALSE;
    default:
       return TRUE;
-- 
cgit v1.2.3


From 3f900c33ae6ede1c6f309628b1369a1b968a115d Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 08:54:30 +0100
Subject: trace: Reduce double semicolons to single ones.

---
 src/gallium/drivers/trace/tr_context.c | 60 +++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 30 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 26c01c9b84..696b787c74 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -107,7 +107,7 @@ trace_context_set_edgeflags(struct pipe_context *_pipe,
    /* FIXME: we don't know how big this array is */
    trace_dump_arg(ptr, bitfield);
 
-   pipe->set_edgeflags(pipe, bitfield);;
+   pipe->set_edgeflags(pipe, bitfield);
 
    trace_dump_call_end();
 }
@@ -192,7 +192,7 @@ trace_context_draw_arrays(struct pipe_context *_pipe,
    trace_dump_arg(uint, start);
    trace_dump_arg(uint, count);
 
-   result = pipe->draw_arrays(pipe, mode, start, count);;
+   result = pipe->draw_arrays(pipe, mode, start, count);
 
    trace_dump_ret(bool, result);
 
@@ -232,7 +232,7 @@ trace_context_draw_elements(struct pipe_context *_pipe,
    trace_dump_arg(uint, start);
    trace_dump_arg(uint, count);
 
-   result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);;
+   result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
 
    trace_dump_ret(bool, result);
 
@@ -306,7 +306,7 @@ trace_context_create_query(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(uint, query_type);
 
-   result = pipe->create_query(pipe, query_type);;
+   result = pipe->create_query(pipe, query_type);
 
    trace_dump_ret(ptr, result);
 
@@ -328,7 +328,7 @@ trace_context_destroy_query(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, query);
 
-   pipe->destroy_query(pipe, query);;
+   pipe->destroy_query(pipe, query);
 
    trace_dump_call_end();
 }
@@ -346,7 +346,7 @@ trace_context_begin_query(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, query);
 
-   pipe->begin_query(pipe, query);;
+   pipe->begin_query(pipe, query);
 
    trace_dump_call_end();
 }
@@ -385,7 +385,7 @@ trace_context_get_query_result(struct pipe_context *_pipe,
 
    trace_dump_arg(ptr, pipe);
 
-   _result = pipe->get_query_result(pipe, query, wait, presult);;
+   _result = pipe->get_query_result(pipe, query, wait, presult);
    result = *presult;
 
    trace_dump_arg(uint, result);
@@ -410,7 +410,7 @@ trace_context_create_blend_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(blend_state, state);
 
-   result = pipe->create_blend_state(pipe, state);;
+   result = pipe->create_blend_state(pipe, state);
 
    trace_dump_ret(ptr, result);
 
@@ -432,7 +432,7 @@ trace_context_bind_blend_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->bind_blend_state(pipe, state);;
+   pipe->bind_blend_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -450,7 +450,7 @@ trace_context_delete_blend_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->delete_blend_state(pipe, state);;
+   pipe->delete_blend_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -469,7 +469,7 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(sampler_state, state);
 
-   result = pipe->create_sampler_state(pipe, state);;
+   result = pipe->create_sampler_state(pipe, state);
 
    trace_dump_ret(ptr, result);
 
@@ -510,7 +510,7 @@ trace_context_delete_sampler_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->delete_sampler_state(pipe, state);;
+   pipe->delete_sampler_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -529,7 +529,7 @@ trace_context_create_rasterizer_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(rasterizer_state, state);
 
-   result = pipe->create_rasterizer_state(pipe, state);;
+   result = pipe->create_rasterizer_state(pipe, state);
 
    trace_dump_ret(ptr, result);
 
@@ -551,7 +551,7 @@ trace_context_bind_rasterizer_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->bind_rasterizer_state(pipe, state);;
+   pipe->bind_rasterizer_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -569,7 +569,7 @@ trace_context_delete_rasterizer_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->delete_rasterizer_state(pipe, state);;
+   pipe->delete_rasterizer_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -585,7 +585,7 @@ trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe,
 
    trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state");
 
-   result = pipe->create_depth_stencil_alpha_state(pipe, state);;
+   result = pipe->create_depth_stencil_alpha_state(pipe, state);
 
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(depth_stencil_alpha_state, state);
@@ -610,7 +610,7 @@ trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->bind_depth_stencil_alpha_state(pipe, state);;
+   pipe->bind_depth_stencil_alpha_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -628,7 +628,7 @@ trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->delete_depth_stencil_alpha_state(pipe, state);;
+   pipe->delete_depth_stencil_alpha_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -647,7 +647,7 @@ trace_context_create_fs_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(shader_state, state);
 
-   result = pipe->create_fs_state(pipe, state);;
+   result = pipe->create_fs_state(pipe, state);
 
    trace_dump_ret(ptr, result);
 
@@ -750,7 +750,7 @@ trace_context_bind_vs_state(struct pipe_context *_pipe,
    if (tr_shdr && tr_shdr->replaced)
       state = tr_shdr->replaced;
 
-   pipe->bind_vs_state(pipe, state);;
+   pipe->bind_vs_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -770,7 +770,7 @@ trace_context_delete_vs_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(ptr, state);
 
-   pipe->delete_vs_state(pipe, state);;
+   pipe->delete_vs_state(pipe, state);
 
    trace_dump_call_end();
 
@@ -790,7 +790,7 @@ trace_context_set_blend_color(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(blend_color, state);
 
-   pipe->set_blend_color(pipe, state);;
+   pipe->set_blend_color(pipe, state);
 
    trace_dump_call_end();
 }
@@ -808,7 +808,7 @@ trace_context_set_clip_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(clip_state, state);
 
-   pipe->set_clip_state(pipe, state);;
+   pipe->set_clip_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -880,7 +880,7 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(framebuffer_state, state);
 
-   pipe->set_framebuffer_state(pipe, state);;
+   pipe->set_framebuffer_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -898,7 +898,7 @@ trace_context_set_polygon_stipple(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(poly_stipple, state);
 
-   pipe->set_polygon_stipple(pipe, state);;
+   pipe->set_polygon_stipple(pipe, state);
 
    trace_dump_call_end();
 }
@@ -916,7 +916,7 @@ trace_context_set_scissor_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(scissor_state, state);
 
-   pipe->set_scissor_state(pipe, state);;
+   pipe->set_scissor_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -934,7 +934,7 @@ trace_context_set_viewport_state(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(viewport_state, state);
 
-   pipe->set_viewport_state(pipe, state);;
+   pipe->set_viewport_state(pipe, state);
 
    trace_dump_call_end();
 }
@@ -1024,7 +1024,7 @@ trace_context_set_vertex_elements(struct pipe_context *_pipe,
    trace_dump_struct_array(vertex_element, elements, num_elements);
    trace_dump_arg_end();
 
-   pipe->set_vertex_elements(pipe, num_elements, elements);;
+   pipe->set_vertex_elements(pipe, num_elements, elements);
 
    trace_dump_call_end();
 }
@@ -1085,7 +1085,7 @@ trace_context_surface_fill(struct pipe_context *_pipe,
    trace_dump_arg(uint, width);
    trace_dump_arg(uint, height);
 
-   pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);;
+   pipe->surface_fill(pipe, dst, dstx, dsty, width, height, value);
 
    trace_dump_call_end();
 }
@@ -1128,7 +1128,7 @@ trace_context_flush(struct pipe_context *_pipe,
    trace_dump_arg(ptr, pipe);
    trace_dump_arg(uint, flags);
 
-   pipe->flush(pipe, flags, fence);;
+   pipe->flush(pipe, flags, fence);
 
    if(fence)
       trace_dump_ret(ptr, *fence);
-- 
cgit v1.2.3


From f2f7bd855af49752b1c77746542c62f1c529e953 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 09:01:27 +0100
Subject: id: Implement separate vertex sampler state.

---
 src/gallium/drivers/identity/id_context.c | 58 ++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 2f5b38ea15..4509c7b1e5 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -221,18 +221,31 @@ identity_create_sampler_state(struct pipe_context *_pipe,
 }
 
 static void
-identity_bind_sampler_states(struct pipe_context *_pipe,
-                             unsigned num,
-                             void **samplers)
+identity_bind_fragment_sampler_states(struct pipe_context *_pipe,
+                                      unsigned num_samplers,
+                                      void **samplers)
 {
    struct identity_context *id_pipe = identity_context(_pipe);
    struct pipe_context *pipe = id_pipe->pipe;
 
    pipe->bind_fragment_sampler_states(pipe,
-                                      num,
+                                      num_samplers,
                                       samplers);
 }
 
+static void
+identity_bind_vertex_sampler_states(struct pipe_context *_pipe,
+                                    unsigned num_samplers,
+                                    void **samplers)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+
+   pipe->bind_vertex_sampler_states(pipe,
+                                    num_samplers,
+                                    samplers);
+}
+
 static void
 identity_delete_sampler_state(struct pipe_context *_pipe,
                               void *sampler)
@@ -480,9 +493,9 @@ identity_set_viewport_state(struct pipe_context *_pipe,
 }
 
 static void
-identity_set_sampler_textures(struct pipe_context *_pipe,
-                              unsigned num_textures,
-                              struct pipe_texture **_textures)
+identity_set_fragment_sampler_textures(struct pipe_context *_pipe,
+                                       unsigned num_textures,
+                                       struct pipe_texture **_textures)
 {
    struct identity_context *id_pipe = identity_context(_pipe);
    struct pipe_context *pipe = id_pipe->pipe;
@@ -504,6 +517,31 @@ identity_set_sampler_textures(struct pipe_context *_pipe,
                                        textures);
 }
 
+static void
+identity_set_vertex_sampler_textures(struct pipe_context *_pipe,
+                                     unsigned num_textures,
+                                     struct pipe_texture **_textures)
+{
+   struct identity_context *id_pipe = identity_context(_pipe);
+   struct pipe_context *pipe = id_pipe->pipe;
+   struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS];
+   struct pipe_texture **textures = NULL;
+   unsigned i;
+
+   if (_textures) {
+      for (i = 0; i < num_textures; i++)
+         unwrapped_textures[i] = identity_texture_unwrap(_textures[i]);
+      for (; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+         unwrapped_textures[i] = NULL;
+
+      textures = unwrapped_textures;
+   }
+
+   pipe->set_vertex_sampler_textures(pipe,
+                                     num_textures,
+                                     textures);
+}
+
 static void
 identity_set_vertex_buffers(struct pipe_context *_pipe,
                             unsigned num_buffers,
@@ -682,7 +720,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.bind_blend_state = identity_bind_blend_state;
    id_pipe->base.delete_blend_state = identity_delete_blend_state;
    id_pipe->base.create_sampler_state = identity_create_sampler_state;
-   id_pipe->base.bind_fragment_sampler_states = identity_bind_sampler_states;
+   id_pipe->base.bind_fragment_sampler_states = identity_bind_fragment_sampler_states;
+   id_pipe->base.bind_vertex_sampler_states = identity_bind_vertex_sampler_states;
    id_pipe->base.delete_sampler_state = identity_delete_sampler_state;
    id_pipe->base.create_rasterizer_state = identity_create_rasterizer_state;
    id_pipe->base.bind_rasterizer_state = identity_bind_rasterizer_state;
@@ -703,7 +742,8 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple;
    id_pipe->base.set_scissor_state = identity_set_scissor_state;
    id_pipe->base.set_viewport_state = identity_set_viewport_state;
-   id_pipe->base.set_fragment_sampler_textures = identity_set_sampler_textures;
+   id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
+   id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
    id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
    id_pipe->base.surface_copy = identity_surface_copy;
-- 
cgit v1.2.3


From f8969db2f8410fd3b653734948251ada4284a3c6 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 09:39:08 +0100
Subject: fo: Implement separate vertex sampler state.

---
 src/gallium/drivers/failover/fo_context.h    |  6 +++
 src/gallium/drivers/failover/fo_state.c      | 77 +++++++++++++++++++++++++---
 src/gallium/drivers/failover/fo_state_emit.c |  6 +++
 3 files changed, 82 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h
index 9ba86ba866..149393712a 100644
--- a/src/gallium/drivers/failover/fo_context.h
+++ b/src/gallium/drivers/failover/fo_context.h
@@ -72,6 +72,7 @@ struct failover_context {
     */
    const struct fo_state     *blend;
    const struct fo_state     *sampler[PIPE_MAX_SAMPLERS];
+   const struct fo_state     *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
    const struct fo_state     *depth_stencil;
    const struct fo_state     *rasterizer;
    const struct fo_state     *fragment_shader;
@@ -83,6 +84,7 @@ struct failover_context {
    struct pipe_poly_stipple poly_stipple;
    struct pipe_scissor_state scissor;
    struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+   struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
    struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS];
@@ -92,11 +94,15 @@ struct failover_context {
 
    void *sw_sampler_state[PIPE_MAX_SAMPLERS];
    void *hw_sampler_state[PIPE_MAX_SAMPLERS];
+   void *sw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS];
+   void *hw_vertex_sampler_state[PIPE_MAX_VERTEX_SAMPLERS];
 
    unsigned dirty;
 
    unsigned num_samplers;
+   unsigned num_vertex_samplers;
    unsigned num_textures;
+   unsigned num_vertex_textures;
 
    unsigned mode;
    struct pipe_context *hw;
diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c
index fca6caa227..3f5f556032 100644
--- a/src/gallium/drivers/failover/fo_state.c
+++ b/src/gallium/drivers/failover/fo_state.c
@@ -322,8 +322,9 @@ failover_create_sampler_state(struct pipe_context *pipe,
 }
 
 static void
-failover_bind_sampler_states(struct pipe_context *pipe,
-                             unsigned num, void **sampler)
+failover_bind_fragment_sampler_states(struct pipe_context *pipe,
+                                      unsigned num,
+                                      void **sampler)
 {
    struct failover_context *failover = failover_context(pipe);
    struct fo_state *state = (struct fo_state*)sampler;
@@ -345,6 +346,36 @@ failover_bind_sampler_states(struct pipe_context *pipe,
                                               failover->hw_sampler_state);
 }
 
+static void
+failover_bind_vertex_sampler_states(struct pipe_context *pipe,
+                                    unsigned num_samplers,
+                                    void **samplers)
+{
+   struct failover_context *failover = failover_context(pipe);
+   struct fo_state *state = (struct fo_state*)samplers;
+   uint i;
+
+   assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_samplers == failover->num_vertex_samplers &&
+       !memcmp(failover->vertex_samplers, samplers, num_samplers * sizeof(void *))) {
+      return;
+   }
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      failover->sw_vertex_sampler_state[i] = i < num_samplers ? state[i].sw_state : NULL;
+      failover->hw_vertex_sampler_state[i] = i < num_samplers ? state[i].hw_state : NULL;
+   }
+   failover->dirty |= FO_NEW_SAMPLER;
+   failover->num_vertex_samplers = num_samplers;
+   failover->sw->bind_vertex_sampler_states(failover->sw,
+                                            num_samplers,
+                                            failover->sw_vertex_sampler_state);
+   failover->hw->bind_vertex_sampler_states(failover->hw,
+                                            num_samplers,
+                                            failover->hw_vertex_sampler_state);
+}
+
 static void
 failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
 {
@@ -360,9 +391,9 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
 
 
 static void
-failover_set_sampler_textures(struct pipe_context *pipe,
-                              unsigned num,
-                              struct pipe_texture **texture)
+failover_set_fragment_sampler_textures(struct pipe_context *pipe,
+                                       unsigned num,
+                                       struct pipe_texture **texture)
 {
    struct failover_context *failover = failover_context(pipe);
    uint i;
@@ -386,6 +417,36 @@ failover_set_sampler_textures(struct pipe_context *pipe,
 }
 
 
+static void
+failover_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                     unsigned num_textures,
+                                     struct pipe_texture **textures)
+{
+   struct failover_context *failover = failover_context(pipe);
+   uint i;
+
+   assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_textures == failover->num_vertex_textures &&
+       !memcmp(failover->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+      return;
+   }
+   for (i = 0; i < num_textures; i++) {
+      pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i],
+                             textures[i]);
+   }
+   for (i = num_textures; i < failover->num_vertex_textures; i++) {
+      pipe_texture_reference((struct pipe_texture **)&failover->vertex_textures[i],
+                             NULL);
+   }
+   failover->dirty |= FO_NEW_TEXTURE;
+   failover->num_vertex_textures = num_textures;
+   failover->sw->set_vertex_sampler_textures(failover->sw, num_textures, textures);
+   failover->hw->set_vertex_sampler_textures(failover->hw, num_textures, textures);
+}
+
+
 static void 
 failover_set_viewport_state( struct pipe_context *pipe,
 			     const struct pipe_viewport_state *viewport )
@@ -453,7 +514,8 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.bind_blend_state   = failover_bind_blend_state;
    failover->pipe.delete_blend_state = failover_delete_blend_state;
    failover->pipe.create_sampler_state = failover_create_sampler_state;
-   failover->pipe.bind_fragment_sampler_states  = failover_bind_sampler_states;
+   failover->pipe.bind_fragment_sampler_states  = failover_bind_fragment_sampler_states;
+   failover->pipe.bind_vertex_sampler_states  = failover_bind_vertex_sampler_states;
    failover->pipe.delete_sampler_state = failover_delete_sampler_state;
    failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state;
    failover->pipe.bind_depth_stencil_alpha_state   = failover_bind_depth_stencil_state;
@@ -473,7 +535,8 @@ failover_init_state_functions( struct failover_context *failover )
    failover->pipe.set_framebuffer_state = failover_set_framebuffer_state;
    failover->pipe.set_polygon_stipple = failover_set_polygon_stipple;
    failover->pipe.set_scissor_state = failover_set_scissor_state;
-   failover->pipe.set_fragment_sampler_textures = failover_set_sampler_textures;
+   failover->pipe.set_fragment_sampler_textures = failover_set_fragment_sampler_textures;
+   failover->pipe.set_vertex_sampler_textures = failover_set_vertex_sampler_textures;
    failover->pipe.set_viewport_state = failover_set_viewport_state;
    failover->pipe.set_vertex_buffers = failover_set_vertex_buffers;
    failover->pipe.set_vertex_elements = failover_set_vertex_elements;
diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c
index b4b1067924..a3341e33f8 100644
--- a/src/gallium/drivers/failover/fo_state_emit.c
+++ b/src/gallium/drivers/failover/fo_state_emit.c
@@ -94,11 +94,17 @@ failover_state_emit( struct failover_context *failover )
    if (failover->dirty & FO_NEW_SAMPLER) {
       failover->sw->bind_fragment_sampler_states( failover->sw, failover->num_samplers,
                                                   failover->sw_sampler_state );
+      failover->sw->bind_vertex_sampler_states(failover->sw,
+                                               failover->num_vertex_samplers,
+                                               failover->sw_vertex_sampler_state);
    }
 
    if (failover->dirty & FO_NEW_TEXTURE) {
       failover->sw->set_fragment_sampler_textures( failover->sw, failover->num_textures, 
                                                    failover->texture );
+      failover->sw->set_vertex_sampler_textures(failover->sw,
+                                                failover->num_vertex_textures, 
+                                                failover->vertex_textures);
    }
 
    if (failover->dirty & FO_NEW_VERTEX_BUFFER) {
-- 
cgit v1.2.3


From 57ed791305ded187c455b07e6c6a5b916f37a293 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 1 Dec 2009 09:50:38 +0100
Subject: trace: Implement separate vertex sampler state.

---
 src/gallium/drivers/trace/tr_context.c | 76 ++++++++++++++++++++++++++++++----
 src/gallium/drivers/trace/tr_context.h |  3 ++
 2 files changed, 71 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 696b787c74..2f0f063d2d 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -143,10 +143,16 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
          for (k = 0; k < tr_ctx->curr.nr_cbufs; k++)
             if (tr_ctx->draw_rule.surf == tr_ctx->curr.cbufs[k])
                block = TRUE;
-      if (tr_ctx->draw_rule.tex)
+      if (tr_ctx->draw_rule.tex) {
          for (k = 0; k < tr_ctx->curr.num_texs; k++)
             if (tr_ctx->draw_rule.tex == tr_ctx->curr.tex[k])
                block = TRUE;
+         for (k = 0; k < tr_ctx->curr.num_vert_texs; k++) {
+            if (tr_ctx->draw_rule.tex == tr_ctx->curr.vert_tex[k]) {
+               block = TRUE;
+            }
+         }
+      }
 
       if (block)
          tr_ctx->draw_blocked |= (flag | 4);
@@ -480,8 +486,9 @@ trace_context_create_sampler_state(struct pipe_context *_pipe,
 
 
 static INLINE void
-trace_context_bind_sampler_states(struct pipe_context *_pipe,
-                                  unsigned num_states, void **states)
+trace_context_bind_fragment_sampler_states(struct pipe_context *_pipe,
+                                           unsigned num_states,
+                                           void **states)
 {
    struct trace_context *tr_ctx = trace_context(_pipe);
    struct pipe_context *pipe = tr_ctx->pipe;
@@ -498,6 +505,26 @@ trace_context_bind_sampler_states(struct pipe_context *_pipe,
 }
 
 
+static INLINE void
+trace_context_bind_vertex_sampler_states(struct pipe_context *_pipe,
+                                         unsigned num_states,
+                                         void **states)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct pipe_context *pipe = tr_ctx->pipe;
+
+   trace_dump_call_begin("pipe_context", "bind_vertex_sampler_states");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(uint, num_states);
+   trace_dump_arg_array(ptr, states, num_states);
+
+   pipe->bind_vertex_sampler_states(pipe, num_states, states);
+
+   trace_dump_call_end();
+}
+
+
 static INLINE void
 trace_context_delete_sampler_state(struct pipe_context *_pipe,
                                    void *state)
@@ -941,9 +968,9 @@ trace_context_set_viewport_state(struct pipe_context *_pipe,
 
 
 static INLINE void
-trace_context_set_sampler_textures(struct pipe_context *_pipe,
-                                   unsigned num_textures,
-                                   struct pipe_texture **textures)
+trace_context_set_fragment_sampler_textures(struct pipe_context *_pipe,
+                                            unsigned num_textures,
+                                            struct pipe_texture **textures)
 {
    struct trace_context *tr_ctx = trace_context(_pipe);
    struct trace_texture *tr_tex;
@@ -971,6 +998,37 @@ trace_context_set_sampler_textures(struct pipe_context *_pipe,
 }
 
 
+static INLINE void
+trace_context_set_vertex_sampler_textures(struct pipe_context *_pipe,
+                                          unsigned num_textures,
+                                          struct pipe_texture **textures)
+{
+   struct trace_context *tr_ctx = trace_context(_pipe);
+   struct trace_texture *tr_tex;
+   struct pipe_context *pipe = tr_ctx->pipe;
+   struct pipe_texture *unwrapped_textures[PIPE_MAX_VERTEX_SAMPLERS];
+   unsigned i;
+
+   tr_ctx->curr.num_vert_texs = num_textures;
+   for(i = 0; i < num_textures; ++i) {
+      tr_tex = trace_texture(textures[i]);
+      tr_ctx->curr.vert_tex[i] = tr_tex;
+      unwrapped_textures[i] = tr_tex ? tr_tex->texture : NULL;
+   }
+   textures = unwrapped_textures;
+
+   trace_dump_call_begin("pipe_context", "set_vertex_sampler_textures");
+
+   trace_dump_arg(ptr, pipe);
+   trace_dump_arg(uint, num_textures);
+   trace_dump_arg_array(ptr, textures, num_textures);
+
+   pipe->set_vertex_sampler_textures(pipe, num_textures, textures);
+
+   trace_dump_call_end();
+}
+
+
 static INLINE void
 trace_context_set_vertex_buffers(struct pipe_context *_pipe,
                                  unsigned num_buffers,
@@ -1253,7 +1311,8 @@ trace_context_create(struct pipe_screen *_screen,
    tr_ctx->base.bind_blend_state = trace_context_bind_blend_state;
    tr_ctx->base.delete_blend_state = trace_context_delete_blend_state;
    tr_ctx->base.create_sampler_state = trace_context_create_sampler_state;
-   tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_sampler_states;
+   tr_ctx->base.bind_fragment_sampler_states = trace_context_bind_fragment_sampler_states;
+   tr_ctx->base.bind_vertex_sampler_states = trace_context_bind_vertex_sampler_states;
    tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state;
    tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state;
    tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state;
@@ -1274,7 +1333,8 @@ trace_context_create(struct pipe_screen *_screen,
    tr_ctx->base.set_polygon_stipple = trace_context_set_polygon_stipple;
    tr_ctx->base.set_scissor_state = trace_context_set_scissor_state;
    tr_ctx->base.set_viewport_state = trace_context_set_viewport_state;
-   tr_ctx->base.set_fragment_sampler_textures = trace_context_set_sampler_textures;
+   tr_ctx->base.set_fragment_sampler_textures = trace_context_set_fragment_sampler_textures;
+   tr_ctx->base.set_vertex_sampler_textures = trace_context_set_vertex_sampler_textures;
    tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers;
    tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements;
    if (pipe->surface_copy)
diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h
index 6febe4b411..852b480765 100644
--- a/src/gallium/drivers/trace/tr_context.h
+++ b/src/gallium/drivers/trace/tr_context.h
@@ -54,6 +54,9 @@ struct trace_context
       struct trace_texture *tex[PIPE_MAX_SAMPLERS];
       unsigned num_texs;
 
+      struct trace_texture *vert_tex[PIPE_MAX_VERTEX_SAMPLERS];
+      unsigned num_vert_texs;
+
       unsigned nr_cbufs;
       struct trace_texture *cbufs[PIPE_MAX_COLOR_BUFS];
       struct trace_texture *zsbuf;
-- 
cgit v1.2.3


From 759604e32bb5b00d7b70fbab7bd8125e135d7a68 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 25 Nov 2009 00:24:28 +0100
Subject: r300g: add R300 prefix in reg definitions

---
 src/gallium/drivers/r300/r300_reg.h | 82 ++++++++++++++++++-------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 3a419b24b0..85b1ea568a 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -661,20 +661,20 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_GB_SUPER_TILE_B		(1 << 15)
 #	define R300_GB_SUBPIXEL_1_12		(0 << 16)
 #	define R300_GB_SUBPIXEL_1_16		(1 << 16)
-#	define GB_TILE_CONFIG_QUADS_PER_RAS_4   (0 << 17)
-#	define GB_TILE_CONFIG_QUADS_PER_RAS_8   (1 << 17)
-#	define GB_TILE_CONFIG_QUADS_PER_RAS_16  (2 << 17)
-#	define GB_TILE_CONFIG_QUADS_PER_RAS_32  (3 << 17)
-#	define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
-#	define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
-#	define GB_TILE_CONFIG_ALT_SCAN_EN_LR    (0 << 20)
-#	define GB_TILE_CONFIG_ALT_SCAN_EN_LRL   (1 << 20)
-#	define GB_TILE_CONFIG_ALT_OFFSET        (0 << 21)
-#	define GB_TILE_CONFIG_SUBPRECISION      (0 << 22)
-#	define GB_TILE_CONFIG_ALT_TILING_DEF    (0 << 23)
-#	define GB_TILE_CONFIG_ALT_TILING_3_2    (1 << 23)
-#	define GB_TILE_CONFIG_Z_EXTENDED_24_1   (0 << 24)
-#	define GB_TILE_CONFIG_Z_EXTENDED_S25_1  (1 << 24)
+#	define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4   (0 << 17)
+#	define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8   (1 << 17)
+#	define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16  (2 << 17)
+#	define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32  (3 << 17)
+#	define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+#	define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+#	define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR    (0 << 20)
+#	define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL   (1 << 20)
+#	define R300_GB_TILE_CONFIG_ALT_OFFSET        (0 << 21)
+#	define R300_GB_TILE_CONFIG_SUBPRECISION      (0 << 22)
+#	define R300_GB_TILE_CONFIG_ALT_TILING_DEF    (0 << 23)
+#	define R300_GB_TILE_CONFIG_ALT_TILING_3_2    (1 << 23)
+#	define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1   (0 << 24)
+#	define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1  (1 << 24)
 
 /* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */
 #define R300_GB_FIFO_SIZE	0x4024
@@ -700,9 +700,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_OFIFO_HIGHWATER_SHIFT	22	/* two bits only */
 #	define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT	24
 
-#define GB_Z_PEQ_CONFIG                          0x4028
-#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4    (0 << 0)
-#	define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8    (1 << 0)
+#define R300_GB_Z_PEQ_CONFIG                          0x4028
+#	define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4    (0 << 0)
+#	define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8    (1 << 0)
 
 /* Specifies various polygon specific selects (fog, depth, perspective). */
 #define R300_GB_SELECT                           0x401c
@@ -725,39 +725,39 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /* Specifies the graphics pipeline configuration for antialiasing. */
 #define R300_GB_AA_CONFIG                         0x4020
-#	define GB_AA_CONFIG_AA_DISABLE           (0 << 0)
-#	define GB_AA_CONFIG_AA_ENABLE            (1 << 0)
-#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2  (0 << 1)
-#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3  (1 << 1)
-#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4  (2 << 1)
-#	define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6  (3 << 1)
+#	define R300_GB_AA_CONFIG_AA_DISABLE           (0 << 0)
+#	define R300_GB_AA_CONFIG_AA_ENABLE            (1 << 0)
+#	define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2  (0 << 1)
+#	define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3  (1 << 1)
+#	define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4  (2 << 1)
+#	define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6  (3 << 1)
 
 /* Selects which of 4 pipes are active. */
-#define GB_PIPE_SELECT                           0x402c
-#	define GB_PIPE_SELECT_PIPE0_ID_SHIFT  0
-#	define GB_PIPE_SELECT_PIPE1_ID_SHIFT  2
-#	define GB_PIPE_SELECT_PIPE2_ID_SHIFT  4
-#	define GB_PIPE_SELECT_PIPE3_ID_SHIFT  6
-#	define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
-#	define GB_PIPE_SELECT_MAX_PIPE        12
-#	define GB_PIPE_SELECT_BAD_PIPES       14
-#	define GB_PIPE_SELECT_CONFIG_PIPES    18
+#define R300_GB_PIPE_SELECT                           0x402c
+#	define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT  0
+#	define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT  2
+#	define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT  4
+#	define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT  6
+#	define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+#	define R300_GB_PIPE_SELECT_MAX_PIPE        12
+#	define R300_GB_PIPE_SELECT_BAD_PIPES       14
+#	define R300_GB_PIPE_SELECT_CONFIG_PIPES    18
 
 
 /* Specifies the sizes of the various FIFO`s in the sc/rs. */
-#define GB_FIFO_SIZE1                            0x4070
+#define R300_GB_FIFO_SIZE1                            0x4070
 /* High water mark for SC input fifo */
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK  0x0000003f
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK  0x0000003f
 /* High water mark for SC input fifo (B) */
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK  0x00000fc0
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK  0x00000fc0
 /* High water mark for RS colors' fifo */
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT   12
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK    0x0003f000
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT   12
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK    0x0003f000
 /* High water mark for RS textures' fifo */
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT   18
-#	define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK    0x00fc0000
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT   18
+#	define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK    0x00fc0000
 
 /* This table specifies the source location and format for up to 16 texture
  * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
-- 
cgit v1.2.3


From 6f05eba0204a9f1371bb1ae5cdb0f71bb819eb28 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 26 Nov 2009 13:49:41 +0100
Subject: r300g: VS->FS attribute routing rework

Now it always correctly pairs up VS and FS even if the semantics and indices
of VS outputs and FS inputs don't match.
---
 src/gallium/drivers/r300/r300_state_derived.c | 562 ++++++++++++++++++--------
 1 file changed, 392 insertions(+), 170 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 6fb780cb29..03cdba0538 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +36,25 @@
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
+#define ATTR_UNUSED             (-1)
+#define ATTR_COLOR_COUNT        2
+#define ATTR_GENERIC_COUNT      16
+
+/* This structure contains information about what attributes are written by VS
+ * or read by FS. (but not both) It's much easier to work with than
+ * tgsi_shader_info.
+ *
+ * The variables basically means used/unused and may optionally contain
+ * indices to tgsi_shader_info semantics which we need to know for Draw. */
+struct r300_shader_info {
+    int pos;
+    int psize;
+    int color[ATTR_COLOR_COUNT];
+    int bcolor[ATTR_COLOR_COUNT];
+    int generic[ATTR_GENERIC_COUNT];
+    int fog;
+};
+
 struct r300_shader_key {
     struct r300_vertex_shader* vs;
     struct r300_fragment_shader* fs;
@@ -61,51 +81,184 @@ int r300_shader_key_compare(void* key1, void* key2) {
         (shader_key1->fs == shader_key2->fs);
 }
 
-/* Set up the vs_output_tab and routes. */
-static void r300_vs_output_tab_routes(struct r300_context* r300,
-                                      int* vs_output_tab)
+static void r300_draw_emit_attrib(struct r300_context* r300,
+                                  enum attrib_emit emit,
+                                  enum interp_mode interp,
+                                  int index)
 {
-    struct vertex_info* vinfo = &r300->vertex_info->vinfo;
-    boolean pos = FALSE, psize = FALSE, fog = FALSE;
-    int i, texs = 0, cols = 0;
-    struct tgsi_shader_info* info = &r300->fs->info;
+    struct tgsi_shader_info* info = &r300->vs->info;
+    int output;
 
-    /* XXX One day we should figure out how to handle a different number of
-     * VS outputs and FS inputs, as well as a different number of vertex streams
-     * and VS inputs. It's definitely one of the sources of hardlocks. */
+    if (r300->draw) {
+        output = draw_find_vs_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
+        draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
+    }
+}
 
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
+static void r300_shader_info_reset(struct r300_shader_info* info)
+{
+    int i;
+
+    info->pos = ATTR_UNUSED;
+    info->psize = ATTR_UNUSED;
+    info->fog = ATTR_UNUSED;
+
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        info->color[i] = ATTR_UNUSED;
+        info->bcolor[i] = ATTR_UNUSED;
+    }
+
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        info->generic[i] = ATTR_UNUSED;
+    }
+}
+
+/* Convert info about VS output semantics to r300_shader_info. */
+static void r300_shader_read_vs_outputs(struct tgsi_shader_info* info,
+                                        struct r300_shader_info* vs_outputs)
+{
+    int i;
+    unsigned index;
+
+    r300_shader_info_reset(vs_outputs);
+
+    for (i = 0; i < info->num_outputs; i++) {
+        index = info->output_semantic_index[i];
+
+        switch (info->output_semantic_name[i]) {
             case TGSI_SEMANTIC_POSITION:
-                pos = TRUE;
-                vs_output_tab[i] = 0;
+                assert(index == 0);
+                vs_outputs->pos = i;
+                break;
+
+            case TGSI_SEMANTIC_PSIZE:
+                assert(index == 0);
+                vs_outputs->psize = i;
                 break;
+
             case TGSI_SEMANTIC_COLOR:
-                vs_output_tab[i] = 2 + cols;
-                cols++;
+                assert(index < ATTR_COLOR_COUNT); /* array bound */
+                vs_outputs->color[index] = i;
                 break;
-            case TGSI_SEMANTIC_PSIZE:
-                assert(psize == FALSE);
-                psize = TRUE;
-                vs_output_tab[i] = 15;
+
+            case TGSI_SEMANTIC_BCOLOR:
+                assert(index < ATTR_COLOR_COUNT); /* array bound */
+                vs_outputs->bcolor[index] = i;
                 break;
-            case TGSI_SEMANTIC_FOG:
-                assert(fog == FALSE);
-                fog = TRUE;
-                /* Fall through */
+
             case TGSI_SEMANTIC_GENERIC:
-                vs_output_tab[i] = 6 + texs;
-                texs++;
+                assert(index < ATTR_GENERIC_COUNT); /* array bound */
+                vs_outputs->generic[index] = i;
                 break;
-            default:
-                debug_printf("r300: Unknown vertex input %d\n",
-                    info->input_semantic_name[i]);
+
+            case TGSI_SEMANTIC_FOG:
+                assert(index == 0);
+                vs_outputs->fog = i;
                 break;
+
+            default:
+                assert(0);
         }
     }
+}
+
+/* Set VS output stream locations for SWTCL. */
+static void r300_stream_locations_swtcl(struct r300_shader_info* vs_outputs,
+                                        int* vs_output_tab)
+{
+    int i, tabi = 0, gen_count;
+
+    /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct.
+     * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */
+
+    /* Position. */
+    vs_output_tab[tabi++] = 0;
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        vs_output_tab[tabi++] = 1;
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            vs_output_tab[tabi++] = 2 + i;
+        }
+    }
+
+    /* Back-face colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+            vs_output_tab[tabi++] = 4 + i;
+        }
+    }
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            assert(tabi < 16);
+            vs_output_tab[tabi++] = 6 + gen_count;
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        assert(tabi < 16);
+        vs_output_tab[tabi++] = 6 + gen_count;
+        gen_count++;
+    }
 
     /* XXX magic */
-    assert(texs <= 8);
+    assert(gen_count <= 8);
+
+    for (; tabi < 16;) {
+        vs_output_tab[tabi++] = -1;
+    }
+}
+
+/* Convert info about FS input semantics to r300_shader_info. */
+static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+                                       struct r300_shader_info* fs_inputs)
+{
+    int i;
+    unsigned index;
+
+    r300_shader_info_reset(fs_inputs);
+
+    for (i = 0; i < info->num_inputs; i++) {
+        index = info->input_semantic_index[i];
+
+        switch (info->input_semantic_name[i]) {
+            case TGSI_SEMANTIC_COLOR:
+                assert(index < ATTR_COLOR_COUNT); /* array bound */
+                fs_inputs->color[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_GENERIC:
+                assert(index < ATTR_GENERIC_COUNT); /* array bound */
+                fs_inputs->generic[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_FOG:
+                assert(index == 0);
+                fs_inputs->fog = i;
+                break;
+
+            default:
+                assert(0);
+        }
+    }
+}
+
+static void r300_update_vap_output_fmt(struct r300_context* r300,
+                                       struct r300_shader_info* vs_outputs)
+{
+    struct vertex_info* vinfo = &r300->vertex_info->vinfo;
+    int i, gen_count;
 
     /* Do the actual vertex_info setup.
      *
@@ -117,69 +270,59 @@ static void r300_vs_output_tab_routes(struct r300_context* r300,
 
     vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
 
-    /* We need to add vertex position attribute only for SW TCL case,
-     * for HW TCL case it could be generated by vertex shader */
-    if (!pos) {
-        /* Make room for the position attribute
-         * at the beginning of the vs_output_tab. */
-        for (i = 15; i > 0; i--) {
-            vs_output_tab[i] = vs_output_tab[i-1];
-        }
-        vs_output_tab[0] = 0;
-    }
-
     /* Position. */
-    if (r300->draw) {
-        draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-            draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0));
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->pos);
+        vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
+        vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+    } else {
+        assert(0);
     }
-    vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
-    vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
 
     /* Point size. */
-    if (psize) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_1F_PSIZE, INTERP_POS,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0));
-        }
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+                              vs_outputs->psize);
         vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
     }
 
     /* Colors. */
-    for (i = 0; i < cols; i++) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i));
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+                                  vs_outputs->color[i]);
+            vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+            vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
         }
-        vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
-        vinfo->hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i);
     }
 
-    /* Init i right here, increment it if fog is enabled.
-     * This gets around a double-increment problem. */
-    i = 0;
-
-    /* Fog. This is a special-cased texcoord. */
-    if (fog) {
-        i++;
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0));
+    /* XXX Back-face colors. */
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                                  vs_outputs->generic[i]);
+            vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+            vinfo->hwfmt[3] |= (4 << (3 * gen_count));
+            gen_count++;
         }
-        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
-        vinfo->hwfmt[3] |= (4 << (3 * i));
     }
 
-    /* Texcoords. */
-    for (; i < texs; i++) {
-        if (r300->draw) {
-            draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE,
-                draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i));
-        }
-        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << i);
-        vinfo->hwfmt[3] |= (4 << (3 * i));
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+                              vs_outputs->fog);
+        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+        vinfo->hwfmt[3] |= (4 << (3 * gen_count));
+        gen_count++;
     }
 
+    /* XXX magic */
+    assert(gen_count <= 8);
+
     draw_compute_vertex_size(vinfo);
 }
 
@@ -279,109 +422,191 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300,
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
 }
 
-/* Set up the RS block. This is the part of the chipset that actually does
- * the rasterization of vertices into fragments. This is also the part of the
- * chipset that locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300)
+static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_0001)
 {
-    struct r300_rs_block* rs = r300->rs_block;
-    struct tgsi_shader_info* info = &r300->fs->info;
-    int col_count = 0, fp_offset = 0, i, tex_count = 0;
-    int rs_tex_comp = 0;
+    rs->ip[id] |= R300_RS_COL_PTR(ptr);
+    if (swizzle_0001) {
+        rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    } else {
+        rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+    }
+    rs->inst[id] |= R300_RS_INST_COL_ID(id);
+}
 
-    if (r300_screen(r300->context.screen)->caps->is_r500) {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (info->input_semantic_name[i]) {
-                case TGSI_SEMANTIC_COLOR:
-                    rs->ip[col_count] |=
-                        R500_RS_COL_PTR(col_count) |
-                        R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
-                    col_count++;
-                    break;
-                case TGSI_SEMANTIC_GENERIC:
-                    rs->ip[tex_count] |=
-                        R500_RS_SEL_S(rs_tex_comp) |
-                        R500_RS_SEL_T(rs_tex_comp + 1) |
-                        R500_RS_SEL_R(rs_tex_comp + 2) |
-                        R500_RS_SEL_Q(rs_tex_comp + 3);
-                    tex_count++;
-                    rs_tex_comp += 4;
-                    break;
-                default:
-                    break;
-            }
-        }
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
+                    R300_RS_INST_COL_ADDR(fp_offset);
+}
 
-        /* Rasterize at least one color, or bad things happen. */
-        if ((col_count == 0) && (tex_count == 0)) {
-            rs->ip[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
-            col_count++;
-        }
+static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_X001)
+{
+    if (swizzle_X001) {
+        rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+                      R300_RS_SEL_S(R300_RS_SEL_C0) |
+                      R300_RS_SEL_T(R300_RS_SEL_K0) |
+                      R300_RS_SEL_R(R300_RS_SEL_K0) |
+                      R300_RS_SEL_Q(R300_RS_SEL_K1);
+    } else {
+        rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+                      R300_RS_SEL_S(R300_RS_SEL_C0) |
+                      R300_RS_SEL_T(R300_RS_SEL_C1) |
+                      R300_RS_SEL_R(R300_RS_SEL_C2) |
+                      R300_RS_SEL_Q(R300_RS_SEL_C3);
+    }
+    rs->inst[id] |= R300_RS_INST_TEX_ID(id);
+}
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
-                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
+static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
+                    R300_RS_INST_TEX_ADDR(fp_offset);
+}
 
-        for (i = 0; i < tex_count; i++) {
-            rs->inst[i] |= R500_RS_INST_TEX_ID(i) |
-                R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset);
-            fp_offset++;
-        }
+static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_0001)
+{
+    rs->ip[id] |= R500_RS_COL_PTR(ptr);
+    if (swizzle_0001) {
+        rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    } else {
+        rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+    }
+    rs->inst[id] |= R500_RS_INST_COL_ID(id);
+}
+
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+                    R500_RS_INST_COL_ADDR(fp_offset);
+}
 
+static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+                        boolean swizzle_X001)
+{
+    int rs_tex_comp = ptr*4;
+
+    if (swizzle_X001) {
+        rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+                      R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
+                      R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
+                      R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
     } else {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (info->input_semantic_name[i]) {
-                case TGSI_SEMANTIC_COLOR:
-                    rs->ip[col_count] |=
-                        R300_RS_COL_PTR(col_count) |
-                        R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
-                    col_count++;
-                    break;
-                case TGSI_SEMANTIC_GENERIC:
-                    rs->ip[tex_count] |=
-                        R300_RS_TEX_PTR(rs_tex_comp) |
-                        R300_RS_SEL_S(R300_RS_SEL_C0) |
-                        R300_RS_SEL_T(R300_RS_SEL_C1) |
-                        R300_RS_SEL_R(R300_RS_SEL_C2) |
-                        R300_RS_SEL_Q(R300_RS_SEL_C3);
-                    tex_count++;
-                    rs_tex_comp+=4;
-                    break;
-                default:
-                    break;
-            }
-        }
+        rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+                      R500_RS_SEL_T(rs_tex_comp + 1) |
+                      R500_RS_SEL_R(rs_tex_comp + 2) |
+                      R500_RS_SEL_Q(rs_tex_comp + 3);
+    }
+    rs->inst[id] |= R500_RS_INST_TEX_ID(id);
+}
+
+static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+    rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
+                    R500_RS_INST_TEX_ADDR(fp_offset);
+}
+
+/* Set up the RS block.
+ *
+ * This is the part of the chipset that actually does the rasterization
+ * of vertices into fragments. This is also the part of the chipset that
+ * locks up if any part of it is even slightly wrong. */
+static void r300_update_rs_block(struct r300_context* r300,
+                                 struct r300_shader_info* vs_outputs,
+                                 struct r300_shader_info* fs_inputs)
+{
+    struct r300_rs_block* rs = r300->rs_block;
+    int i, col_count = 0, tex_count = 0, fp_offset = 0;
+    void (*rX00_rs_col)(struct r300_rs_block*, int, int, boolean);
+    void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+    void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
+    void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
 
-        /* Rasterize at least one color, or bad things happen. */
-        if (col_count == 0) {
-            rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+    if (r300_screen(r300->context.screen)->caps->is_r500) {
+        rX00_rs_col       = r500_rs_col;
+        rX00_rs_col_write = r500_rs_col_write;
+        rX00_rs_tex       = r500_rs_tex;
+        rX00_rs_tex_write = r500_rs_tex_write;
+    } else {
+        rX00_rs_col       = r300_rs_col;
+        rX00_rs_col_write = r300_rs_col_write;
+        rX00_rs_tex       = r300_rs_tex;
+        rX00_rs_tex_write = r300_rs_tex_write;
+    }
+
+    /* Rasterize colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            /* Always rasterize if it's written by the VS,
+             * otherwise it locks up. */
+            rX00_rs_col(rs, col_count, i, FALSE);
+
+            /* Write it to the FS input register if it's used by the FS. */
+            if (fs_inputs->color[i] != ATTR_UNUSED) {
+                rX00_rs_col_write(rs, col_count, fp_offset);
+                fp_offset++;
+            }
             col_count++;
+        } else {
+            /* Skip the FS input register, leave it uninitialized. */
+            /* If we try to set it to (0,0,0,1), it will lock up. */
+            if (fs_inputs->color[i] != ATTR_UNUSED) {
+                fp_offset++;
+            }
         }
+    }
 
-        if (tex_count == 0) {
-            rs->ip[0] |=
-                R300_RS_SEL_S(R300_RS_SEL_K0) |
-                R300_RS_SEL_T(R300_RS_SEL_K0) |
-                R300_RS_SEL_R(R300_RS_SEL_K0) |
-                R300_RS_SEL_Q(R300_RS_SEL_K1);
+    /* Rasterize texture coordinates. */
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            /* Always rasterize if it's written by the VS,
+             * otherwise it locks up. */
+            rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+
+            /* Write it to the FS input register if it's used by the FS. */
+            if (fs_inputs->generic[i] != ATTR_UNUSED) {
+                rX00_rs_tex_write(rs, tex_count, fp_offset);
+                fp_offset++;
+            }
+            tex_count++;
+        } else {
+            /* Skip the FS input register, leave it uninitialized. */
+            /* If we try to set it to (0,0,0,1), it will lock up. */
+            if (fs_inputs->generic[i] != ATTR_UNUSED) {
+                fp_offset++;
+            }
         }
+    }
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
-                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
+    /* Rasterize fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        /* Always rasterize if it's written by the VS,
+         * otherwise it locks up. */
+        rX00_rs_tex(rs, tex_count, tex_count, TRUE);
+
+        /* Write it to the FS input register if it's used by the FS. */
+        if (fs_inputs->fog != ATTR_UNUSED) {
+            rX00_rs_tex_write(rs, tex_count, fp_offset);
             fp_offset++;
         }
-
-        for (i = 0; i < tex_count; i++) {
-            rs->inst[i] |= R300_RS_INST_TEX_ID(i) |
-                R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset);
+        tex_count++;
+    } else {
+        /* Skip the FS input register, leave it uninitialized. */
+        /* If we try to set it to (0,0,0,1), it will lock up. */
+        if (fs_inputs->fog != ATTR_UNUSED) {
             fp_offset++;
         }
     }
 
-    rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
+    /* Rasterize at least one color, or bad things happen. */
+    if (col_count == 0 && tex_count == 0) {
+        rX00_rs_col(rs, 0, 0, TRUE);
+        col_count++;
+    }
+
+    rs->count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
         R300_HIRES_EN;
 
     rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0);
@@ -391,9 +616,8 @@ static void r300_update_rs_block(struct r300_context* r300)
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
+    struct r300_shader_info vs_outputs, fs_inputs;
     int vs_output_tab[16];
-    int i;
-
 
     /*
     struct r300_shader_key* key;
@@ -425,21 +649,19 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
     memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
 
-    for (i = 0; i < 16; i++) {
-        vs_output_tab[i] = -1;
-    }
+    r300_shader_read_vs_outputs(&r300->vs->info, &vs_outputs);
+    r300_shader_read_fs_inputs(&r300->fs->info, &fs_inputs);
 
-    /* Update states */
-    r300_vs_output_tab_routes(r300, vs_output_tab);
+    r300_update_vap_output_fmt(r300, &vs_outputs);
+    r300_update_rs_block(r300, &vs_outputs, &fs_inputs);
 
     if (r300screen->caps->has_tcl) {
         r300_vertex_psc(r300);
     } else {
+        r300_stream_locations_swtcl(&vs_outputs, vs_output_tab);
         r300_swtcl_vertex_psc(r300, vs_output_tab);
     }
 
-    r300_update_rs_block(r300);
-
     r300->dirty_state |= R300_NEW_RS_BLOCK;
 }
 
-- 
cgit v1.2.3


From cb90235135ef7c657053657f3bdfbda7ca70d708 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 26 Nov 2009 19:37:58 +0100
Subject: r300g: clean up derived states

The state setups which aren't derived anymore have been moved to the VS
and FS objects.
---
 src/gallium/drivers/r300/r300_fs.c               |  42 +++-
 src/gallium/drivers/r300/r300_fs.h               |   6 +-
 src/gallium/drivers/r300/r300_shader_semantics.h |  64 ++++++
 src/gallium/drivers/r300/r300_state_derived.c    | 235 ++---------------------
 src/gallium/drivers/r300/r300_vs.c               | 182 +++++++++++++++++-
 src/gallium/drivers/r300/r300_vs.h               |  11 +-
 6 files changed, 311 insertions(+), 229 deletions(-)
 create mode 100644 src/gallium/drivers/r300/r300_shader_semantics.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 29ddc84c41..9cc833e606 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -1,6 +1,7 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
  *                Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -31,6 +32,40 @@
 #include "radeon_code.h"
 #include "radeon_compiler.h"
 
+/* Convert info about FS input semantics to r300_shader_semantics. */
+static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+                                       struct r300_shader_semantics* fs_inputs)
+{
+    int i;
+    unsigned index;
+
+    r300_shader_semantics_reset(fs_inputs);
+
+    for (i = 0; i < info->num_inputs; i++) {
+        index = info->input_semantic_index[i];
+
+        switch (info->input_semantic_name[i]) {
+            case TGSI_SEMANTIC_COLOR:
+                assert(index < ATTR_COLOR_COUNT); /* array bound */
+                fs_inputs->color[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_GENERIC:
+                assert(index < ATTR_GENERIC_COUNT); /* array bound */
+                fs_inputs->generic[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_FOG:
+                assert(index == 0);
+                fs_inputs->fog = i;
+                break;
+
+            default:
+                assert(0);
+        }
+    }
+}
+
 static void find_output_registers(struct r300_fragment_program_compiler * compiler,
                                   struct r300_fragment_shader * fs)
 {
@@ -98,6 +133,10 @@ void r300_translate_fragment_shader(struct r300_context* r300,
     struct r300_fragment_program_compiler compiler;
     struct tgsi_to_rc ttr;
 
+    /* Initialize. */
+    r300_shader_read_fs_inputs(&fs->info, &fs->inputs);
+
+    /* Setup the compiler. */
     memset(&compiler, 0, sizeof(compiler));
     rc_init(&compiler.Base);
     compiler.Base.Debug = DBG_ON(r300, DBG_FP);
@@ -107,7 +146,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
     compiler.AllocateHwInputs = &allocate_hardware_inputs;
     compiler.UserData = fs;
 
-    /* TODO: Program compilation depends on texture compare modes,
+    /* XXX: Program compilation depends on texture compare modes,
      * which are sampler state. Therefore, programs need to be recompiled
      * depending on this state as in the classic Mesa driver.
      *
@@ -133,6 +172,7 @@ void r300_translate_fragment_shader(struct r300_context* r300,
         /* XXX failover maybe? */
         DBG(r300, DBG_FP, "r300: Error compiling fragment program: %s\n",
             compiler.Base.ErrorMsg);
+        assert(0);
     }
 
     /* And, finally... */
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index e831c30301..630e2d0c8a 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -1,6 +1,7 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
  *                Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -25,15 +26,16 @@
 #define R300_FS_H
 
 #include "pipe/p_state.h"
-
 #include "tgsi/tgsi_scan.h"
-
 #include "radeon_code.h"
+#include "r300_shader_semantics.h"
 
 struct r300_fragment_shader {
     /* Parent class */
     struct pipe_shader_state state;
+
     struct tgsi_shader_info info;
+    struct r300_shader_semantics inputs;
 
     /* Has this shader been translated yet? */
     boolean translated;
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
new file mode 100644
index 0000000000..85184e2cfd
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SHADER_SEMANTICS_H
+#define R300_SHADER_SEMANTICS_H
+
+#define ATTR_UNUSED             (-1)
+#define ATTR_COLOR_COUNT        2
+#define ATTR_GENERIC_COUNT      16
+
+/* This structure contains information about what attributes are written by VS
+ * or read by FS. (but not both) It's much easier to work with than
+ * tgsi_shader_info.
+ *
+ * The variables contain indices to tgsi_shader_info semantics and those
+ * indices are nothing other than input/output register numbers. */
+struct r300_shader_semantics {
+    int pos;
+    int psize;
+    int color[ATTR_COLOR_COUNT];
+    int bcolor[ATTR_COLOR_COUNT];
+    int generic[ATTR_GENERIC_COUNT];
+    int fog;
+};
+
+static INLINE void r300_shader_semantics_reset(
+    struct r300_shader_semantics* info)
+{
+    int i;
+
+    info->pos = ATTR_UNUSED;
+    info->psize = ATTR_UNUSED;
+    info->fog = ATTR_UNUSED;
+
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        info->color[i] = ATTR_UNUSED;
+        info->bcolor[i] = ATTR_UNUSED;
+    }
+
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        info->generic[i] = ATTR_UNUSED;
+    }
+}
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 03cdba0538..cd969d633b 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -29,6 +29,7 @@
 #include "r300_context.h"
 #include "r300_fs.h"
 #include "r300_screen.h"
+#include "r300_shader_semantics.h"
 #include "r300_state_derived.h"
 #include "r300_state_inlines.h"
 #include "r300_vs.h"
@@ -36,25 +37,6 @@
 /* r300_state_derived: Various bits of state which are dependent upon
  * currently bound CSO data. */
 
-#define ATTR_UNUSED             (-1)
-#define ATTR_COLOR_COUNT        2
-#define ATTR_GENERIC_COUNT      16
-
-/* This structure contains information about what attributes are written by VS
- * or read by FS. (but not both) It's much easier to work with than
- * tgsi_shader_info.
- *
- * The variables basically means used/unused and may optionally contain
- * indices to tgsi_shader_info semantics which we need to know for Draw. */
-struct r300_shader_info {
-    int pos;
-    int psize;
-    int color[ATTR_COLOR_COUNT];
-    int bcolor[ATTR_COLOR_COUNT];
-    int generic[ATTR_GENERIC_COUNT];
-    int fog;
-};
-
 struct r300_shader_key {
     struct r300_vertex_shader* vs;
     struct r300_fragment_shader* fs;
@@ -89,193 +71,21 @@ static void r300_draw_emit_attrib(struct r300_context* r300,
     struct tgsi_shader_info* info = &r300->vs->info;
     int output;
 
-    if (r300->draw) {
-        output = draw_find_vs_output(r300->draw,
-                                     info->output_semantic_name[index],
-                                     info->output_semantic_index[index]);
-        draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
-    }
+    output = draw_find_vs_output(r300->draw,
+                                 info->output_semantic_name[index],
+                                 info->output_semantic_index[index]);
+    draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
 }
 
-static void r300_shader_info_reset(struct r300_shader_info* info)
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
 {
-    int i;
-
-    info->pos = ATTR_UNUSED;
-    info->psize = ATTR_UNUSED;
-    info->fog = ATTR_UNUSED;
-
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        info->color[i] = ATTR_UNUSED;
-        info->bcolor[i] = ATTR_UNUSED;
-    }
-
-    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        info->generic[i] = ATTR_UNUSED;
-    }
-}
-
-/* Convert info about VS output semantics to r300_shader_info. */
-static void r300_shader_read_vs_outputs(struct tgsi_shader_info* info,
-                                        struct r300_shader_info* vs_outputs)
-{
-    int i;
-    unsigned index;
-
-    r300_shader_info_reset(vs_outputs);
-
-    for (i = 0; i < info->num_outputs; i++) {
-        index = info->output_semantic_index[i];
-
-        switch (info->output_semantic_name[i]) {
-            case TGSI_SEMANTIC_POSITION:
-                assert(index == 0);
-                vs_outputs->pos = i;
-                break;
-
-            case TGSI_SEMANTIC_PSIZE:
-                assert(index == 0);
-                vs_outputs->psize = i;
-                break;
-
-            case TGSI_SEMANTIC_COLOR:
-                assert(index <= ATTR_COLOR_COUNT);
-                vs_outputs->color[index] = i;
-                break;
-
-            case TGSI_SEMANTIC_BCOLOR:
-                assert(index <= ATTR_COLOR_COUNT);
-                vs_outputs->bcolor[index] = i;
-                break;
-
-            case TGSI_SEMANTIC_GENERIC:
-                assert(index <= ATTR_GENERIC_COUNT);
-                vs_outputs->generic[index] = i;
-                break;
-
-            case TGSI_SEMANTIC_FOG:
-                assert(index == 0);
-                vs_outputs->fog = i;
-                break;
-
-            default:
-                assert(0);
-        }
-    }
-}
-
-/* Set VS output stream locations for SWTCL. */
-static void r300_stream_locations_swtcl(struct r300_shader_info* vs_outputs,
-                                        int* vs_output_tab)
-{
-    int i, tabi = 0, gen_count;
-
-    /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct.
-     * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */
-
-    /* Position. */
-    vs_output_tab[tabi++] = 0;
-
-    /* Point size. */
-    if (vs_outputs->psize != ATTR_UNUSED) {
-        vs_output_tab[tabi++] = 1;
-    }
-
-    /* Colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
-            vs_output_tab[tabi++] = 2 + i;
-        }
-    }
-
-    /* Back-face colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
-            vs_output_tab[tabi++] = 4 + i;
-        }
-    }
-
-    /* Texture coordinates. */
-    gen_count = 0;
-    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
-            assert(tabi < 16);
-            vs_output_tab[tabi++] = 6 + gen_count;
-            gen_count++;
-        }
-    }
-
-    /* Fog coordinates. */
-    if (vs_outputs->fog != ATTR_UNUSED) {
-        assert(tabi < 16);
-        vs_output_tab[tabi++] = 6 + gen_count;
-        gen_count++;
-    }
-
-    /* XXX magic */
-    assert(gen_count <= 8);
-
-    for (; tabi < 16;) {
-        vs_output_tab[tabi++] = -1;
-    }
-}
-
-/* Convert info about FS input semantics to r300_shader_info. */
-static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
-                                       struct r300_shader_info* fs_inputs)
-{
-    int i;
-    unsigned index;
-
-    r300_shader_info_reset(fs_inputs);
-
-    for (i = 0; i < info->num_inputs; i++) {
-        index = info->input_semantic_index[i];
-
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                assert(index <= ATTR_COLOR_COUNT);
-                fs_inputs->color[index] = i;
-                break;
-
-            case TGSI_SEMANTIC_GENERIC:
-                assert(index <= ATTR_GENERIC_COUNT);
-                fs_inputs->generic[index] = i;
-                break;
-
-            case TGSI_SEMANTIC_FOG:
-                assert(index == 0);
-                fs_inputs->fog = i;
-                break;
-
-            default:
-                assert(0);
-        }
-    }
-}
-
-static void r300_update_vap_output_fmt(struct r300_context* r300,
-                                       struct r300_shader_info* vs_outputs)
-{
-    struct vertex_info* vinfo = &r300->vertex_info->vinfo;
+    struct r300_shader_semantics* vs_outputs = &r300->vs->outputs;
     int i, gen_count;
 
-    /* Do the actual vertex_info setup.
-     *
-     * vertex_info has four uints of hardware-specific data in it.
-     * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL
-     * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM
-     * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
-     * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
-
-    vinfo->hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
-
     /* Position. */
     if (vs_outputs->pos != ATTR_UNUSED) {
         r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
                               vs_outputs->pos);
-        vinfo->hwfmt[1] |= R300_INPUT_CNTL_POS;
-        vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
     } else {
         assert(0);
     }
@@ -284,7 +94,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300,
     if (vs_outputs->psize != ATTR_UNUSED) {
         r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
                               vs_outputs->psize);
-        vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
     }
 
     /* Colors. */
@@ -292,8 +101,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300,
         if (vs_outputs->color[i] != ATTR_UNUSED) {
             r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
                                   vs_outputs->color[i]);
-            vinfo->hwfmt[1] |= R300_INPUT_CNTL_COLOR;
-            vinfo->hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
         }
     }
 
@@ -305,8 +112,6 @@ static void r300_update_vap_output_fmt(struct r300_context* r300,
         if (vs_outputs->generic[i] != ATTR_UNUSED) {
             r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
                                   vs_outputs->generic[i]);
-            vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
-            vinfo->hwfmt[3] |= (4 << (3 * gen_count));
             gen_count++;
         }
     }
@@ -315,15 +120,11 @@ static void r300_update_vap_output_fmt(struct r300_context* r300,
     if (vs_outputs->fog != ATTR_UNUSED) {
         r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
                               vs_outputs->fog);
-        vinfo->hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
-        vinfo->hwfmt[3] |= (4 << (3 * gen_count));
         gen_count++;
     }
 
     /* XXX magic */
     assert(gen_count <= 8);
-
-    draw_compute_vertex_size(vinfo);
 }
 
 /* Update the PSC tables. */
@@ -370,14 +171,14 @@ static void r300_vertex_psc(struct r300_context* r300)
 }
 
 /* Update the PSC tables for SW TCL, using Draw. */
-static void r300_swtcl_vertex_psc(struct r300_context* r300,
-                                  int* vs_output_tab)
+static void r300_swtcl_vertex_psc(struct r300_context* r300)
 {
     struct r300_vertex_info *vformat = r300->vertex_info;
     struct vertex_info* vinfo = &vformat->vinfo;
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i, attrib_count;
+    int* vs_output_tab = r300->vs->output_stream_loc_swtcl;
 
     /* For each Draw attribute, route it to the fragment shader according
      * to the vs_output_tab. */
@@ -514,8 +315,8 @@ static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
  * of vertices into fragments. This is also the part of the chipset that
  * locks up if any part of it is even slightly wrong. */
 static void r300_update_rs_block(struct r300_context* r300,
-                                 struct r300_shader_info* vs_outputs,
-                                 struct r300_shader_info* fs_inputs)
+                                 struct r300_shader_semantics* vs_outputs,
+                                 struct r300_shader_semantics* fs_inputs)
 {
     struct r300_rs_block* rs = r300->rs_block;
     int i, col_count = 0, tex_count = 0, fp_offset = 0;
@@ -616,8 +417,6 @@ static void r300_update_rs_block(struct r300_context* r300,
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct r300_shader_info vs_outputs, fs_inputs;
-    int vs_output_tab[16];
 
     /*
     struct r300_shader_key* key;
@@ -648,18 +447,16 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     /* Reset structures */
     memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
     memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
+    memcpy(r300->vertex_info->vinfo.hwfmt, r300->vs->hwfmt, sizeof(uint)*4);
 
-    r300_shader_read_vs_outputs(&r300->vs->info, &vs_outputs);
-    r300_shader_read_fs_inputs(&r300->fs->info, &fs_inputs);
-
-    r300_update_vap_output_fmt(r300, &vs_outputs);
-    r300_update_rs_block(r300, &vs_outputs, &fs_inputs);
+    r300_update_rs_block(r300, &r300->vs->outputs, &r300->fs->inputs);
 
     if (r300screen->caps->has_tcl) {
         r300_vertex_psc(r300);
     } else {
-        r300_stream_locations_swtcl(&vs_outputs, vs_output_tab);
-        r300_swtcl_vertex_psc(r300, vs_output_tab);
+        r300_draw_emit_all_attribs(r300);
+        draw_compute_vertex_size(&r300->vertex_info->vinfo);
+        r300_swtcl_vertex_psc(r300);
     }
 
     r300->dirty_state |= R300_NEW_RS_BLOCK;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 74ef416dc1..49bff3e931 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -23,13 +24,181 @@
 #include "r300_vs.h"
 
 #include "r300_context.h"
+#include "r300_screen.h"
 #include "r300_tgsi_to_rc.h"
+#include "r300_reg.h"
 
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_parse.h"
 
 #include "radeon_compiler.h"
 
+/* Map each VS output semantic to its output register index (or ATTR_UNUSED). */
+static void r300_shader_read_vs_outputs(
+    struct tgsi_shader_info* info,
+    struct r300_shader_semantics* vs_outputs)
+{
+    int i;
+    unsigned index;
+
+    r300_shader_semantics_reset(vs_outputs);
+
+    for (i = 0; i < info->num_outputs; i++) {
+        index = info->output_semantic_index[i];
+
+        switch (info->output_semantic_name[i]) {
+            case TGSI_SEMANTIC_POSITION:
+                assert(index == 0);
+                vs_outputs->pos = i;
+                break;
+
+            case TGSI_SEMANTIC_PSIZE:
+                assert(index == 0);
+                vs_outputs->psize = i;
+                break;
+
+            case TGSI_SEMANTIC_COLOR:
+                assert(index < ATTR_COLOR_COUNT); /* index subscripts color[] */
+                vs_outputs->color[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_BCOLOR:
+                assert(index < ATTR_COLOR_COUNT); /* index subscripts bcolor[] */
+                vs_outputs->bcolor[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_GENERIC:
+                assert(index < ATTR_GENERIC_COUNT); /* index subscripts generic[] */
+                vs_outputs->generic[index] = i;
+                break;
+
+            case TGSI_SEMANTIC_FOG:
+                assert(index == 0);
+                vs_outputs->fog = i;
+                break;
+
+            default:
+                assert(0);
+        }
+    }
+}
+
+static void r300_shader_vap_output_fmt(
+    struct r300_shader_semantics* vs_outputs,
+    uint* hwfmt)
+{
+    int i, gen_count;
+
+    /* Fill in the four hardware-specific format words for this shader.
+     * Only hwfmt[0] is assigned; hwfmt[1..3] are OR-ed into, so the caller
+     * is assumed to pass them zeroed (TODO confirm at the allocation site).
+     * hwfmt[0] is R300_VAP_VTX_STATE_CNTL
+     * hwfmt[1] is R300_VAP_VSM_VTX_ASSM
+     * hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0
+     * hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */
+
+    hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */
+
+    /* Position. */
+    if (vs_outputs->pos != ATTR_UNUSED) {
+        hwfmt[1] |= R300_INPUT_CNTL_POS;
+        hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+    } else {
+        assert(0);
+    }
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+            hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
+        }
+    }
+
+    /* XXX Back-face colors. */
+
+    /* Texture coordinates. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+            hwfmt[3] |= (4 << (3 * gen_count));
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+        hwfmt[3] |= (4 << (3 * gen_count));
+        gen_count++;
+    }
+
+    /* XXX magic */
+    assert(gen_count <= 8);
+}
+
+/* Set VS output stream locations for SWTCL; fills all 16 table entries. */
+static void r300_stream_locations_swtcl(
+    struct r300_shader_semantics* vs_outputs,
+    int* output_stream_loc)
+{
+    int i, tabi = 0, gen_count;
+
+    /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct.
+     * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */
+
+    /* Position. */
+    output_stream_loc[tabi++] = 0;
+
+    /* Point size. */
+    if (vs_outputs->psize != ATTR_UNUSED) {
+        output_stream_loc[tabi++] = 1;
+    }
+
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->color[i] != ATTR_UNUSED) {
+            output_stream_loc[tabi++] = 2 + i;
+        }
+    }
+
+    /* Back-face colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+            output_stream_loc[tabi++] = 4 + i;
+        }
+    }
+
+    /* Texture coordinates: test generic[], which has ATTR_GENERIC_COUNT slots. */
+    gen_count = 0;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
+            assert(tabi < 16);
+            output_stream_loc[tabi++] = 6 + gen_count;
+            gen_count++;
+        }
+    }
+
+    /* Fog coordinates. */
+    if (vs_outputs->fog != ATTR_UNUSED) {
+        assert(tabi < 16);
+        output_stream_loc[tabi++] = 6 + gen_count;
+        gen_count++;
+    }
+
+    /* XXX magic */
+    assert(gen_count <= 8);
+
+    for (; tabi < 16;) {
+        output_stream_loc[tabi++] = -1;
+    }
+}
 
 static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
 {
@@ -99,20 +268,27 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
             default:
                 debug_printf("r300: vs: Bad semantic declaration %d\n",
                     decl->Semantic.SemanticName);
-                break;
+                assert(0);
         }
     }
 
     tgsi_parse_free(&parser);
 }
 
-
 void r300_translate_vertex_shader(struct r300_context* r300,
                                   struct r300_vertex_shader* vs)
 {
     struct r300_vertex_program_compiler compiler;
     struct tgsi_to_rc ttr;
 
+    /* Initialize. */
+    r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
+    r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
+
+    if (!r300_screen(r300->context.screen)->caps->has_tcl) {
+        r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl);
+    }
+
     /* Setup the compiler */
     rc_init(&compiler.Base);
 
@@ -137,7 +313,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
     /* Invoke the compiler */
     r3xx_compile_vertex_program(&compiler);
     if (compiler.Base.Error) {
-        /* Todo: Fail gracefully */
+        /* XXX Fail gracefully */
         fprintf(stderr, "r300 VP: Compiler error\n");
         abort();
     }
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 00b02bf510..283dd5a9e8 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -1,5 +1,6 @@
 /*
  * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -25,15 +26,20 @@
 
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_scan.h"
-
 #include "radeon_code.h"
 
+#include "r300_shader_semantics.h"
+
 struct r300_context;
 
 struct r300_vertex_shader {
     /* Parent class */
     struct pipe_shader_state state;
+
     struct tgsi_shader_info info;
+    struct r300_shader_semantics outputs;
+    int output_stream_loc_swtcl[16];
+    uint hwfmt[4];
 
     /* Has this shader been translated yet? */
     boolean translated;
@@ -42,9 +48,6 @@ struct r300_vertex_shader {
     struct r300_vertex_program_code code;
 };
 
-
-extern struct r300_vertex_program_code r300_passthrough_vertex_shader;
-
 void r300_translate_vertex_shader(struct r300_context* r300,
                                   struct r300_vertex_shader* vs);
 
-- 
cgit v1.2.3


From f55c088f89eeaa6d16480f5f373887c6a2965e21 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 27 Nov 2009 06:36:31 +0100
Subject: r300g: simplify allocations of FS input registers

---
 src/gallium/drivers/r300/r300_fs.c | 43 +++++++++++++-------------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 9cc833e606..79b01bb4dc 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -66,6 +66,7 @@ static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
     }
 }
 
+
 static void find_output_registers(struct r300_fragment_program_compiler * compiler,
                                   struct r300_fragment_shader * fs)
 {
@@ -93,38 +94,24 @@ static void allocate_hardware_inputs(
     void (*allocate)(void * data, unsigned input, unsigned hwreg),
     void * mydata)
 {
-    struct tgsi_shader_info* info = &((struct r300_fragment_shader*)c->UserData)->info;
-    int total_colors = 0;
-    int colors = 0;
-    int total_generic = 0;
-    int generic = 0;
-    int i;
-
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                total_colors++;
-                break;
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                total_generic++;
-                break;
+    struct r300_shader_semantics* inputs =
+        &((struct r300_fragment_shader*)c->UserData)->inputs;
+    int i, reg = 0;
+
+    /* Allocate input registers. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (inputs->color[i] != ATTR_UNUSED) {
+            allocate(mydata, inputs->color[i], reg++);
         }
     }
-
-    for(i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                allocate(mydata, i, colors);
-                colors++;
-                break;
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                allocate(mydata, i, total_colors + generic);
-                generic++;
-                break;
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (inputs->generic[i] != ATTR_UNUSED) {
+            allocate(mydata, inputs->generic[i], reg++);
         }
     }
+    if (inputs->fog != ATTR_UNUSED) {
+        allocate(mydata, inputs->fog, reg++);
+    }
 }
 
 void r300_translate_fragment_shader(struct r300_context* r300,
-- 
cgit v1.2.3


From af3dea36603687067197c22747537eaeb6c4ad2b Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 27 Nov 2009 10:19:20 +0100
Subject: r300g: simplify allocations of VS output registers

No need to parse TGSI tokens since it's easier to walk through shader
semantics.

Also fog coordinates now work reliably.
---
 src/gallium/drivers/r300/r300_vs.c | 82 ++++++++++++--------------------------
 1 file changed, 26 insertions(+), 56 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 49bff3e931..31248346bc 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -203,76 +203,46 @@ static void r300_stream_locations_swtcl(
 static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
 {
     struct r300_vertex_shader * vs = c->UserData;
+    struct r300_shader_semantics* outputs = &vs->outputs;
     struct tgsi_shader_info* info = &vs->info;
-    struct tgsi_parse_context parser;
-    struct tgsi_full_declaration * decl;
-    boolean pointsize = FALSE;
-    int out_colors = 0;
-    int colors = 0;
-    int out_generic = 0;
-    int generic = 0;
-    int i;
+    int i, reg = 0;
 
     /* Fill in the input mapping */
     for (i = 0; i < info->num_inputs; i++)
         c->code->inputs[i] = i;
 
-    /* Fill in the output mapping */
-    for (i = 0; i < info->num_outputs; i++) {
-        switch (info->output_semantic_name[i]) {
-            case TGSI_SEMANTIC_PSIZE:
-                pointsize = TRUE;
-                break;
-            case TGSI_SEMANTIC_COLOR:
-                out_colors++;
-                break;
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                out_generic++;
-                break;
-        }
+    /* Position. */
+    if (outputs->pos != ATTR_UNUSED) {
+        c->code->outputs[outputs->pos] = reg++;
+    } else {
+        assert(0);
     }
 
-    tgsi_parse_init(&parser, vs->state.tokens);
-
-    while (!tgsi_parse_end_of_tokens(&parser)) {
-        tgsi_parse_token(&parser);
-
-        if (parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
-            continue;
+    /* Point size. */
+    if (outputs->psize != ATTR_UNUSED) {
+        c->code->outputs[outputs->psize] = reg++;
+    }
 
-        decl = &parser.FullToken.FullDeclaration;
+    /* Colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (outputs->color[i] != ATTR_UNUSED) {
+            c->code->outputs[outputs->color[i]] = reg++;
+        }
+    }
 
-        if (decl->Declaration.File != TGSI_FILE_OUTPUT)
-            continue;
+    /* XXX Back-face colors. */
 
-        switch (decl->Semantic.SemanticName) {
-            case TGSI_SEMANTIC_POSITION:
-                c->code->outputs[decl->DeclarationRange.First] = 0;
-                break;
-            case TGSI_SEMANTIC_PSIZE:
-                c->code->outputs[decl->DeclarationRange.First] = 1;
-                break;
-            case TGSI_SEMANTIC_COLOR:
-                c->code->outputs[decl->DeclarationRange.First] = 1 +
-                    (pointsize ? 1 : 0) +
-                    colors++;
-                break;
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                c->code->outputs[decl->DeclarationRange.First] = 1 +
-                    (pointsize ? 1 : 0) +
-                    out_colors +
-                    generic++;
-                break;
-            default:
-                debug_printf("r300: vs: Bad semantic declaration %d\n",
-                    decl->Semantic.SemanticName);
-                assert(0);
+    /* Texture coordinates. */
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        if (outputs->generic[i] != ATTR_UNUSED) {
+            c->code->outputs[outputs->generic[i]] = reg++;
         }
     }
 
-    tgsi_parse_free(&parser);
+    /* Fog coordinates. */
+    if (outputs->fog != ATTR_UNUSED) {
+        c->code->outputs[outputs->fog] = reg++;
+    }
 }
 
 void r300_translate_vertex_shader(struct r300_context* r300,
-- 
cgit v1.2.3


From 9077ddaa2557e1e76c8a052c8d079ef3d443186b Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz <jakob@vmware.com>
Date: Wed, 25 Nov 2009 00:33:43 +0100
Subject: svga: Add header files for overlay support

---
 src/gallium/drivers/svga/include/svga_escape.h  |  89 +++++++++++
 src/gallium/drivers/svga/include/svga_overlay.h | 201 ++++++++++++++++++++++++
 2 files changed, 290 insertions(+)
 create mode 100644 src/gallium/drivers/svga/include/svga_escape.h
 create mode 100644 src/gallium/drivers/svga/include/svga_overlay.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/include/svga_escape.h b/src/gallium/drivers/svga/include/svga_escape.h
new file mode 100644
index 0000000000..7b85e9b8c8
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_escape.h
@@ -0,0 +1,89 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_escape.h --
+ *
+ *    Definitions for our own (vendor-specific) SVGA Escape commands.
+ */
+
+#ifndef _SVGA_ESCAPE_H_
+#define _SVGA_ESCAPE_H_
+
+
+/*
+ * Namespace IDs for the escape command
+ */
+
+#define SVGA_ESCAPE_NSID_VMWARE 0x00000000
+#define SVGA_ESCAPE_NSID_DEVEL  0xFFFFFFFF
+
+
+/*
+ * Within SVGA_ESCAPE_NSID_VMWARE, we multiplex commands according to
+ * the first DWORD of escape data (after the nsID and size). As a
+ * guideline we're using the high word and low word as a major and
+ * minor command number, respectively.
+ *
+ * Major command number allocation:
+ *
+ *   0000: Reserved
+ *   0001: SVGA_ESCAPE_VMWARE_LOG (svga_binary_logger.h)
+ *   0002: SVGA_ESCAPE_VMWARE_VIDEO (svga_overlay.h)
+ *   0003: SVGA_ESCAPE_VMWARE_HINT (svga_escape.h)
+ */
+
+#define SVGA_ESCAPE_VMWARE_MAJOR_MASK  0xFFFF0000
+
+
+/*
+ * SVGA Hint commands.
+ *
+ * These escapes let the SVGA driver provide optional information to
+ * the host about the state of the guest or guest applications. The
+ * host can use these hints to make user interface or performance
+ * decisions.
+ *
+ * Notes:
+ *
+ *   - SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN is deprecated for guests
+ *     that use the SVGA Screen Object extension. Instead of sending
+ *     this escape, use the SVGA_SCREEN_FULLSCREEN_HINT flag on your
+ *     Screen Object.
+ */
+
+#define SVGA_ESCAPE_VMWARE_HINT               0x00030000
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+
+typedef
+struct {
+   uint32 command;
+   uint32 fullscreen;
+   struct {
+      int32 x, y;
+   } monitorPosition;
+} SVGAEscapeHintFullscreen;
+
+#endif /* _SVGA_ESCAPE_H_ */
diff --git a/src/gallium/drivers/svga/include/svga_overlay.h b/src/gallium/drivers/svga/include/svga_overlay.h
new file mode 100644
index 0000000000..82c1d3ff3e
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_overlay.h
@@ -0,0 +1,201 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_overlay.h --
+ *
+ *    Definitions for video-overlay support.
+ */
+
+#ifndef _SVGA_OVERLAY_H_
+#define _SVGA_OVERLAY_H_
+
+#include "svga_reg.h"
+
+/*
+ * Video formats we support
+ */
+
+#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
+#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
+#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+
+typedef enum {   /* Overlay frame formats; each non-zero value equals the matching VMWARE_FOURCC_* code. */
+   SVGA_OVERLAY_FORMAT_INVALID = 0,                 /* the only value that is not one of the FOURCC codes above */
+   SVGA_OVERLAY_FORMAT_YV12 = VMWARE_FOURCC_YV12,   /* 'Y' 'V' '1' '2' */
+   SVGA_OVERLAY_FORMAT_YUY2 = VMWARE_FOURCC_YUY2,   /* 'Y' 'U' 'Y' '2' */
+   SVGA_OVERLAY_FORMAT_UYVY = VMWARE_FOURCC_UYVY,   /* 'U' 'Y' 'V' 'Y' */
+} SVGAOverlayFormat;
+
+#define SVGA_VIDEO_COLORKEY_MASK             0x00ffffff
+
+#define SVGA_ESCAPE_VMWARE_VIDEO             0x00020000
+
+#define SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS    0x00020001
+        /* FIFO escape layout:
+         * Type, Stream Id, (Register Id, Value) pairs */
+
+#define SVGA_ESCAPE_VMWARE_VIDEO_FLUSH       0x00020002
+        /* FIFO escape layout:
+         * Type, Stream Id */
+
+typedef   /* Escape payload laid out as: Type, Stream Id, then (Register Id, Value) pairs. */
+struct SVGAEscapeVideoSetRegs {
+   struct {
+      uint32 cmdType;    /* "Type" word; presumably SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS -- confirm with callers */
+      uint32 streamId;   /* "Stream Id" word */
+   } header;
+
+   // May include zero or more items.
+   struct {
+      uint32 registerId; /* "Register Id" half of a pair */
+      uint32 value;      /* "Value" half of a pair */
+   } items[1];           /* declared with one element; per the note above the real count varies */
+} SVGAEscapeVideoSetRegs;
+
+typedef   /* Escape payload laid out as: Type, Stream Id (no register pairs). */
+struct SVGAEscapeVideoFlush {
+   uint32 cmdType;    /* "Type" word; presumably SVGA_ESCAPE_VMWARE_VIDEO_FLUSH -- confirm with callers */
+   uint32 streamId;   /* "Stream Id" word */
+} SVGAEscapeVideoFlush;
+
+
+/*
+ * Struct definitions for the video overlay commands built on
+ * SVGAFifoCmdEscape.
+ */
+typedef
+struct {
+   uint32 command;
+   uint32 overlay;
+} SVGAFifoEscapeCmdVideoBase;
+
+typedef
+struct {
+   SVGAFifoEscapeCmdVideoBase videoCmd;
+} SVGAFifoEscapeCmdVideoFlush;
+
+typedef
+struct {
+   SVGAFifoEscapeCmdVideoBase videoCmd;
+   struct {
+      uint32 regId;
+      uint32 value;
+   } items[1];
+} SVGAFifoEscapeCmdVideoSetRegs;
+
+typedef
+struct {
+   SVGAFifoEscapeCmdVideoBase videoCmd;
+   struct {
+      uint32 regId;
+      uint32 value;
+   } items[SVGA_VIDEO_NUM_REGS];
+} SVGAFifoEscapeCmdVideoSetAllRegs;
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * VMwareVideoGetAttributes --
+ *
+ *      Computes the size, pitches and offsets for YUV frames.
+ *
+ * Results:
+ *      TRUE on success; FALSE if 'format' is not YV12, YUY2 or UYVY.
+ *
+ * Side effects:
+ *      '*width' (and, for YV12, '*height') is rounded up to an even value;
+ *      '*size' and the optional 'pitches' / 'offsets' arrays are filled in.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE Bool
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
+                         uint32 *width,                     // IN / OUT
+                         uint32 *height,                    // IN / OUT
+                         uint32 *size,                      // OUT
+                         uint32 *pitches,                   // OUT (optional)
+                         uint32 *offsets)                   // OUT (optional)
+{
+    int tmp;   /* pitch, then plane size, of one half-resolution YV12 plane */
+
+    *width = (*width + 1) & ~1;   /* round width up to even, for every format (even unsupported ones) */
+
+    if (offsets) {
+        offsets[0] = 0;   /* first plane always starts at the beginning of the frame */
+    }
+
+    switch (format) {
+    case VMWARE_FOURCC_YV12:   /* three planes: [0] full-size, [1] and [2] half-size */
+       *height = (*height + 1) & ~1;   /* round height up to even */
+       *size = (*width + 3) & ~3;      /* plane 0 pitch: width rounded up to a multiple of 4 */
+
+       if (pitches) {
+          pitches[0] = *size;
+       }
+
+       *size *= *height;   /* *size now holds the size of plane 0 */
+
+       if (offsets) {
+          offsets[1] = *size;   /* plane 1 starts right after plane 0 */
+       }
+
+       tmp = ((*width >> 1) + 3) & ~3;   /* half-width pitch, rounded up to a multiple of 4 */
+
+       if (pitches) {
+          pitches[1] = pitches[2] = tmp;   /* planes 1 and 2 share the same pitch */
+       }
+
+       tmp *= (*height >> 1);   /* size of one half-height plane */
+       *size += tmp;            /* planes 0 + 1 */
+
+       if (offsets) {
+          offsets[2] = *size;   /* plane 2 starts right after plane 1 */
+       }
+
+       *size += tmp;   /* total: planes 0 + 1 + 2 */
+       break;
+
+    case VMWARE_FOURCC_YUY2:
+    case VMWARE_FOURCC_UYVY:   /* single plane; only pitches[0] / offsets[0] are written */
+       *size = *width * 2;     /* pitch is twice the (even) width */
+
+       if (pitches) {
+          pitches[0] = *size;
+       }
+
+       *size *= *height;   /* note: height is NOT rounded to even on this path */
+       break;
+
+    default:
+       return FALSE;   /* *width and offsets[0] have already been written; *size is left untouched */
+    }
+
+    return TRUE;
+}
+
+#endif // _SVGA_OVERLAY_H_
-- 
cgit v1.2.3


From eb68acaf6db689ba2fa62b188ff14507a7007266 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 1 Dec 2009 16:32:33 +0000
Subject: i965g: don't reference unused vars in RSQ

Probably overly pedantic
---
 src/gallium/drivers/i965/brw_vs_emit.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 20cec0f59b..6d8366f862 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1462,7 +1462,8 @@ static void emit_insn(struct brw_vs_compile *c,
       emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
       break;
    case TGSI_OPCODE_RSQ:
-      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
+      emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, 
+                 brw_swizzle(args[0], 0,0,0,0), BRW_MATH_PRECISION_FULL);
       break;
    case TGSI_OPCODE_SEQ:
       emit_seq(p, dst, args[0], args[1]);
@@ -1618,7 +1619,7 @@ void brw_vs_emit(struct brw_vs_compile *c)
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction *inst;
 
-   if (BRW_DEBUG & DEBUG_VS)
+//   if (BRW_DEBUG & DEBUG_VS)
       tgsi_dump(c->vp->tokens, 0); 
 
    c->stack_index = brw_indirect(0, 0);
-- 
cgit v1.2.3


From 63a8637c7425f64f5e48c2df2b60cc56ae6237ab Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 1 Dec 2009 16:39:02 +0000
Subject: i965g: add missing header

---
 src/gallium/drivers/i965/brw_disasm.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 src/gallium/drivers/i965/brw_disasm.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
new file mode 100644
index 0000000000..77d402d35e
--- /dev/null
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef BRW_DISASM_H
+#define BRW_DISASM_H
+
+struct brw_instruction;
+
+int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
+int brw_disasm (FILE *file, 
+                const struct brw_instruction *inst,
+                unsigned count);
+
+#endif
+
-- 
cgit v1.2.3


From ba4cb8b2caac69c6d2b210a5c3c634d8c1c20940 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 1 Dec 2009 17:04:46 +0000
Subject: i965g: nasty hack for clearing y-tiled surfaces

---
 src/gallium/drivers/i965/brw_pipe_clear.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index f846b4342c..211be88178 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -79,7 +79,11 @@ try_clear( struct brw_context *brw,
       BR13 |= BR13_565;
    }
 
-   assert(surface->tiling != BRW_TILING_Y);
+   /* XXX: nasty hack for clearing depth buffers
+    */
+   if (surface->tiling == BRW_TILING_Y) {
+      x2 = pitch;
+   }
 
    if (surface->tiling == BRW_TILING_X) {
       CMD |= XY_DST_TILED;
-- 
cgit v1.2.3


From e0399fddf2efd556ece8b81078368e6ab388c3b7 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 30 Nov 2009 09:21:49 -0700
Subject: softpipe: setup machine->Face without a conditional

---
 src/gallium/drivers/softpipe/sp_fs_exec.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index a8999ed347..27fa126b7c 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -127,11 +127,8 @@ exec_run( const struct sp_fragment_shader *base,
                     (float)quad->input.x0, (float)quad->input.y0, 
                     &machine->QuadPos);
 
-   if (quad->input.facing) {
-      machine->Face = -1.0f;
-   } else {
-      machine->Face = 1.0f;
-   }
+   /* convert 0 to 1.0 and 1 to -1.0 */
+   machine->Face = (float) (quad->input.facing * -2 + 1);
 
    quad->inout.mask &= tgsi_exec_machine_run( machine );
    if (quad->inout.mask == 0)
-- 
cgit v1.2.3


From c78748a5274e58bcbb122923edf81065be9bbe16 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 2 Dec 2009 02:08:26 +0100
Subject: gallium: adapt drivers to interface cleanups

---
 src/gallium/drivers/cell/ppu/cell_texture.c    |  30 +++---
 src/gallium/drivers/i915/i915_surface.c        |  21 +++--
 src/gallium/drivers/i915/i915_texture.c        | 125 ++++++++++---------------
 src/gallium/drivers/llvmpipe/lp_setup.c        |   2 +-
 src/gallium/drivers/llvmpipe/lp_tex_cache.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_texture.c      |  48 +++++-----
 src/gallium/drivers/llvmpipe/lp_tile_cache.c   |   6 +-
 src/gallium/drivers/r300/r300_emit.c           |  12 +--
 src/gallium/drivers/r300/r300_screen.c         |   9 +-
 src/gallium/drivers/r300/r300_texture.c        |  12 +--
 src/gallium/drivers/svga/svga_screen_texture.c |  37 ++++----
 src/gallium/drivers/svga/svga_state_vs.c       |   2 +-
 12 files changed, 138 insertions(+), 168 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index e6b8a87045..77a57aef14 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -65,14 +65,11 @@ cell_texture_layout(struct cell_texture *ct)
       w_tile = align(width, TILE_SIZE);
       h_tile = align(height, TILE_SIZE);
 
-      pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);  
-      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);  
-
-      ct->stride[level] = pt->nblocksx[level] * pt->block.size;
+      ct->stride[level] = pf_get_stride(pt->format, w_tile);
 
       ct->level_offset[level] = ct->buffer_size;
 
-      size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+      size = ct->stride[level] * pf_get_nblocksy(pt->format, h_tile);
       if (pt->target == PIPE_TEXTURE_CUBE)
          size *= 6;
       else
@@ -283,10 +280,12 @@ cell_get_tex_surface(struct pipe_screen *screen,
       ps->zslice = zslice;
 
       if (pt->target == PIPE_TEXTURE_CUBE) {
-         ps->offset += face * pt->nblocksy[level] * ct->stride[level];
+         unsigned h_tile = align(ps->height, TILE_SIZE);
+         ps->offset += face * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
-         ps->offset += zslice * pt->nblocksy[level] * ct->stride[level];
+         unsigned h_tile = align(ps->height, TILE_SIZE);
+         ps->offset += zslice * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level];
       }
       else {
          assert(face == 0);
@@ -327,14 +326,10 @@ cell_get_tex_transfer(struct pipe_screen *screen,
    if (ctrans) {
       struct pipe_transfer *pt = &ctrans->base;
       pipe_texture_reference(&pt->texture, texture);
-      pt->format = texture->format;
-      pt->block = texture->block;
       pt->x = x;
       pt->y = y;
       pt->width = w;
       pt->height = h;
-      pt->nblocksx = texture->nblocksx[level];
-      pt->nblocksy = texture->nblocksy[level];
       pt->stride = ct->stride[level];
       pt->usage = usage;
       pt->face = face;
@@ -344,10 +339,12 @@ cell_get_tex_transfer(struct pipe_screen *screen,
       ctrans->offset = ct->level_offset[level];
 
       if (texture->target == PIPE_TEXTURE_CUBE) {
-         ctrans->offset += face * pt->nblocksy * pt->stride;
+         unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
+         ctrans->offset += face * pf_get_nblocksy(texture->format, h_tile) * pt->stride;
       }
       else if (texture->target == PIPE_TEXTURE_3D) {
-         ctrans->offset += zslice * pt->nblocksy * pt->stride;
+         unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
+         ctrans->offset += zslice * pf_get_nblocksy(texture->format, h_tile) * pt->stride;
       }
       else {
          assert(face == 0);
@@ -400,7 +397,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
     * Create a buffer of ordinary memory for the linear texture.
     * This is the memory that the user will read/write.
     */
-   size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size;
+   size = pf_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
+          pf_get_nblocksy(pt->format, align(texHeight, TILE_SIZE));
 
    ctrans->map = align_malloc(size, 16);
    if (!ctrans->map)
@@ -408,7 +406,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
 
    if (transfer->usage & PIPE_TRANSFER_READ) {
       /* need to untwiddle the texture to make a linear version */
-      const uint bpp = pf_get_size(ct->base.format);
+      const uint bpp = pf_get_blocksize(ct->base.format);
       if (bpp == 4) {
          const uint *src = (uint *) (ct->mapped + ctrans->offset);
          uint *dst = ctrans->map;
@@ -451,7 +449,7 @@ cell_transfer_unmap(struct pipe_screen *screen,
       /* The user wrote new texture data into the mapped buffer.
        * We need to convert the new linear data into the twiddled/tiled format.
        */
-      const uint bpp = pf_get_size(ct->base.format);
+      const uint bpp = pf_get_blocksize(ct->base.format);
       if (bpp == 4) {
          const uint *src = ctrans->map;
          uint *dst = (uint *) (ct->mapped + ctrans->offset);
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index ab8331f3e6..24e1024aaa 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -48,17 +48,19 @@ i915_surface_copy(struct pipe_context *pipe,
 {
    struct i915_texture *dst_tex = (struct i915_texture *)dst->texture;
    struct i915_texture *src_tex = (struct i915_texture *)src->texture;
+   struct pipe_texture *dpt = &dst_tex->base;
+   struct pipe_texture *spt = &src_tex->base;
 
    assert( dst != src );
-   assert( dst_tex->base.block.size == src_tex->base.block.size );
-   assert( dst_tex->base.block.width == src_tex->base.block.height );
-   assert( dst_tex->base.block.height == src_tex->base.block.height );
-   assert( dst_tex->base.block.width == 1 );
-   assert( dst_tex->base.block.height == 1 );
+   assert( pf_get_blocksize(dpt->format) == pf_get_blocksize(spt->format) );
+   assert( pf_get_blockwidth(dpt->format) == pf_get_blockwidth(spt->format) );
+   assert( pf_get_blockheight(dpt->format) == pf_get_blockheight(spt->format) );
+   assert( pf_get_blockwidth(dpt->format) == 1 );
+   assert( pf_get_blockheight(dpt->format) == 1 );
 
    i915_copy_blit( i915_context(pipe),
                    FALSE,
-                   dst_tex->base.block.size,
+                   pf_get_blocksize(dpt->format),
                    (unsigned short) src_tex->stride, src_tex->buffer, src->offset,
                    (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset,
                    (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
@@ -72,12 +74,13 @@ i915_surface_fill(struct pipe_context *pipe,
 		  unsigned width, unsigned height, unsigned value)
 {
    struct i915_texture *tex = (struct i915_texture *)dst->texture;
+   struct pipe_texture *pt = &tex->base;
 
-   assert(tex->base.block.width == 1);
-   assert(tex->base.block.height == 1);
+   assert(pf_get_blockwidth(pt->format) == 1);
+   assert(pf_get_blockheight(pt->format) == 1);
 
    i915_fill_blit( i915_context(pipe),
-                   tex->base.block.size,
+                   pf_get_blocksize(pt->format),
                    (unsigned short) tex->stride,
                    tex->buffer, dst->offset,
                    (short) dstx, (short) dsty,
diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
index c7b86dd4c5..b28b413771 100644
--- a/src/gallium/drivers/i915/i915_texture.c
+++ b/src/gallium/drivers/i915/i915_texture.c
@@ -74,6 +74,9 @@ static const int step_offsets[6][2] = {
    {-1, 1}
 };
 
+/* XXX really need twice the size if x is already pot?
+   Otherwise just use util_next_power_of_two?
+*/
 static unsigned
 power_of_two(unsigned x)
 {
@@ -83,13 +86,6 @@ power_of_two(unsigned x)
    return value;
 }
 
-static unsigned
-round_up(unsigned n, unsigned multiple)
-{
-   return (n + multiple - 1) & ~(multiple - 1);
-}
-
-
 /*
  * More advanced helper funcs
  */
@@ -101,13 +97,8 @@ i915_miptree_set_level_info(struct i915_texture *tex,
                              unsigned nr_images,
                              unsigned w, unsigned h, unsigned d)
 {
-   struct pipe_texture *pt = &tex->base;
-
    assert(level < PIPE_MAX_TEXTURE_LEVELS);
 
-   pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
-   pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
-
    tex->nr_images[level] = nr_images;
 
    /*
@@ -138,7 +129,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex,
 
    assert(img < tex->nr_images[level]);
 
-   tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size;
+   tex->image_offset[level][img] = y * tex->stride + x * pf_get_blocksize(tex->base.format);
 
    /*
    printf("%s level %d img %d pos %d,%d image_offset %x\n",
@@ -160,28 +151,28 @@ i915_scanout_layout(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
 
-   if (pt->last_level > 0 || pt->block.size != 4)
+   if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width0,
-                               tex->base.height0,
+                               pt->width0,
+                               pt->height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
-   if (tex->base.width0 >= 240) {
-      tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
-      tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+   if (pt->width0 >= 240) {
+      tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
+      tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
       tex->hw_tiled = INTEL_TILE_X;
-   } else if (tex->base.width0 == 64 && tex->base.height0 == 64) {
-      tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
-      tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+   } else if (pt->width0 == 64 && pt->height0 == 64) {
+      tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
+      tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
    } else {
       return FALSE;
    }
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width0, tex->base.height0, pt->block.size,
+      pt->width0, pt->height0, pf_get_blocksize(pt->format),
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -195,25 +186,25 @@ i915_display_target_layout(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
 
-   if (pt->last_level > 0 || pt->block.size != 4)
+   if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4)
       return FALSE;
 
    /* fallback to normal textures for small textures */
-   if (tex->base.width0 < 240)
+   if (pt->width0 < 240)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width0,
-                               tex->base.height0,
+                               pt->width0,
+                               pt->height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
-   tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
-   tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
+   tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
+   tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
    tex->hw_tiled = INTEL_TILE_X;
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width0, tex->base.height0, pt->block.size,
+      pt->width0, pt->height0, pf_get_blocksize(pt->format),
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -226,34 +217,32 @@ i915_miptree_layout_2d(struct i915_texture *tex)
    unsigned level;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
-   unsigned nblocksx = pt->nblocksx[0];
-   unsigned nblocksy = pt->nblocksy[0];
+   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0);
 
    /* used for scanouts that need special layouts */
-   if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+   if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
       if (i915_scanout_layout(tex))
          return;
 
    /* for shared buffers we use some very like scanout */
-   if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
+   if (pt->tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
       if (i915_display_target_layout(tex))
          return;
 
-   tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
    tex->total_nblocksy = 0;
 
    for (level = 0; level <= pt->last_level; level++) {
       i915_miptree_set_level_info(tex, level, 1, width, height, 1);
       i915_miptree_set_image_offset(tex, level, 0, 0, tex->total_nblocksy);
 
-      nblocksy = round_up(MAX2(2, nblocksy), 2);
+      nblocksy = align(MAX2(2, nblocksy), 2);
 
       tex->total_nblocksy += nblocksy;
 
       width = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksx = pf_get_nblocksx(&pt->block, width);
-      nblocksy = pf_get_nblocksy(&pt->block, height);
+      nblocksy = pf_get_nblocksy(pt->format, height);
    }
 }
 
@@ -266,13 +255,12 @@ i915_miptree_layout_3d(struct i915_texture *tex)
    unsigned width = pt->width0;
    unsigned height = pt->height0;
    unsigned depth = pt->depth0;
-   unsigned nblocksx = pt->nblocksx[0];
-   unsigned nblocksy = pt->nblocksy[0];
+   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0);
    unsigned stack_nblocksy = 0;
 
    /* Calculate the size of a single slice. 
     */
-   tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
 
    /* XXX: hardware expects/requires 9 levels at minimum.
     */
@@ -283,8 +271,7 @@ i915_miptree_layout_3d(struct i915_texture *tex)
 
       width = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksx = pf_get_nblocksx(&pt->block, width);
-      nblocksy = pf_get_nblocksy(&pt->block, height);
+      nblocksy = pf_get_nblocksy(pt->format, height);
    }
 
    /* Fixup depth image_offsets: 
@@ -309,14 +296,14 @@ i915_miptree_layout_cube(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
    unsigned width = pt->width0, height = pt->height0;
-   const unsigned nblocks = pt->nblocksx[0];
+   const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0);
    unsigned level;
    unsigned face;
 
    assert(width == height); /* cubemap images are square */
 
    /* double pitch for cube layouts */
-   tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+   tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4);
    tex->total_nblocksy = nblocks * 4;
 
    for (level = 0; level <= pt->last_level; level++) {
@@ -379,8 +366,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
    unsigned y = 0;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
-   unsigned nblocksx = pt->nblocksx[0];
-   unsigned nblocksy = pt->nblocksy[0];
+   unsigned nblocksx = pf_get_nblocksx(pt->format, pt->width0);
+   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0);
 
    /* used for scanouts that need special layouts */
    if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
@@ -392,7 +379,7 @@ i945_miptree_layout_2d(struct i915_texture *tex)
       if (i915_display_target_layout(tex))
          return;
 
-   tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
 
    /* May need to adjust pitch to accomodate the placement of
     * the 2nd mipmap level.  This occurs when the alignment
@@ -401,11 +388,11 @@ i945_miptree_layout_2d(struct i915_texture *tex)
     */
    if (pt->last_level > 0) {
       unsigned mip1_nblocksx 
-         = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x)
-         + pf_get_nblocksx(&pt->block, u_minify(width, 2));
+         = align(pf_get_nblocksx(pt->format, u_minify(width, 1)), align_x)
+         + pf_get_nblocksx(pt->format, u_minify(width, 2));
 
       if (mip1_nblocksx > nblocksx)
-         tex->stride = mip1_nblocksx * pt->block.size;
+         tex->stride = mip1_nblocksx * pf_get_blocksize(pt->format);
    }
 
    /* Pitch must be a whole number of dwords
@@ -435,8 +422,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
 
       width  = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksx = pf_get_nblocksx(&pt->block, width);
-      nblocksy = pf_get_nblocksy(&pt->block, height);
+      nblocksx = pf_get_nblocksx(pt->format, width);
+      nblocksy = pf_get_nblocksy(pt->format, height);
    }
 }
 
@@ -447,17 +434,16 @@ i945_miptree_layout_3d(struct i915_texture *tex)
    unsigned width = pt->width0;
    unsigned height = pt->height0;
    unsigned depth = pt->depth0;
-   unsigned nblocksx = pt->nblocksx[0];
-   unsigned nblocksy = pt->nblocksy[0];
+   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0);
    unsigned pack_x_pitch, pack_x_nr;
    unsigned pack_y_pitch;
    unsigned level;
 
-   tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4);
+   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
    tex->total_nblocksy = 0;
 
-   pack_y_pitch = MAX2(pt->nblocksy[0], 2);
-   pack_x_pitch = tex->stride / pt->block.size;
+   pack_y_pitch = MAX2(nblocksy, 2);
+   pack_x_pitch = tex->stride / pf_get_blocksize(pt->format);
    pack_x_nr = 1;
 
    for (level = 0; level <= pt->last_level; level++) {
@@ -482,7 +468,7 @@ i945_miptree_layout_3d(struct i915_texture *tex)
       if (pack_x_pitch > 4) {
          pack_x_pitch >>= 1;
          pack_x_nr <<= 1;
-         assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride);
+         assert(pack_x_pitch * pack_x_nr * pf_get_blocksize(pt->format) <= tex->stride);
       }
 
       if (pack_y_pitch > 2) {
@@ -492,8 +478,7 @@ i945_miptree_layout_3d(struct i915_texture *tex)
       width = u_minify(width, 1);
       height = u_minify(height, 1);
       depth = u_minify(depth, 1);
-      nblocksx = pf_get_nblocksx(&pt->block, width);
-      nblocksy = pf_get_nblocksy(&pt->block, height);
+      nblocksy = pf_get_nblocksy(pt->format, height);
    }
 }
 
@@ -503,7 +488,7 @@ i945_miptree_layout_cube(struct i915_texture *tex)
    struct pipe_texture *pt = &tex->base;
    unsigned level;
 
-   const unsigned nblocks = pt->nblocksx[0];
+   const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0);
    unsigned face;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
@@ -523,9 +508,9 @@ i945_miptree_layout_cube(struct i915_texture *tex)
     * or the final row of 4x4, 2x2 and 1x1 faces below this.
     */
    if (nblocks > 32)
-      tex->stride = round_up(nblocks * pt->block.size * 2, 4);
+      tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4);
    else
-      tex->stride = 14 * 8 * pt->block.size;
+      tex->stride = 14 * 8 * pf_get_blocksize(pt->format);
 
    tex->total_nblocksy = nblocks * 4;
 
@@ -645,9 +630,6 @@ i915_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&tex->base.reference, 1);
    tex->base.screen = screen;
 
-   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0);
-   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0);
-   
    if (is->is_i945) {
       if (!i945_miptree_layout(tex))
          goto fail;
@@ -829,14 +811,10 @@ i915_get_tex_transfer(struct pipe_screen *screen,
    trans = CALLOC_STRUCT(i915_transfer);
    if (trans) {
       pipe_texture_reference(&trans->base.texture, texture);
-      trans->base.format = trans->base.format;
       trans->base.x = x;
       trans->base.y = y;
       trans->base.width = w;
       trans->base.height = h;
-      trans->base.block = texture->block;
-      trans->base.nblocksx = texture->nblocksx[level];
-      trans->base.nblocksy = texture->nblocksy[level];
       trans->base.stride = tex->stride;
       trans->offset = offset;
       trans->base.usage = usage;
@@ -852,6 +830,7 @@ i915_transfer_map(struct pipe_screen *screen,
    struct intel_winsys *iws = i915_screen(tex->base.screen)->iws;
    char *map;
    boolean write = FALSE;
+   enum pipe_format format = tex->base.format;
 
    if (transfer->usage & PIPE_TRANSFER_WRITE)
       write = TRUE;
@@ -861,8 +840,8 @@ i915_transfer_map(struct pipe_screen *screen,
       return NULL;
 
    return map + i915_transfer(transfer)->offset +
-      transfer->y / transfer->block.height * transfer->stride +
-      transfer->x / transfer->block.width * transfer->block.size;
+      transfer->y / pf_get_blockheight(format) * transfer->stride +
+      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
 }
 
 static void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index ffcbc9a379..b4aabd4d7c 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -166,7 +166,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
       assert((y % 2) == 0);
       depth = llvmpipe->zsbuf_map +
               y*llvmpipe->zsbuf_transfer->stride +
-              2*x*llvmpipe->zsbuf_transfer->block.size;
+              2*x*pf_get_blocksize(llvmpipe->zsbuf_transfer->texture->format);
    }
    else
       depth = NULL;
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index c7c4143bc6..5dbc597d2c 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -291,7 +291,7 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
             assert(0);
          }
 
-         util_format_read_4ub(tc->tex_trans->format,
+         util_format_read_4ub(tc->tex_trans->texture->format,
                               (uint8_t *)tile->color, sizeof tile->color[0],
                               tc->tex_trans_map, tc->tex_trans->stride,
                               x, y, w, h);
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 65d62fd072..f099f903bd 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -48,7 +48,6 @@
 /* Simple, maximally packed layout.
  */
 
-
 /* Conventional allocation path for non-display textures:
  */
 static boolean
@@ -63,20 +62,15 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 
    unsigned buffer_size = 0;
 
-   pf_get_block(lpt->base.format, &lpt->base.block);
-
    for (level = 0; level <= pt->last_level; level++) {
       unsigned nblocksx, nblocksy;
 
-      pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
-      pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
-
       /* Allocate storage for whole quads. This is particularly important
        * for depth surfaces, which are currently stored in a swizzled format. */
-      nblocksx = pf_get_nblocksx(&pt->block, align(width, 2));
-      nblocksy = pf_get_nblocksy(&pt->block, align(height, 2));
+      nblocksx = pf_get_nblocksx(pt->format, align(width, 2));
+      nblocksy = pf_get_nblocksy(pt->format, align(height, 2));
 
-      lpt->stride[level] = align(nblocksx*pt->block.size, 16);
+      lpt->stride[level] = align(nblocksx * pf_get_blocksize(pt->format), 16);
 
       lpt->level_offset[level] = buffer_size;
 
@@ -100,10 +94,6 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
 {
    struct llvmpipe_winsys *winsys = screen->winsys;
 
-   pf_get_block(lpt->base.format, &lpt->base.block);
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
-
    lpt->dt = winsys->displaytarget_create(winsys,
                                           lpt->base.format,
                                           lpt->base.width0,
@@ -180,8 +170,6 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    lpt->base = *base;
    pipe_reference_init(&lpt->base.reference, 1);
    lpt->base.screen = screen;
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
    lpt->stride[0] = stride[0];
 
    pipe_buffer_reference(&lpt->buffer, buffer);
@@ -255,11 +243,17 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
       ps->level = level;
       ps->zslice = zslice;
 
+      /* XXX: shouldn't this rather be
+         tex_height = align(ps->height, 2);
+         to account for the alignment done in llvmpipe_texture_layout()?
+      */
       if (pt->target == PIPE_TEXTURE_CUBE) {
-         ps->offset += face * pt->nblocksy[level] * lpt->stride[level];
+         unsigned tex_height = ps->height;
+         ps->offset += face * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
-         ps->offset += zslice * pt->nblocksy[level] * lpt->stride[level];
+         unsigned tex_height = ps->height;
+         ps->offset += zslice * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
       }
       else {
          assert(face == 0);
@@ -300,14 +294,10 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
    if (lpt) {
       struct pipe_transfer *pt = &lpt->base;
       pipe_texture_reference(&pt->texture, texture);
-      pt->format = texture->format;
-      pt->block = texture->block;
       pt->x = x;
       pt->y = y;
       pt->width = w;
       pt->height = h;
-      pt->nblocksx = texture->nblocksx[level];
-      pt->nblocksy = texture->nblocksy[level];
       pt->stride = lptex->stride[level];
       pt->usage = usage;
       pt->face = face;
@@ -316,11 +306,17 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
 
       lpt->offset = lptex->level_offset[level];
 
+      /* XXX: shouldn't this rather be
+         tex_height = align(u_minify(texture->height0, level), 2);
+         to account for the alignment done in llvmpipe_texture_layout()?
+      */
       if (texture->target == PIPE_TEXTURE_CUBE) {
-         lpt->offset += face * pt->nblocksy * pt->stride;
+         unsigned tex_height = u_minify(texture->height0, level);
+         lpt->offset += face *  pf_get_nblocksy(texture->format, tex_height) * pt->stride;
       }
       else if (texture->target == PIPE_TEXTURE_3D) {
-         lpt->offset += zslice * pt->nblocksy * pt->stride;
+         unsigned tex_height = u_minify(texture->height0, level);
+         lpt->offset += zslice * pf_get_nblocksy(texture->format, tex_height) * pt->stride;
       }
       else {
          assert(face == 0);
@@ -352,9 +348,11 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
    struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    ubyte *map, *xfer_map;
    struct llvmpipe_texture *lpt;
+   enum pipe_format format;
 
    assert(transfer->texture);
    lpt = llvmpipe_texture(transfer->texture);
+   format = lpt->base.format;
 
    if(lpt->dt) {
       struct llvmpipe_winsys *winsys = screen->winsys;
@@ -379,8 +377,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
    }
    
    xfer_map = map + llvmpipe_transfer(transfer)->offset +
-      transfer->y / transfer->block.height * transfer->stride +
-      transfer->x / transfer->block.width * transfer->block.size;
+      transfer->y / pf_get_blockheight(format) * transfer->stride +
+      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
    /*printf("map = %p  xfer map = %p\n", map, xfer_map);*/
    return xfer_map;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index ec3e002d62..50891c4227 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -252,13 +252,13 @@ lp_flush_tile_cache(struct llvmpipe_tile_cache *tc)
                case LP_TILE_STATUS_CLEAR:
                   /* Actually clear the tiles which were flagged as being in a
                    * clear state. */
-                  util_fill_rect(tc->transfer_map, &pt->block, pt->stride,
+                  util_fill_rect(tc->transfer_map, pt->texture->format, pt->stride,
                                  x, y, w, h,
                                  tc->clear_val);
                   break;
 
                case LP_TILE_STATUS_DEFINED:
-                  lp_tile_write_4ub(pt->format,
+                  lp_tile_write_4ub(pt->texture->format,
                                     tile->color,
                                     tc->transfer_map, pt->stride,
                                     x, y, w, h);
@@ -306,7 +306,7 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
       y &= ~(TILE_SIZE - 1);
 
       if (!pipe_clip_tile(x, y, &w, &h, tc->transfer))
-         lp_tile_read_4ub(pt->format,
+         lp_tile_read_4ub(pt->texture->format,
                           tile->color,
                           tc->transfer_map, tc->transfer->stride,
                           x, y, w, h);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 98a39390bf..a479842f9e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -631,10 +631,10 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
     for (i = 0; i < aos_count - 1; i += 2) {
         int buf_num1 = velem[i].vertex_buffer_index;
         int buf_num2 = velem[i+1].vertex_buffer_index;
-        assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
-        assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0);
-        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |
-               (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));
+        assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0);
+        assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_blocksize(velem[i+1].src_format) % 4 == 0);
+        OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |
+               (pf_get_blocksize(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));
         OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset +
                offset * vbuf[buf_num1].stride);
         OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset +
@@ -642,8 +642,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
     }
     if (aos_count & 1) {
         int buf_num = velem[i].vertex_buffer_index;
-        assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
-        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
+        assert(vbuf[buf_num].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0);
+        OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
         OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset +
                offset * vbuf[buf_num].stride);
     }
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 390b63007e..032fa69ec0 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -311,14 +311,10 @@ r300_get_tex_transfer(struct pipe_screen *screen,
     trans = CALLOC_STRUCT(r300_transfer);
     if (trans) {
         pipe_texture_reference(&trans->transfer.texture, texture);
-        trans->transfer.format = texture->format;
         trans->transfer.x = x;
         trans->transfer.y = y;
         trans->transfer.width = w;
         trans->transfer.height = h;
-        trans->transfer.block = texture->block;
-        trans->transfer.nblocksx = texture->nblocksx[level];
-        trans->transfer.nblocksy = texture->nblocksy[level];
         trans->transfer.stride = r300_texture_get_stride(tex, level);
         trans->transfer.usage = usage;
 
@@ -344,6 +340,7 @@ static void* r300_transfer_map(struct pipe_screen* screen,
 {
     struct r300_texture* tex = (struct r300_texture*)transfer->texture;
     char* map;
+    enum pipe_format format = tex->tex.format;
 
     map = pipe_buffer_map(screen, tex->buffer,
                           pipe_transfer_buffer_flags(transfer));
@@ -353,8 +350,8 @@ static void* r300_transfer_map(struct pipe_screen* screen,
     }
 
     return map + r300_transfer(transfer)->offset +
-        transfer->y / transfer->block.height * transfer->stride +
-        transfer->x / transfer->block.width * transfer->block.size;
+        transfer->y / pf_get_blockheight(format) * transfer->stride +
+        transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
 }
 
 static void r300_transfer_unmap(struct pipe_screen* screen,
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 093a21ebe2..63fc6a235a 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -105,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
         return 0;
     }
 
-    return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32);
+    return align(pf_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
 }
 
 static void r300_setup_miptree(struct r300_texture* tex)
@@ -115,11 +115,10 @@ static void r300_setup_miptree(struct r300_texture* tex)
     int i;
 
     for (i = 0; i <= base->last_level; i++) {
-        base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i));
-        base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i));
+        unsigned nblocksy = pf_get_nblocksy(base->format, u_minify(base->height0, i));
 
         stride = r300_texture_get_stride(tex, i);
-        layer_size = stride * base->nblocksy[i];
+        layer_size = stride * nblocksy;
 
         if (base->target == PIPE_TEXTURE_CUBE)
             size = layer_size * 6;
@@ -129,7 +128,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
         tex->layer_size[i] = layer_size;
-        tex->pitch[i] = stride / base->block.size;
+        tex->pitch[i] = stride / pf_get_blocksize(base->format);
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
@@ -245,7 +244,7 @@ static struct pipe_texture*
     tex->tex.screen = screen;
 
     tex->stride_override = *stride;
-    tex->pitch[0] = *stride / base->block.size;
+    tex->pitch[0] = *stride / pf_get_blocksize(base->format);
 
     r300_setup_flags(tex);
     r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
@@ -283,7 +282,6 @@ r300_video_surface_create(struct pipe_screen *screen,
     template.width0 = util_next_power_of_two(width);
     template.height0 = util_next_power_of_two(height);
     template.depth0 = 1;
-    pf_get_block(template.format, &template.block);
     template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
                          PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index fb11b80dcf..410adf881b 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -158,7 +158,8 @@ svga_transfer_dma_band(struct svga_transfer *st,
                 st->base.x + st->base.width,
                 y + h,
                 st->base.zslice + 1,
-                texture->base.block.size*8/(texture->base.block.width*texture->base.block.height));
+                pf_get_blocksize(texture->base.format)*8/
+                (pf_get_blockwidth(texture->base.format)*pf_get_blockheight(texture->base.format)));
    
    box.x = st->base.x;
    box.y = y;
@@ -208,7 +209,8 @@ svga_transfer_dma(struct svga_transfer *st,
    }
    else {
       unsigned y, h, srcy;
-      h = st->hw_nblocksy * st->base.block.height;
+      unsigned blockheight = pf_get_blockheight(st->base.texture->format);
+      h = st->hw_nblocksy * blockheight;
       srcy = 0;
       for(y = 0; y < st->base.height; y += h) {
          unsigned offset, length;
@@ -218,11 +220,11 @@ svga_transfer_dma(struct svga_transfer *st,
             h = st->base.height - y;
 
          /* Transfer band must be aligned to pixel block boundaries */
-         assert(y % st->base.block.height == 0);
-         assert(h % st->base.block.height == 0);
+         assert(y % blockheight == 0);
+         assert(h % blockheight == 0);
          
-         offset = y * st->base.stride / st->base.block.height;
-         length = h * st->base.stride / st->base.block.height;
+         offset = y * st->base.stride / blockheight;
+         length = h * st->base.stride / blockheight;
 
          sw = (uint8_t *)st->swbuf + offset;
          
@@ -291,8 +293,6 @@ svga_texture_create(struct pipe_screen *screen,
    height = templat->height0;
    depth = templat->depth0;
    for(level = 0; level <= templat->last_level; ++level) {
-      tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width);  
-      tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height);  
       width = u_minify(width, 1);
       height = u_minify(height, 1);
       depth = u_minify(depth, 1);
@@ -750,6 +750,8 @@ svga_get_tex_transfer(struct pipe_screen *screen,
    struct svga_screen *ss = svga_screen(screen);
    struct svga_winsys_screen *sws = ss->sws;
    struct svga_transfer *st;
+   unsigned nblocksx = pf_get_nblocksx(texture->format, w);
+   unsigned nblocksy = pf_get_nblocksy(texture->format, h);
 
    /* We can't map texture storage directly */
    if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
@@ -759,21 +761,17 @@ svga_get_tex_transfer(struct pipe_screen *screen,
    if (!st)
       return NULL;
    
-   st->base.format = texture->format;
-   st->base.block = texture->block;
    st->base.x = x;
    st->base.y = y;
    st->base.width = w;
    st->base.height = h;
-   st->base.nblocksx = pf_get_nblocksx(&texture->block, w);
-   st->base.nblocksy = pf_get_nblocksy(&texture->block, h);
-   st->base.stride = st->base.nblocksx*st->base.block.size;
+   st->base.stride = nblocksx*pf_get_blocksize(texture->format);
    st->base.usage = usage;
    st->base.face = face;
    st->base.level = level;
    st->base.zslice = zslice;
 
-   st->hw_nblocksy = st->base.nblocksy;
+   st->hw_nblocksy = nblocksy;
    
    st->hwbuf = svga_winsys_buffer_create(ss, 
                                          1, 
@@ -789,15 +787,15 @@ svga_get_tex_transfer(struct pipe_screen *screen,
    if(!st->hwbuf)
       goto no_hwbuf;
 
-   if(st->hw_nblocksy < st->base.nblocksy) {
+   if(st->hw_nblocksy < nblocksy) {
       /* We couldn't allocate a hardware buffer big enough for the transfer, 
        * so allocate regular malloc memory instead */
       debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
                    __FUNCTION__,
-                   (st->base.nblocksy*st->base.stride + 1023)/1024,
-                   (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+                   (nblocksy*st->base.stride + 1023)/1024,
+                   (nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
                    (st->hw_nblocksy*st->base.stride + 1023)/1024);
-      st->swbuf = MALLOC(st->base.nblocksy*st->base.stride);
+      st->swbuf = MALLOC(nblocksy*st->base.stride);
       if(!st->swbuf)
          goto no_swbuf;
    }
@@ -1046,8 +1044,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture,
        svga_translate_format(texture->format),
        stex->handle);
 
-   *stride = pf_get_nblocksx(&texture->block, texture->width0) *
-      texture->block.size;
+   *stride = pf_get_stride(texture->format, texture->width0);
 
    return *buffer != NULL;
 }
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index a947745732..f1b0daf9f6 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -210,7 +210,7 @@ static int update_zero_stride( struct svga_context *svga,
          mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, 
                                                vbuffer->buffer,
                                                vel->src_offset,
-                                               pf_get_size(vel->src_format),
+                                               pf_get_blocksize(vel->src_format),
                                                PIPE_BUFFER_USAGE_CPU_READ);
          translate->set_buffer(translate, vel->vertex_buffer_index,
                                mapped_buffer,
-- 
cgit v1.2.3


From 94b5c28a98850f42fbcdab9ceda1450279e1e6fd Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 2 Dec 2009 16:55:33 +0100
Subject: gallium: adapt nv drivers to interface cleanups

---
 src/gallium/drivers/nv04/nv04_miptree.c    | 13 ++------
 src/gallium/drivers/nv04/nv04_surface_2d.c | 12 ++++----
 src/gallium/drivers/nv04/nv04_transfer.c   |  9 +-----
 src/gallium/drivers/nv10/nv10_miptree.c    | 11 ++-----
 src/gallium/drivers/nv10/nv10_transfer.c   |  9 +-----
 src/gallium/drivers/nv20/nv20_miptree.c    | 10 ++-----
 src/gallium/drivers/nv20/nv20_transfer.c   |  9 +-----
 src/gallium/drivers/nv30/nv30_miptree.c    | 11 ++-----
 src/gallium/drivers/nv30/nv30_transfer.c   |  9 +-----
 src/gallium/drivers/nv40/nv40_miptree.c    | 11 ++-----
 src/gallium/drivers/nv40/nv40_transfer.c   |  9 +-----
 src/gallium/drivers/nv50/nv50_miptree.c    | 10 +++----
 src/gallium/drivers/nv50/nv50_transfer.c   | 48 +++++++++++++-----------------
 13 files changed, 51 insertions(+), 120 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
index 4fd72c82e6..eeab6dfa30 100644
--- a/src/gallium/drivers/nv04/nv04_miptree.c
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -10,28 +10,21 @@ static void
 nv04_miptree_layout(struct nv04_miptree *nv04mt)
 {
 	struct pipe_texture *pt = &nv04mt->base;
-	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l;
 
 	nr_faces = 1;
 
 	for (l = 0; l <= pt->last_level; l++) {
-
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-		
 		nv04mt->level[l].pitch = pt->width0;
 		nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
-
-		width  = u_minify(width, 1);
-		height = u_minify(height, 1);
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-
 		nv04mt->level[l].image_offset = 
 			CALLOC(nr_faces, sizeof(unsigned));
+		/* XXX: guess -- this assignment was obviously missing */
+		nv04mt->level[l].image_offset[0] = offset;
 		offset += nv04mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
@@ -128,7 +121,7 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	ns->base.zslice = zslice;
 	ns->pitch = nv04mt->level[level].pitch;
 
-	ns->base.offset = nv04mt->level[level].image_offset;
+	ns->base.offset = nv04mt->level[level].image_offset[0];
 
 	return &ns->base;
 }
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 8be134b83d..932893eef5 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -155,10 +155,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	    sub_w = MIN2(sub_w, w - x);
 
 	    /* Must be 64-byte aligned */
-	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size) & 63));
+	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format)) & 63));
 
 	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
-	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * dst->texture->block.size,
+	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format),
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
@@ -177,7 +177,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	    OUT_RING  (chan, src_pitch |
 			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
 			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
-	    OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * src->texture->block.size,
+	    OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * pf_get_blocksize(src->texture->format),
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
 	    OUT_RING  (chan, 0);
 	  }
@@ -198,9 +198,9 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 	unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
 	unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
 	unsigned dst_offset = dst->offset + dy * dst_pitch +
-	                      dx * dst->texture->block.size;
+	                      dx * pf_get_blocksize(dst->texture->format);
 	unsigned src_offset = src->offset + sy * src_pitch +
-	                      sx * src->texture->block.size;
+	                      sx * pf_get_blocksize(src->texture->format);
 
 	WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
 	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
@@ -219,7 +219,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
 		OUT_RING  (chan, src_pitch);
 		OUT_RING  (chan, dst_pitch);
-		OUT_RING  (chan, w * src->texture->block.size);
+		OUT_RING  (chan, w * pf_get_blocksize(src->texture->format));
 		OUT_RING  (chan, count);
 		OUT_RING  (chan, 0x0101);
 		OUT_RING  (chan, 0);
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index e6456429f4..e8ff686b4a 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -24,9 +24,6 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	template->width0 = u_minify(pt->width0, level);
 	template->height0 = u_minify(pt->height0, level);
 	template->depth0 = 1;
-	template->block = pt->block;
-	template->nblocksx[0] = pt->nblocksx[level];
-	template->nblocksy[0] = pt->nblocksx[level];
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
 
@@ -49,14 +46,10 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
 	tx->base.x = x;
 	tx->base.y = y;
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
 	tx->base.stride = mt->level[level].pitch;
 	tx->base.usage = usage;
 	tx->base.face = face;
@@ -158,7 +151,7 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * ptx->block.size;
+	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index b2a6c59b74..439beeccc3 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -11,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 {
 	struct pipe_texture *pt = &nv10mt->base;
 	boolean swizzled = FALSE;
-	uint width = pt->width0, height = pt->height0;
+	uint width = pt->width0;
 	uint offset = 0;
 	int nr_faces, l, f;
 
@@ -22,21 +22,16 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
 		if (swizzled)
-			nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size;
+			nv10mt->level[l].pitch = pf_get_stride(pt->format, width);
 		else
-			nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size;
+			nv10mt->level[l].pitch = pf_get_stride(pt->format, pt->width0);
 		nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
 
 		nv10mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
 		width  = u_minify(width, 1);
-		height = u_minify(height, 1);
 
 	}
 
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index ec54297ab0..9e44d37367 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -24,9 +24,6 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	template->width0 = u_minify(pt->width0, level);
 	template->height0 = u_minify(pt->height0, level);
 	template->depth0 = 1;
-	template->block = pt->block;
-	template->nblocksx[0] = pt->nblocksx[level];
-	template->nblocksy[0] = pt->nblocksx[level];
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
 
@@ -49,14 +46,10 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
 	tx->base.x = x;
 	tx->base.y = y;
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
 	tx->base.stride = mt->level[level].pitch;
 	tx->base.usage = usage;
 	tx->base.face = face;
@@ -158,7 +151,7 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * ptx->block.size;
+	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index 554e28e47d..2bde9fb75b 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -10,7 +10,7 @@ static void
 nv20_miptree_layout(struct nv20_miptree *nv20mt)
 {
 	struct pipe_texture *pt = &nv20mt->base;
-	uint width = pt->width0, height = pt->height0;
+	uint width = pt->width0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -26,19 +26,15 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
+			nv20mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
 		else
-			nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
+			nv20mt->level[l].pitch = pf_get_stride(pt->format, width);
 
 		nv20mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
 		width  = u_minify(width, 1);
-		height = u_minify(height, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 87b5c14a3c..f2e0a34db9 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -24,9 +24,6 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	template->width0 = u_minify(pt->width0, level);
 	template->height0 = u_minify(pt->height0, level);
 	template->depth0 = 1;
-	template->block = pt->block;
-	template->nblocksx[0] = pt->nblocksx[level];
-	template->nblocksy[0] = pt->nblocksx[level];
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
 
@@ -49,14 +46,10 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
 	tx->base.x = x;
 	tx->base.y = y;
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
 	tx->base.stride = mt->level[level].pitch;
 	tx->base.usage = usage;
 	tx->base.face = face;
@@ -158,7 +151,7 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * ptx->block.size;
+	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index b4c306d127..9e50a7cf6b 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -9,7 +9,7 @@ static void
 nv30_miptree_layout(struct nv30_miptree *nv30mt)
 {
 	struct pipe_texture *pt = &nv30mt->base;
-	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
+	uint width = pt->width0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -28,20 +28,15 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
+			nv30mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
 		else
-			nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
+			nv30mt->level[l].pitch = pf_get_stride(pt->format, width);
 
 		nv30mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
 		width  = u_minify(width, 1);
-		height = u_minify(height, 1);
-		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 5e429b4d85..c8c3bd1f17 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -24,9 +24,6 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	template->width0 = u_minify(pt->width0, level);
 	template->height0 = u_minify(pt->height0, level);
 	template->depth0 = 1;
-	template->block = pt->block;
-	template->nblocksx[0] = pt->nblocksx[level];
-	template->nblocksy[0] = pt->nblocksx[level];
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
 
@@ -49,14 +46,10 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
 	tx->base.x = x;
 	tx->base.y = y;
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
 	tx->base.stride = mt->level[level].pitch;
 	tx->base.usage = usage;
 	tx->base.face = face;
@@ -158,7 +151,7 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * ptx->block.size;
+	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index f73bedff6d..8779c5572b 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -9,7 +9,7 @@ static void
 nv40_miptree_layout(struct nv40_miptree *mt)
 {
 	struct pipe_texture *pt = &mt->base;
-	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
+	uint width = pt->width0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -28,20 +28,15 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
-
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
+			mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
 		else
-			mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
+			mt->level[l].pitch = pf_get_stride(pt->format, width);
 
 		mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
 		width  = u_minify(width, 1);
-		height = u_minify(height, 1);
-		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 36e253c96f..1ee5cf39e0 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -24,9 +24,6 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	template->width0 = u_minify(pt->width0, level);
 	template->height0 = u_minify(pt->height0, level);
 	template->depth0 = 1;
-	template->block = pt->block;
-	template->nblocksx[0] = pt->nblocksx[level];
-	template->nblocksy[0] = pt->nblocksx[level];
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
 
@@ -49,14 +46,10 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
 	tx->base.x = x;
 	tx->base.y = y;
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	tx->base.nblocksx = pt->nblocksx[level];
-	tx->base.nblocksy = pt->nblocksy[level];
 	tx->base.stride = mt->level[level].pitch;
 	tx->base.usage = usage;
 	tx->base.face = face;
@@ -158,7 +151,7 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * ptx->block.size;
+	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 3d58746793..40ee665999 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -91,13 +91,11 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
-
-		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
-		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+		unsigned nblocksy = pf_get_nblocksy(pt->format, height);
 
 		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
-		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
-		lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
+		lvl->pitch = align(pf_get_stride(pt->format, width), 64);
+		lvl->tile_mode = get_tile_mode(nblocksy, depth);
 
 		width = u_minify(width, 1);
 		height = u_minify(height, 1);
@@ -118,7 +116,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 			unsigned tile_d = get_tile_depth(lvl->tile_mode);
 
 			size  = lvl->pitch;
-			size *= align(pt->nblocksy[l], tile_h);
+			size *= align(pf_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
 			size *= align(u_minify(pt->depth0, l), tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 39d65279fc..4705f96f57 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,6 +16,8 @@ struct nv50_transfer {
 	int level_depth;
 	int level_x;
 	int level_y;
+	unsigned nblocksx;
+	unsigned nblocksy;
 };
 
 static void
@@ -151,20 +153,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->base.format = pt->format;
+	tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level));
+	tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level));
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.block = pt->block;
-	if (!pt->nblocksx[level]) {
-		tx->base.nblocksx = pf_get_nblocksx(&pt->block,
-						    u_minify(pt->width0, level));
-		tx->base.nblocksy = pf_get_nblocksy(&pt->block,
-						    u_minify(pt->height0, level));
-	} else {
-		tx->base.nblocksx = pt->nblocksx[level];
-		tx->base.nblocksy = pt->nblocksy[level];
-	}
-	tx->base.stride = tx->base.nblocksx * pt->block.size;
+	tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format);
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
@@ -173,10 +166,10 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
-	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
-	tx->level_y = pf_get_nblocksy(&tx->base.block, y);
+	tx->level_x = pf_get_nblocksx(pt->format, x);
+	tx->level_y = pf_get_nblocksy(pt->format, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
-			     tx->base.nblocksy * tx->base.stride, &tx->bo);
+			     tx->nblocksy * tx->base.stride, &tx->bo);
 	if (ret) {
 		FREE(tx);
 		return NULL;
@@ -185,22 +178,22 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	if (pt->target == PIPE_TEXTURE_3D)
 		tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
 						      lvl->pitch,
-						      tx->base.nblocksy);
+						      tx->nblocksy);
 
 	if (usage & PIPE_TRANSFER_READ) {
-		nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
-		ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+		nx = pf_get_nblocksx(pt->format, tx->base.width);
+		ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					x, y,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy, 1,
-					tx->base.block.size, nx, ny,
+					tx->nblocksx, tx->nblocksy, 1,
+					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
 					NOUVEAU_BO_GART);
 	}
@@ -213,23 +206,24 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 {
 	struct nv50_transfer *tx = (struct nv50_transfer *)ptx;
 	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
+	struct pipe_texture *pt = ptx->texture;
 
-	unsigned nx = pf_get_nblocksx(&tx->base.block, tx->base.width);
-	unsigned ny = pf_get_nblocksy(&tx->base.block, tx->base.height);
+	unsigned nx = pf_get_nblocksx(pt->format, tx->base.width);
+	unsigned ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 	if (ptx->usage & PIPE_TRANSFER_WRITE) {
-		struct pipe_screen *pscreen = ptx->texture->screen;
+		struct pipe_screen *pscreen = pt->screen;
 
 		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0,
-					tx->base.nblocksx, tx->base.nblocksy, 1,
+					tx->nblocksx, tx->nblocksy, 1,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
 					tx->level_x, tx->level_y,
-					tx->base.nblocksx, tx->base.nblocksy,
+					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
-					tx->base.block.size, nx, ny,
+					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
 					NOUVEAU_BO_GART);
 	}
-- 
cgit v1.2.3


From 429bf7541777de08e070df3920b8566e3ac78223 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 2 Dec 2009 09:23:37 -0700
Subject: cell: fix TGSI breakage

---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 4d43f65d29..1895a7940c 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -1351,7 +1351,7 @@ emit_function_call(struct codegen *gen,
 static boolean
 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
-   const uint target = inst->InstructionExtTexture.Texture;
+   const uint target = inst->Texture.Texture;
    const uint unit = inst->Src[1].Register.Index;
    uint addr;
    int ch;
-- 
cgit v1.2.3


From 792888121b92913733daec7526c9441f27ce1231 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 2 Dec 2009 10:09:53 -0700
Subject: llvmpipe: plug in dummy pipe_context::set_vertex_sampler_textures
 function

Fixes immediate segfault.
---
 src/gallium/drivers/llvmpipe/lp_context.c       |  3 ++-
 src/gallium/drivers/llvmpipe/lp_state.h         | 10 +++++++---
 src/gallium/drivers/llvmpipe/lp_state_sampler.c | 14 ++++++++++++--
 3 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index c081f6de03..66549132d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -205,7 +205,8 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
    llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
    llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
-   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
+   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_fragment_sampler_textures;
+   llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures;
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 7b26ce61a3..805959af89 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -168,9 +168,13 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *,
 void llvmpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
 
-void llvmpipe_set_sampler_textures( struct pipe_context *,
-                                    unsigned num,
-                                    struct pipe_texture ** );
+void llvmpipe_set_fragment_sampler_textures( struct pipe_context *,
+                                             unsigned num,
+                                             struct pipe_texture ** );
+
+void llvmpipe_set_vertex_sampler_textures( struct pipe_context *,
+                                           unsigned num,
+                                           struct pipe_texture ** );
 
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 8333805a3f..b61b669093 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -78,8 +78,9 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
 
 
 void
-llvmpipe_set_sampler_textures(struct pipe_context *pipe,
-                              unsigned num, struct pipe_texture **texture)
+llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe,
+                                       unsigned num,
+                                       struct pipe_texture **texture)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    uint i;
@@ -116,6 +117,15 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
 }
 
 
+void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                     unsigned num,
+                                     struct pipe_texture **texture)
+{
+   /* XXX to do */
+}
+
+
 void
 llvmpipe_delete_sampler_state(struct pipe_context *pipe,
                               void *sampler)
-- 
cgit v1.2.3


From f42192e783521f49a7caab09b073740e63ab092b Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 2 Dec 2009 12:19:31 -0700
Subject: llvmpipe: return 0 for PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS query

The driver and the draw module don't support vertex shader textures yet.
---
 src/gallium/drivers/llvmpipe/lp_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 0fb133486a..a6ecaa0b2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -59,7 +59,7 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
    case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
       return PIPE_MAX_SAMPLERS;
    case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-      return PIPE_MAX_SAMPLERS;
+      return 0;
    case PIPE_CAP_NPOT_TEXTURES:
       return 1;
    case PIPE_CAP_TWO_SIDED_STENCIL:
-- 
cgit v1.2.3


From d5e5909f171952bc15f43bc618238fc0699edc09 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 2 Dec 2009 12:20:15 -0700
Subject: Revert "llvmpipe: plug in dummy
 pipe_context::set_vertex_sampler_textures function"

This reverts commit 792888121b92913733daec7526c9441f27ce1231.

We're instead returning 0 for the PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS query.
---
 src/gallium/drivers/llvmpipe/lp_context.c       |  3 +--
 src/gallium/drivers/llvmpipe/lp_state.h         | 10 +++-------
 src/gallium/drivers/llvmpipe/lp_state_sampler.c | 14 ++------------
 3 files changed, 6 insertions(+), 21 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 66549132d4..c081f6de03 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -205,8 +205,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.set_framebuffer_state = llvmpipe_set_framebuffer_state;
    llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
    llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
-   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_fragment_sampler_textures;
-   llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures;
+   llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 805959af89..7b26ce61a3 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -168,13 +168,9 @@ void llvmpipe_set_polygon_stipple( struct pipe_context *,
 void llvmpipe_set_scissor_state( struct pipe_context *,
                                  const struct pipe_scissor_state * );
 
-void llvmpipe_set_fragment_sampler_textures( struct pipe_context *,
-                                             unsigned num,
-                                             struct pipe_texture ** );
-
-void llvmpipe_set_vertex_sampler_textures( struct pipe_context *,
-                                           unsigned num,
-                                           struct pipe_texture ** );
+void llvmpipe_set_sampler_textures( struct pipe_context *,
+                                    unsigned num,
+                                    struct pipe_texture ** );
 
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index b61b669093..8333805a3f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -78,9 +78,8 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
 
 
 void
-llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe,
-                                       unsigned num,
-                                       struct pipe_texture **texture)
+llvmpipe_set_sampler_textures(struct pipe_context *pipe,
+                              unsigned num, struct pipe_texture **texture)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    uint i;
@@ -117,15 +116,6 @@ llvmpipe_set_fragment_sampler_textures(struct pipe_context *pipe,
 }
 
 
-void
-llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
-                                     unsigned num,
-                                     struct pipe_texture **texture)
-{
-   /* XXX to do */
-}
-
-
 void
 llvmpipe_delete_sampler_state(struct pipe_context *pipe,
                               void *sampler)
-- 
cgit v1.2.3


From 08383af4c749566dcb58db94d7b72ee02e4cab11 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 2 Dec 2009 11:22:55 -0800
Subject: r300g: No vertex textures here.

---
 src/gallium/drivers/r300/r300_state.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 7505353953..442af70e14 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -566,6 +566,12 @@ static void r300_bind_sampler_states(struct pipe_context* pipe,
     r300->sampler_count = count;
 }
 
+static void r300_lacks_vertex_textures(struct pipe_context* pipe,
+                                       unsigned count,
+                                       void** states)
+{
+}
+
 static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
 {
     FREE(state);
@@ -823,6 +829,7 @@ void r300_init_state_functions(struct r300_context* r300)
 
     r300->context.create_sampler_state = r300_create_sampler_state;
     r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
+    r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
     r300->context.delete_sampler_state = r300_delete_sampler_state;
 
     r300->context.set_fragment_sampler_textures = r300_set_sampler_textures;
-- 
cgit v1.2.3


From 4f77b0103d5f150845300ee8bddcef20d11a9820 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Wed, 2 Dec 2009 12:16:19 -0800
Subject: r300g, radeong: De-specialize r300_winsys into radeon_winsys.

There's like five good reasons for this, I swear.
---
 src/gallium/drivers/r300/Makefile                  |   3 +-
 src/gallium/drivers/r300/r300_context.c            |   9 +-
 src/gallium/drivers/r300/r300_context.h            |   2 +-
 src/gallium/drivers/r300/r300_cs.h                 |   5 +-
 src/gallium/drivers/r300/r300_screen.c             |  13 +-
 src/gallium/drivers/r300/r300_screen.h             |   4 +-
 src/gallium/drivers/r300/r300_vbo.c                |   3 +-
 src/gallium/drivers/r300/r300_winsys.h             |  70 +---------
 src/gallium/winsys/drm/radeon/core/radeon_buffer.h |  10 +-
 src/gallium/winsys/drm/radeon/core/radeon_drm.c    |   7 +-
 src/gallium/winsys/drm/radeon/core/radeon_r300.c   | 141 ++++++++-------------
 src/gallium/winsys/drm/radeon/core/radeon_r300.h   |   5 +-
 src/gallium/winsys/drm/radeon/core/radeon_winsys.h | 105 +++++++++++++++
 13 files changed, 190 insertions(+), 187 deletions(-)
 create mode 100644 src/gallium/winsys/drm/radeon/core/radeon_winsys.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index d13bb7a36b..63ae5c2766 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -23,7 +23,8 @@ C_SOURCES = \
 	r300_tgsi_to_rc.c
 
 LIBRARY_INCLUDES = \
-	-I$(TOP)/src/mesa/drivers/dri/r300/compiler
+	-I$(TOP)/src/mesa/drivers/dri/r300/compiler \
+	-I$(TOP)/src/gallium/winsys/drm/radeon/core
 
 COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
 
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 769733b6dd..68a17dcb63 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -36,7 +36,8 @@
 #include "r300_screen.h"
 #include "r300_state_derived.h"
 #include "r300_state_invariant.h"
-#include "r300_winsys.h"
+
+#include "radeon_winsys.h"
 
 static enum pipe_error r300_clear_hash_table(void* key, void* value,
                                              void* data)
@@ -105,7 +106,7 @@ static void r300_flush_cb(void *data)
 }
 
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
-                                         struct r300_winsys* r300_winsys)
+                                         struct radeon_winsys* radeon_winsys)
 {
     struct r300_context* r300 = CALLOC_STRUCT(r300_context);
     struct r300_screen* r300screen = r300_screen(screen);
@@ -113,9 +114,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     if (!r300)
         return NULL;
 
-    r300->winsys = r300_winsys;
+    r300->winsys = radeon_winsys;
 
-    r300->context.winsys = (struct pipe_winsys*)r300_winsys;
+    r300->context.winsys = (struct pipe_winsys*)radeon_winsys;
     r300->context.screen = screen;
 
     r300_init_debug(r300);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 39c0914cff..dd3f6ac143 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -237,7 +237,7 @@ struct r300_context {
     struct pipe_context context;
 
     /* The interface to the windowing system, etc. */
-    struct r300_winsys* winsys;
+    struct radeon_winsys* winsys;
     /* Draw module. Used mostly for SW TCL. */
     struct draw_context* draw;
 
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 86ba91db52..8b100375fd 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -26,7 +26,8 @@
 #include "util/u_math.h"
 
 #include "r300_reg.h"
-#include "r300_winsys.h"
+
+#include "radeon_winsys.h"
 
 /* Yes, I know macros are ugly. However, they are much prettier than the code
  * that they neatly hide away, and don't have the cost of function setup,so
@@ -50,7 +51,7 @@
 
 #define CS_LOCALS(context) \
     struct r300_context* const cs_context_copy = (context); \
-    struct r300_winsys* cs_winsys = cs_context_copy->winsys; \
+    struct radeon_winsys* cs_winsys = cs_context_copy->winsys; \
     int cs_count = 0;
 
 #define CHECK_CS(size) \
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 390b63007e..2e7b1423e6 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -27,7 +27,8 @@
 #include "r300_context.h"
 #include "r300_screen.h"
 #include "r300_texture.h"
-#include "r300_winsys.h"
+
+#include "radeon_winsys.h"
 
 /* Return the identifier behind whom the brave coders responsible for this
  * amalgamation of code, sweat, and duct tape, routinely obscure their names.
@@ -372,7 +373,7 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
     FREE(r300screen);
 }
 
-struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
+struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys)
 {
     struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen);
     struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities);
@@ -380,14 +381,14 @@ struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys)
     if (!r300screen || !caps)
         return NULL;
 
-    caps->pci_id = r300_winsys->pci_id;
-    caps->num_frag_pipes = r300_winsys->gb_pipes;
-    caps->num_z_pipes = r300_winsys->z_pipes;
+    caps->pci_id = radeon_winsys->pci_id;
+    caps->num_frag_pipes = radeon_winsys->gb_pipes;
+    caps->num_z_pipes = radeon_winsys->z_pipes;
 
     r300_parse_chipset(caps);
 
     r300screen->caps = caps;
-    r300screen->screen.winsys = (struct pipe_winsys*)r300_winsys;
+    r300screen->screen.winsys = (struct pipe_winsys*)radeon_winsys;
     r300screen->screen.destroy = r300_destroy_screen;
     r300screen->screen.get_name = r300_get_name;
     r300screen->screen.get_vendor = r300_get_vendor;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 1ce5ff3904..2217988add 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -27,7 +27,7 @@
 
 #include "r300_chipset.h"
 
-struct r300_winsys;
+struct radeon_winsys;
 
 struct r300_screen {
     /* Parent class */
@@ -58,6 +58,6 @@ r300_transfer(struct pipe_transfer* transfer)
 }
 
 /* Creates a new r300 screen. */
-struct pipe_screen* r300_create_screen(struct r300_winsys* r300_winsys);
+struct pipe_screen* r300_create_screen(struct radeon_winsys* radeon_winsys);
 
 #endif /* R300_SCREEN_H */
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index 6ebaf715dc..d8610dadfa 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -32,7 +32,8 @@
 #include "r300_context.h"
 #include "r300_state_inlines.h"
 #include "r300_reg.h"
-#include "r300_winsys.h"
+
+#include "radeon_winsys.h"
 
 static INLINE int get_buffer_offset(struct r300_context *r300,
                                     unsigned int buf_nr,
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index 864a6146b2..f86985841f 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -35,76 +35,8 @@ extern "C" {
 #include "pipe/p_state.h"
 #include "pipe/internal/p_winsys_screen.h"
 
-struct r300_winsys {
-    /* Parent class */
-    struct pipe_winsys base;
-
-    /* Opaque Radeon-specific winsys object. */
-    void* radeon_winsys;
-
-    /* PCI ID */
-    uint32_t pci_id;
-
-    /* GB pipe count */
-    uint32_t gb_pipes;
-
-    /* Z pipe count (rv530 only) */
-    uint32_t z_pipes;
-
-    /* GART size. */
-    uint32_t gart_size;
-
-    /* VRAM size. */
-    uint32_t vram_size;
-
-    /* Add a pipe_buffer to the list of buffer objects to validate. */
-    boolean (*add_buffer)(struct r300_winsys* winsys,
-                          struct pipe_buffer* pbuffer,
-                          uint32_t rd,
-                          uint32_t wd);
-
-    /* Revalidate all currently setup pipe_buffers.
-     * Returns TRUE if a flush is required. */
-    boolean (*validate)(struct r300_winsys* winsys);
-
-    /* Check to see if there's room for commands. */
-    boolean (*check_cs)(struct r300_winsys* winsys, int size);
-
-    /* Start a command emit. */
-    void (*begin_cs)(struct r300_winsys* winsys,
-                     int size,
-                     const char* file,
-                     const char* function,
-                     int line);
-
-    /* Write a dword to the command buffer. */
-    void (*write_cs_dword)(struct r300_winsys* winsys, uint32_t dword);
-
-    /* Write a relocated dword to the command buffer. */
-    void (*write_cs_reloc)(struct r300_winsys* winsys,
-                           struct pipe_buffer* bo,
-                           uint32_t rd,
-                           uint32_t wd,
-                           uint32_t flags);
-
-    /* Finish a command emit. */
-    void (*end_cs)(struct r300_winsys* winsys,
-                   const char* file,
-                   const char* function,
-                   int line);
-
-    /* Flush the CS. */
-    void (*flush_cs)(struct r300_winsys* winsys);
-
-    /* winsys flush - callback from winsys when flush required */
-    void (*set_flush_cb)(struct r300_winsys *winsys,
-			 void (*flush_cb)(void *), void *data);
-
-    void (*reset_bos)(struct r300_winsys *winsys);
-};
-
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
-                                         struct r300_winsys* r300_winsys);
+                                         struct radeon_winsys* radeon_winsys);
 
 boolean r300_get_texture_buffer(struct pipe_texture* texture,
                                 struct pipe_buffer** buffer,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
index f5153b06af..bfe2221d1e 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.h
@@ -45,6 +45,8 @@
 
 #include "radeon_drm.h"
 
+#include "radeon_winsys.h"
+
 struct radeon_pipe_buffer {
     struct pipe_buffer  base;
     struct radeon_bo    *bo;
@@ -68,14 +70,6 @@ struct radeon_winsys_priv {
     struct radeon_cs* cs;
 };
 
-struct radeon_winsys {
-    /* Parent class. */
-    struct pipe_winsys base;
-
-    /* This corresponds to void* radeon_winsys in r300_winsys. */
-    struct radeon_winsys_priv* priv;
-};
-
 struct radeon_winsys* radeon_pipe_winsys(int fb);
 #if 0
 struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_drm.c b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
index 69f14e54f2..770d7c73eb 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_drm.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_drm.c
@@ -41,9 +41,8 @@ struct pipe_screen* radeon_create_screen(struct drm_api* api,
     if (debug_get_bool_option("RADEON_SOFTPIPE", FALSE)) {
         return softpipe_create_screen((struct pipe_winsys*)winsys);
     } else {
-        struct r300_winsys* r300 = radeon_create_r300_winsys(drmFB, winsys);
-        FREE(winsys);
-        return r300_create_screen(r300);
+        radeon_setup_winsys(drmFB, winsys);
+        return r300_create_screen(winsys);
     }
 }
 
@@ -55,7 +54,7 @@ struct pipe_context* radeon_create_context(struct drm_api* api,
         return radeon_create_softpipe(screen->winsys);
     } else {
         return r300_create_context(screen,
-                                   (struct r300_winsys*)screen->winsys);
+                                   (struct radeon_winsys*)screen->winsys);
     }
 }
 
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index d3e468a9ef..b64e9c16e1 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -22,36 +22,27 @@
 
 #include "radeon_r300.h"
 
-static void radeon_r300_set_flush_cb(struct r300_winsys *winsys,
-				     void (*flush_cb)(void *),
-				     void *data)
+static void radeon_set_flush_cb(struct radeon_winsys *winsys,
+                                void (*flush_cb)(void *),
+                                void *data)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-
-    radeon_cs_space_set_flush(priv->cs, flush_cb,
-			      data);
+    radeon_cs_space_set_flush(winsys->priv->cs, flush_cb, data);
 }
 
-static boolean radeon_r300_add_buffer(struct r300_winsys* winsys,
-                                      struct pipe_buffer* pbuffer,
-                                      uint32_t rd,
-                                      uint32_t wd)
+static boolean radeon_add_buffer(struct radeon_winsys* winsys,
+                                 struct pipe_buffer* pbuffer,
+                                 uint32_t rd,
+                                 uint32_t wd)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
     struct radeon_bo* bo = ((struct radeon_pipe_buffer*)pbuffer)->bo;
 
-    radeon_cs_space_add_persistent_bo(priv->cs, bo, rd, wd);
+    radeon_cs_space_add_persistent_bo(winsys->priv->cs, bo, rd, wd);
     return TRUE;
 }
 
-static boolean radeon_r300_validate(struct r300_winsys* winsys)
+static boolean radeon_validate(struct radeon_winsys* winsys)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-
-    if (radeon_cs_space_check(priv->cs) < 0) {
+    if (radeon_cs_space_check(winsys->priv->cs) < 0) {
         return FALSE;
     }
 
@@ -59,45 +50,37 @@ static boolean radeon_r300_validate(struct r300_winsys* winsys)
     return TRUE;
 }
 
-static boolean radeon_r300_check_cs(struct r300_winsys* winsys, int size)
+static boolean radeon_check_cs(struct radeon_winsys* winsys, int size)
 {
     /* XXX check size here, lazy ass! */
     /* XXX also validate buffers */
     return TRUE;
 }
 
-static void radeon_r300_begin_cs(struct r300_winsys* winsys,
-                                 int size,
-                                 const char* file,
-                                 const char* function,
-                                 int line)
+static void radeon_begin_cs(struct radeon_winsys* winsys,
+                            int size,
+                            const char* file,
+                            const char* function,
+                            int line)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-
-    radeon_cs_begin(priv->cs, size, file, function, line);
+    radeon_cs_begin(winsys->priv->cs, size, file, function, line);
 }
 
-static void radeon_r300_write_cs_dword(struct r300_winsys* winsys,
-                                       uint32_t dword)
+static void radeon_write_cs_dword(struct radeon_winsys* winsys,
+                                  uint32_t dword)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-
-    radeon_cs_write_dword(priv->cs, dword);
+    radeon_cs_write_dword(winsys->priv->cs, dword);
 }
 
-static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys,
-                                       struct pipe_buffer* pbuffer,
-                                       uint32_t rd,
-                                       uint32_t wd,
-                                       uint32_t flags)
+static void radeon_write_cs_reloc(struct radeon_winsys* winsys,
+                                  struct pipe_buffer* pbuffer,
+                                  uint32_t rd,
+                                  uint32_t wd,
+                                  uint32_t flags)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
     int retval = 0;
 
-    retval = radeon_cs_write_reloc(priv->cs,
+    retval = radeon_cs_write_reloc(winsys->priv->cs,
             ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags);
 
     if (retval) {
@@ -106,46 +89,39 @@ static void radeon_r300_write_cs_reloc(struct r300_winsys* winsys,
     }
 }
 
-static void radeon_r300_reset_bos(struct r300_winsys *winsys)
+static void radeon_reset_bos(struct radeon_winsys *winsys)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-    radeon_cs_space_reset_bos(priv->cs);
+    radeon_cs_space_reset_bos(winsys->priv->cs);
 }
 
-static void radeon_r300_end_cs(struct r300_winsys* winsys,
-                               const char* file,
-                               const char* function,
-                               int line)
+static void radeon_end_cs(struct radeon_winsys* winsys,
+                          const char* file,
+                          const char* function,
+                          int line)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
-
-    radeon_cs_end(priv->cs, file, function, line);
+    radeon_cs_end(winsys->priv->cs, file, function, line);
 }
 
-static void radeon_r300_flush_cs(struct r300_winsys* winsys)
+static void radeon_flush_cs(struct radeon_winsys* winsys)
 {
-    struct radeon_winsys_priv* priv =
-        (struct radeon_winsys_priv*)winsys->radeon_winsys;
     int retval;
 
     /* Emit the CS. */
-    retval = radeon_cs_emit(priv->cs);
+    retval = radeon_cs_emit(winsys->priv->cs);
     if (retval) {
         debug_printf("radeon: Bad CS, dumping...\n");
-        radeon_cs_print(priv->cs, stderr);
+        radeon_cs_print(winsys->priv->cs, stderr);
     }
 
     /* Reset CS.
      * Someday, when we care about performance, we should really find a way
      * to rotate between two or three CS objects so that the GPU can be
      * spinning through one CS while another one is being filled. */
-    radeon_cs_erase(priv->cs);
+    radeon_cs_erase(winsys->priv->cs);
 }
 
 /* Helper function to do the ioctls needed for setup and init. */
-static void do_ioctls(struct r300_winsys* winsys, int fd)
+static void do_ioctls(struct radeon_winsys* winsys, int fd)
 {
     struct drm_radeon_gem_info gem_info = {0};
     struct drm_radeon_info info = {0};
@@ -207,18 +183,17 @@ static void do_ioctls(struct r300_winsys* winsys, int fd)
     winsys->vram_size = gem_info.vram_visible;
 }
 
-struct r300_winsys*
-radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys)
+void
+radeon_setup_winsys(int fd, struct radeon_winsys* winsys)
 {
-    struct r300_winsys* winsys = CALLOC_STRUCT(r300_winsys);
-    struct radeon_winsys_priv* priv;
-
+    /* XXX is this check needed now? */
     if (winsys == NULL) {
-        return NULL;
+        return;
     }
 
-    priv = old_winsys->priv;
+    struct radeon_winsys_priv* priv = winsys->priv;
 
+    /* XXX backwards is bad precedent */
     do_ioctls(winsys, fd);
 
     priv->csm = radeon_cs_manager_gem_ctor(fd);
@@ -229,19 +204,15 @@ radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys)
     radeon_cs_set_limit(priv->cs,
             RADEON_GEM_DOMAIN_VRAM, winsys->vram_size);
 
-    winsys->add_buffer = radeon_r300_add_buffer;
-    winsys->validate = radeon_r300_validate;
-
-    winsys->check_cs = radeon_r300_check_cs;
-    winsys->begin_cs = radeon_r300_begin_cs;
-    winsys->write_cs_dword = radeon_r300_write_cs_dword;
-    winsys->write_cs_reloc = radeon_r300_write_cs_reloc;
-    winsys->end_cs = radeon_r300_end_cs;
-    winsys->flush_cs = radeon_r300_flush_cs;
-    winsys->reset_bos = radeon_r300_reset_bos;
-    winsys->set_flush_cb = radeon_r300_set_flush_cb;
-
-    memcpy(winsys, old_winsys, sizeof(struct radeon_winsys));
-
-    return winsys;
+    winsys->add_buffer = radeon_add_buffer;
+    winsys->validate = radeon_validate;
+
+    winsys->check_cs = radeon_check_cs;
+    winsys->begin_cs = radeon_begin_cs;
+    winsys->write_cs_dword = radeon_write_cs_dword;
+    winsys->write_cs_reloc = radeon_write_cs_reloc;
+    winsys->end_cs = radeon_end_cs;
+    winsys->flush_cs = radeon_flush_cs;
+    winsys->reset_bos = radeon_reset_bos;
+    winsys->set_flush_cb = radeon_set_flush_cb;
 }
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.h b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
index 775d7937fd..cfbdb30266 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.h
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.h
@@ -34,9 +34,6 @@
 
 #include "radeon_buffer.h"
 
-struct radeon_winsys;
-
-struct r300_winsys*
-radeon_create_r300_winsys(int fd, struct radeon_winsys* old_winsys);
+void radeon_setup_winsys(int fd, struct radeon_winsys* winsys);
 
 #endif /* RADEON_R300_H */
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_winsys.h b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h
new file mode 100644
index 0000000000..9edc9e038c
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/core/radeon_winsys.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2009 Corbin Simpson
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Corbin Simpson <MostAwesomeDude@gmail.com>
+ */
+#ifndef RADEON_WINSYS_H
+#define RADEON_WINSYS_H
+
+#include "pipe/internal/p_winsys_screen.h"
+
+struct radeon_winsys_priv;
+
+struct radeon_winsys {
+    /* Parent class; must stay first: code casts to/from struct pipe_winsys*. */
+    struct pipe_winsys base;
+
+    /* Winsys private state; the radeon CS callbacks operate on priv->cs. */
+    struct radeon_winsys_priv* priv;
+
+    /* PCI ID */
+    uint32_t pci_id;
+
+    /* GB pipe count */
+    uint32_t gb_pipes;
+
+    /* Z pipe count (rv530 only) */
+    uint32_t z_pipes;
+
+    /* GART size. */
+    uint32_t gart_size;
+
+    /* VRAM size; do_ioctls() stores gem_info.vram_visible here. */
+    uint32_t vram_size;
+
+    /* Add a pipe_buffer to the list of buffer objects to validate. */
+    boolean (*add_buffer)(struct radeon_winsys* winsys,
+                          struct pipe_buffer* pbuffer,
+                          uint32_t rd,
+                          uint32_t wd);
+
+    /* Revalidate all currently setup pipe_buffers. Documented as "returns TRUE
+     * if a flush is required"; NOTE(review): confirm against radeon_validate. */
+    boolean (*validate)(struct radeon_winsys* winsys);
+
+    /* Check for room for commands (radeon_check_cs is a stub: always TRUE). */
+    boolean (*check_cs)(struct radeon_winsys* winsys, int size);
+
+    /* Start a command emit; all arguments are forwarded to radeon_cs_begin(). */
+    void (*begin_cs)(struct radeon_winsys* winsys,
+                     int size,
+                     const char* file,
+                     const char* function,
+                     int line);
+
+    /* Write a dword to the command buffer. */
+    void (*write_cs_dword)(struct radeon_winsys* winsys, uint32_t dword);
+
+    /* Write a relocated dword for bo (rd/wd/flags go to radeon_cs_write_reloc). */
+    void (*write_cs_reloc)(struct radeon_winsys* winsys,
+                           struct pipe_buffer* bo,
+                           uint32_t rd,
+                           uint32_t wd,
+                           uint32_t flags);
+
+    /* Finish a command emit; arguments are forwarded to radeon_cs_end(). */
+    void (*end_cs)(struct radeon_winsys* winsys,
+                   const char* file,
+                   const char* function,
+                   int line);
+
+    /* Flush the CS: emit it, print it if the emit fails, then erase it. */
+    void (*flush_cs)(struct radeon_winsys* winsys);
+
+    /* winsys flush - set the callback (and its data) for when a flush is required */
+    void (*set_flush_cb)(struct radeon_winsys *winsys,
+			 void (*flush_cb)(void *), void *data);
+
+    void (*reset_bos)(struct radeon_winsys *winsys); /* wraps radeon_cs_space_reset_bos() */
+};
+
+#endif
-- 
cgit v1.2.3


From 3400b668e35469d5dbba515e3a8b9d775fd2eff5 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 3 Dec 2009 09:56:03 +0100
Subject: Move pf_is_compressed() to u_format auxiliary module.

---
 src/gallium/auxiliary/util/u_format.h          | 22 ++++++++++++++++++++++
 src/gallium/drivers/svga/svga_screen_texture.c |  5 +++--
 src/gallium/include/pipe/p_format.h            |  6 ------
 src/mesa/state_tracker/st_cb_texture.c         |  3 ++-
 4 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 7b5b7fcda5..2931d2a8bb 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -111,6 +111,28 @@ const struct util_format_description *
 util_format_description(enum pipe_format format);
 
 
+/*
+ * Format query functions.
+ */
+
+/** Return TRUE only when the format's description has the DXT layout;
+ *  FALSE for format 0 or when the description lookup yields NULL. */
+static INLINE boolean
+util_format_is_compressed(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   assert(format);
+   if (!format || !desc) { /* never dereference a missing description */
+      return FALSE;
+   }
+   return desc->layout == UTIL_FORMAT_LAYOUT_DXT ? TRUE : FALSE;
+}
+
+
+/*
+ * Format access functions.
+ */
+
 void
 util_format_read_4f(enum pipe_format format,
                     float *dst, unsigned dst_stride, 
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index 6e10d65a20..b899796101 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -29,6 +29,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_thread.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -319,7 +320,7 @@ svga_texture_create(struct pipe_screen *screen,
     */
 #if 0
    if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) &&
-      !pf_is_compressed(templat->format))
+      !util_format_is_compressed(templat->format))
       tex->key.flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
 #endif
    
@@ -933,7 +934,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
       if (min_lod == 0 && max_lod >= pt->last_level)
          view = FALSE;
 
-      if (pf_is_compressed(pt->format) && view) {
+      if (util_format_is_compressed(pt->format) && view) {
          format = svga_translate_format_render(pt->format);
       }
 
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 8c61858e7b..3eb22ff077 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -569,12 +569,6 @@ pf_is_depth_stencil( enum pipe_format format )
    return pf_is_depth_or_stencil( format );
 }
 
-static INLINE boolean 
-pf_is_compressed( enum pipe_format format )
-{
-   return pf_layout(format) == PIPE_FORMAT_LAYOUT_DXT ? TRUE : FALSE;
-}
-
 enum pipe_video_chroma_format
 {
    PIPE_VIDEO_CHROMA_FORMAT_420,
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 3a2337802f..2c5601c22b 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -62,6 +62,7 @@
 #include "pipe/p_shader_tokens.h"
 #include "util/u_tile.h"
 #include "util/u_blit.h"
+#include "util/u_format.h"
 #include "util/u_surface.h"
 #include "util/u_math.h"
 
@@ -890,7 +891,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
    GLubyte *dest;
 
    if (stImage->pt &&
-       pf_is_compressed(stImage->pt->format) &&
+       util_format_is_compressed(stImage->pt->format) &&
        !compressed_dst) {
       /* Need to decompress the texture.
        * We'll do this by rendering a textured quad.
-- 
cgit v1.2.3


From 6df42d80234d13676fc3207cf44f0e371e3372b5 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 3 Dec 2009 10:52:47 +0100
Subject: Move pf_get_block() to u_format auxiliary module.

---
 src/gallium/auxiliary/draw/draw_pipe_aaline.c      |  4 ++-
 src/gallium/auxiliary/draw/draw_pipe_pstipple.c    |  3 +-
 src/gallium/auxiliary/util/u_blit.c                |  3 +-
 src/gallium/auxiliary/util/u_format.h              | 26 ++++++++++++++-
 src/gallium/auxiliary/util/u_surface.c             |  3 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |  3 +-
 src/gallium/drivers/llvmpipe/lp_texture.c          |  6 ++--
 src/gallium/drivers/r300/r300_texture.c            |  3 +-
 src/gallium/drivers/softpipe/sp_texture.c          |  4 ++-
 src/gallium/include/pipe/p_format.h                | 39 ----------------------
 src/gallium/state_trackers/dri/dri_drawable.c      |  3 +-
 src/gallium/state_trackers/egl/egl_surface.c       |  3 +-
 src/gallium/state_trackers/python/p_device.i       |  2 +-
 .../state_trackers/python/st_softpipe_winsys.c     |  3 +-
 src/gallium/state_trackers/vega/api_filters.c      |  3 +-
 src/gallium/state_trackers/vega/image.c            |  3 +-
 src/gallium/state_trackers/vega/mask.c             |  3 +-
 src/gallium/state_trackers/vega/paint.c            |  3 +-
 src/gallium/state_trackers/vega/renderer.c         |  3 +-
 src/gallium/state_trackers/vega/vg_tracker.c       |  3 +-
 src/gallium/state_trackers/xorg/xorg_crtc.c        |  3 +-
 src/gallium/state_trackers/xorg/xorg_dri2.c        |  2 +-
 src/gallium/state_trackers/xorg/xorg_exa.c         |  5 +--
 src/gallium/state_trackers/xorg/xorg_renderer.c    |  3 +-
 src/gallium/state_trackers/xorg/xorg_xv.c          |  4 ++-
 src/gallium/state_trackers/xorg/xvmc/surface.c     |  2 +-
 .../winsys/drm/nouveau/drm/nouveau_drm_api.c       |  3 +-
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c |  6 ++--
 src/gallium/winsys/egl_xlib/sw_winsys.c            |  3 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c         |  3 +-
 src/gallium/winsys/gdi/gdi_softpipe_winsys.c       |  3 +-
 src/gallium/winsys/xlib/xlib_cell.c                |  3 +-
 src/gallium/winsys/xlib/xlib_llvmpipe.c            |  3 +-
 src/gallium/winsys/xlib/xlib_softpipe.c            |  3 +-
 src/mesa/state_tracker/st_cb_fbo.c                 |  4 +--
 src/mesa/state_tracker/st_cb_texture.c             |  2 +-
 src/mesa/state_tracker/st_texture.c                |  3 +-
 37 files changed, 99 insertions(+), 79 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 8ccd527b3a..dbeb22b917 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -35,6 +35,8 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
+
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -402,7 +404,7 @@ aaline_create_texture(struct aaline_stage *aaline)
    texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
    texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
    texTemp.depth0 = 1;
-   pf_get_block(texTemp.format, &texTemp.block);
+   util_format_get_block(texTemp.format, &texTemp.block);
 
    aaline->texture = screen->texture_create(screen, &texTemp);
    if (!aaline->texture)
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index a500edd7fe..53dc163895 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -38,6 +38,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -431,7 +432,7 @@ pstip_create_texture(struct pstip_stage *pstip)
    texTemp.width0 = 32;
    texTemp.height0 = 32;
    texTemp.depth0 = 1;
-   pf_get_block(texTemp.format, &texTemp.block);
+   util_format_get_block(texTemp.format, &texTemp.block);
 
    pstip->texture = screen->texture_create(screen, &texTemp);
    if (pstip->texture == NULL)
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 5372df5735..df2bcf2d02 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -42,6 +42,7 @@
 
 #include "util/u_blit.h"
 #include "util/u_draw_quad.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_simple_shaders.h"
@@ -357,7 +358,7 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       texTemp.width0 = srcW;
       texTemp.height0 = srcH;
       texTemp.depth0 = 1;
-      pf_get_block(src->format, &texTemp.block);
+      util_format_get_block(src->format, &texTemp.block);
 
       tex = screen->texture_create(screen, &texTemp);
       if (!tex)
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index e57c9e0023..583b62e606 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -50,7 +50,7 @@ struct util_format_block
    /** Block height in pixels */
    unsigned height;
 
-   /** Block size in bytes */
+   /** Block size in bits */
    unsigned bits;
 };
 
@@ -159,6 +159,30 @@ util_format_is_depth_and_stencil(enum pipe_format format)
            desc->swizzle[1] != UTIL_FORMAT_SWIZZLE_NONE) ? TRUE : FALSE;
 }
 
+/**
+ * Describe pixel format's block: size in bytes, width and height in pixels.
+ * Format 0, or a format whose description is NULL, gives a 0-byte 1x1 block.
+ * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx
+ */
+static INLINE void
+util_format_get_block(enum pipe_format format,
+                      struct pipe_format_block *block)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   assert(format);
+   if (!format || !desc) {
+      block->size = 0;
+      block->width = 1;
+      block->height = 1;
+      return;
+   }
+
+   block->size = desc->block.bits / 8; /* description holds bits, block bytes */
+   block->width = desc->block.width;
+   block->height = desc->block.height;
+}
+
 
 /*
  * Format access functions.
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index de8c266db8..9c84ca733b 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -36,6 +36,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 
+#include "util/u_format.h"
 #include "util/u_surface.h"
 
 
@@ -82,7 +83,7 @@ util_create_rgba_surface(struct pipe_screen *screen,
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
-   pf_get_block(format, &templ.block);
+   util_format_get_block(format, &templ.block);
    templ.tex_usage = usage;
 
    *textureOut = screen->texture_create(screen, &templ);
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 85fe2efd2b..32374abb49 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -29,6 +29,7 @@
 #include <assert.h>
 #include <pipe/p_context.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_math.h>
 #include <util/u_memory.h>
 #include <tgsi/tgsi_parse.h>
@@ -834,7 +835,7 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    template.height0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_height) : r->picture_height;
    template.depth0 = 1;
-   pf_get_block(template.format, &template.block);
+   util_format_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
 
    r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 65d62fd072..9b19cac972 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -34,6 +34,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/internal/p_winsys_screen.h"
+
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -63,7 +65,7 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 
    unsigned buffer_size = 0;
 
-   pf_get_block(lpt->base.format, &lpt->base.block);
+   util_format_get_block(lpt->base.format, &lpt->base.block);
 
    for (level = 0; level <= pt->last_level; level++) {
       unsigned nblocksx, nblocksy;
@@ -100,7 +102,7 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
 {
    struct llvmpipe_winsys *winsys = screen->winsys;
 
-   pf_get_block(lpt->base.format, &lpt->base.block);
+   util_format_get_block(lpt->base.format, &lpt->base.block);
    lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
    lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
 
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 093a21ebe2..5538ec3918 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -22,6 +22,7 @@
 
 #include "pipe/p_screen.h"
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -283,7 +284,7 @@ r300_video_surface_create(struct pipe_screen *screen,
     template.width0 = util_next_power_of_two(width);
     template.height0 = util_next_power_of_two(height);
     template.depth0 = 1;
-    pf_get_block(template.format, &template.block);
+    util_format_get_block(template.format, &template.block);
     template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
                          PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index ac5f61e46f..0f3323ff2f 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -32,6 +32,8 @@
 
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -438,7 +440,7 @@ softpipe_video_surface_create(struct pipe_screen *screen,
    template.width0 = util_next_power_of_two(width);
    template.height0 = util_next_power_of_two(height);
    template.depth0 = 1;
-   pf_get_block(template.format, &template.block);
+   util_format_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
    sp_vsfc->tex = screen->texture_create(screen, &template);
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 69639ab011..3be5b18a25 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -479,45 +479,6 @@ struct pipe_format_block
    unsigned height;
 };
 
-/**
- * Describe pixel format's block.   
- * 
- * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx
- */
-static INLINE void 
-pf_get_block(enum pipe_format format, struct pipe_format_block *block)
-{
-   switch(format) {
-   case PIPE_FORMAT_DXT1_RGBA:
-   case PIPE_FORMAT_DXT1_RGB:
-   case PIPE_FORMAT_DXT1_SRGBA:
-   case PIPE_FORMAT_DXT1_SRGB:
-      block->size = 8;
-      block->width = 4;
-      block->height = 4;
-      break;
-   case PIPE_FORMAT_DXT3_RGBA:
-   case PIPE_FORMAT_DXT5_RGBA:
-   case PIPE_FORMAT_DXT3_SRGBA:
-   case PIPE_FORMAT_DXT5_SRGBA:
-      block->size = 16;
-      block->width = 4;
-      block->height = 4;
-      break;
-   case PIPE_FORMAT_YCBCR:
-   case PIPE_FORMAT_YCBCR_REV:
-      block->size = 4; /* 2*cpp */
-      block->width = 2;
-      block->height = 1;
-      break;
-   default:
-      block->size = pf_get_size(format);
-      block->width = 1;
-      block->height = 1;
-      break;
-   }
-}
-
 static INLINE unsigned
 pf_get_nblocksx(const struct pipe_format_block *block, unsigned x)
 {
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index 45a6059ea8..2749cdee8d 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -44,6 +44,7 @@
 #include "state_tracker/st_context.h"
 #include "state_tracker/st_cb_fbo.h"
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_rect.h"
  
@@ -66,7 +67,7 @@ dri_surface_from_handle(struct drm_api *api,
    templat.format = format;
    templat.width0 = width;
    templat.height0 = height;
-   pf_get_block(templat.format, &templat.block);
+   util_format_get_block(templat.format, &templat.block);
 
    texture = api->texture_from_shared_handle(api, screen, &templat,
                                              "dri2 buffer", pitch, handle);
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index ddd9b04cd4..35c8b10685 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -12,6 +12,7 @@
 
 #include "state_tracker/drm_api.h"
 
+#include "util/u_format.h"
 #include "util/u_rect.h"
 
 /*
@@ -118,7 +119,7 @@ drm_create_texture(_EGLDisplay *dpy,
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	templat.width0 = w;
 	templat.height0 = h;
-	pf_get_block(templat.format, &templat.block);
+	util_format_get_block(templat.format, &templat.block);
 
 	texture = screen->texture_create(dev->screen,
 	                                 &templat);
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index a83bcc71a1..bfe3f051fc 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -112,7 +112,7 @@ struct st_device {
       struct pipe_texture templat;
       memset(&templat, 0, sizeof(templat));
       templat.format = format;
-      pf_get_block(templat.format, &templat.block);
+      util_format_get_block(templat.format, &templat.block);
       templat.width0 = width;
       templat.height0 = height;
       templat.depth0 = depth;
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index f0abd12e3d..010a5ded66 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -40,6 +40,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
@@ -179,7 +180,7 @@ st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = round_up(nblocksx * block.size, alignment);
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index faf396d087..4787ae38f0 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -38,6 +38,7 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_shader_tokens.h"
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 
 
@@ -71,7 +72,7 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
    templ.width0 = color_data_len;
    templ.height0 = 1;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
+   util_format_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 4684a5727d..24ca911f79 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -39,6 +39,7 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_inlines.h"
 #include "util/u_blit.h"
+#include "util/u_format.h"
 #include "util/u_tile.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
@@ -270,7 +271,7 @@ struct vg_image * image_create(VGImageFormat format,
    memset(&pt, 0, sizeof(pt));
    pt.target = PIPE_TEXTURE_2D;
    pt.format = pformat;
-   pf_get_block(pformat, &pt.block);
+   util_format_get_block(pformat, &pt.block);
    pt.last_level = 0;
    pt.width0 = width;
    pt.height0 = height;
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index b84103fdba..6e93e2551e 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -36,6 +36,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 
 struct vg_mask_layer {
@@ -491,7 +492,7 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height)
       memset(&pt, 0, sizeof(pt));
       pt.target = PIPE_TEXTURE_2D;
       pt.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-      pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block);
+      util_format_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block);
       pt.last_level = 0;
       pt.width0 = width;
       pt.height0 = height;
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index e8ca7d9e89..b88322f433 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -34,6 +34,7 @@
 #include "pipe/p_compiler.h"
 #include "pipe/p_inlines.h"
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
@@ -154,7 +155,7 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p)
    templ.width0 = 1024;
    templ.height0 = 1;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
+   util_format_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 9085ed1bfe..1706ed83f2 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -35,6 +35,7 @@
 #include "pipe/p_shader_tokens.h"
 
 #include "util/u_draw_quad.h"
+#include "util/u_format.h"
 #include "util/u_simple_shaders.h"
 #include "util/u_memory.h"
 #include "util/u_rect.h"
@@ -448,7 +449,7 @@ void renderer_copy_surface(struct renderer *ctx,
    texTemp.width0 = srcW;
    texTemp.height0 = srcH;
    texTemp.depth0 = 1;
-   pf_get_block(src->format, &texTemp.block);
+   util_format_get_block(src->format, &texTemp.block);
 
    tex = screen->texture_create(screen, &texTemp);
    if (!tex)
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index d28463dd1b..e7b04a8e06 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -31,6 +31,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
 #include "pipe/p_screen.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 
@@ -50,7 +51,7 @@ create_texture(struct pipe_context *pipe, enum pipe_format format,
    }
 
    templ.target = PIPE_TEXTURE_2D;
-   pf_get_block(templ.format, &templ.block);
+   util_format_get_block(templ.format, &templ.block);
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index 9e8c14d741..fe994d1ea3 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -50,6 +50,7 @@
 #endif
 
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_rect.h"
 
 #ifdef HAVE_LIBKMS
@@ -200,7 +201,7 @@ crtc_load_cursor_argb_ga3d(xf86CrtcPtr crtc, CARD32 * image)
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
 	templat.width0 = 64;
 	templat.height0 = 64;
-	pf_get_block(templat.format, &templat.block);
+	util_format_get_block(templat.format, &templat.block);
 
 	crtcp->cursor_tex = ms->screen->texture_create(ms->screen,
 						       &templat);
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index 36711609d2..fe2e0f68aa 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -109,7 +109,7 @@ dri2_do_create_buffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int form
 	    else
 		template.format = ms->ds_depth_bits_last ?
 		    PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM;
-	    pf_get_block(template.format, &template.block);
+	    util_format_get_block(template.format, &template.block);
 	    template.width0 = pDraw->width;
 	    template.height0 = pDraw->height;
 	    template.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index aa46cd45f1..f79e1ef845 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -43,6 +43,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_inlines.h"
 
+#include "util/u_format.h"
 #include "util/u_rect.h"
 #include "util/u_math.h"
 #include "util/u_debug.h"
@@ -899,7 +900,7 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	memset(&template, 0, sizeof(template));
 	template.target = PIPE_TEXTURE_2D;
 	exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format);
-	pf_get_block(template.format, &template.block);
+	util_format_get_block(template.format, &template.block);
         if (ROUND_UP_TEXTURES && priv->flags == 0) {
            template.width0 = util_next_power_of_two(width);
            template.height0 = util_next_power_of_two(height);
@@ -985,7 +986,7 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn,
     memset(&template, 0, sizeof(template));
     template.target = PIPE_TEXTURE_2D;
     exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy);
-    pf_get_block(template.format, &template.block);
+    util_format_get_block(template.format, &template.block);
     template.width0 = width;
     template.height0 = height;
     template.depth0 = 1;
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index f777395100..7cac91e564 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -5,6 +5,7 @@
 
 #include "cso_cache/cso_context.h"
 #include "util/u_draw_quad.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_rect.h"
@@ -512,7 +513,7 @@ renderer_clone_texture(struct xorg_renderer *r,
    templ.width0 = src->width0;
    templ.height0 = src->height0;
    templ.depth0 = 1;
-   pf_get_block(format, &templ.block);
+   util_format_get_block(format, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    pt = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index b8eca8c817..8c491c030d 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -13,6 +13,8 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_inlines.h"
 
+#include "util/u_format.h"
+
 /*XXX get these from pipe's texture limits */
 #define IMAGE_MAX_WIDTH		2048
 #define IMAGE_MAX_HEIGHT	2048
@@ -170,7 +172,7 @@ create_component_texture(struct pipe_context *pipe,
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
-   pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block);
+   util_format_get_block(PIPE_FORMAT_L8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    tex = screen->texture_create(screen, &templ);
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index 8cb73f4897..5059424da7 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -106,7 +106,7 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u
    template.width0 = width;
    template.height0 = height;
    template.depth0 = 1;
-   pf_get_block(template.format, &template.block);
+   util_format_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
 
    tex = vpipe->screen->texture_create(vpipe->screen, &template);
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index d497861324..cc25fd1741 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -1,5 +1,6 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 
 #include "nouveau_drm_api.h"
@@ -28,7 +29,7 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 	tmpl.format = format;
 	tmpl.width0 = width;
 	tmpl.height0 = height;
-	pf_get_block(tmpl.format, &tmpl.block);
+	util_format_get_block(tmpl.format, &tmpl.block);
 
 	pt = api->texture_from_shared_handle(api, pscreen, &tmpl,
 					     "front buffer", pitch, handle);
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 74afffc9cf..555c57d4e7 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -36,6 +36,8 @@
 #include "softpipe/sp_texture.h"
 #include "r300_context.h"
 #include <X11/Xutil.h>
+#include "util/u_format.h"
+
 struct radeon_vl_context
 {
     Display *display;
@@ -116,7 +118,7 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws,
     struct pipe_format_block block;
     unsigned nblocksx, nblocksy, size;
 
-    pf_get_block(format, &block);
+    util_format_get_block(format, &block);
 
     nblocksx = pf_get_nblocksx(&block, width);
     nblocksy = pf_get_nblocksy(&block, height);
@@ -321,7 +323,7 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co
     tmpl.height0 = h;
     tmpl.depth0 = 1;
     tmpl.format = format;
-    pf_get_block(tmpl.format, &tmpl.block);
+    util_format_get_block(tmpl.format, &tmpl.block);
     tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
     tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h);
 
diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c
index 79ff2cc985..2cd89bb04a 100644
--- a/src/gallium/winsys/egl_xlib/sw_winsys.c
+++ b/src/gallium/winsys/egl_xlib/sw_winsys.c
@@ -38,6 +38,7 @@
 #include "pipe/internal/p_winsys_screen.h"
 #include "pipe/p_state.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -173,7 +174,7 @@ surface_buffer_create(struct pipe_winsys *winsys,
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = round_up(nblocksx * block.size, alignment);
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 08067aad64..44b508c1d4 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -30,6 +30,7 @@
 #include <pipe/internal/p_winsys_screen.h>
 #include <pipe/p_state.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <softpipe/sp_winsys.h>
@@ -141,7 +142,7 @@ static struct pipe_buffer* xsp_surface_buffer_create
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = align(nblocksx * block.size, ALIGNMENT);
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index 5e0ccf32f4..d9fb2080a1 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -42,6 +42,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
@@ -173,7 +174,7 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = round_up(nblocksx * block.size, alignment);
diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c
index 13e609f58f..84f445c8e9 100644
--- a/src/gallium/winsys/xlib/xlib_cell.c
+++ b/src/gallium/winsys/xlib/xlib_cell.c
@@ -45,6 +45,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -297,7 +298,7 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = round_up(nblocksx * block.size, alignment);
diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c
index 3dd15e099b..e7914583ba 100644
--- a/src/gallium/winsys/xlib/xlib_llvmpipe.c
+++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c
@@ -44,6 +44,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "llvmpipe/lp_winsys.h"
@@ -331,7 +332,7 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys,
    xm_dt->width = width;
    xm_dt->height = height;
 
-   pf_get_block(format, &xm_dt->block);
+   util_format_get_block(format, &xm_dt->block);
    nblocksx = pf_get_nblocksx(&xm_dt->block, width);
    nblocksy = pf_get_nblocksy(&xm_dt->block, height);
    xm_dt->stride = align(nblocksx * xm_dt->block.size, alignment);
diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index 260b39e2a0..2994694614 100644
--- a/src/gallium/winsys/xlib/xlib_softpipe.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -42,6 +42,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "softpipe/sp_winsys.h"
@@ -363,7 +364,7 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
    struct pipe_format_block block;
    unsigned nblocksx, nblocksy, size;
 
-   pf_get_block(format, &block);
+   util_format_get_block(format, &block);
    nblocksx = pf_get_nblocksx(&block, width);
    nblocksy = pf_get_nblocksy(&block, height);
    *stride = align(nblocksx * block.size, alignment);
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 7ccdddb00b..3a5b634e87 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -105,7 +105,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       _mesa_free(strb->data);
 
       assert(strb->format != PIPE_FORMAT_NONE);
-      pf_get_block(strb->format, &block);
+      util_format_get_block(strb->format, &block);
       
       strb->stride = pf_get_stride(&block, width);
       size = pf_get_2d_size(&block, strb->stride, height);
@@ -128,7 +128,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
       memset(&template, 0, sizeof(template));
       template.target = PIPE_TEXTURE_2D;
       template.format = format;
-      pf_get_block(format, &template.block);
+      util_format_get_block(format, &template.block);
       template.width0 = width;
       template.height0 = height;
       template.depth0 = 1;
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index bf17f33fc1..6084ded72d 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -406,7 +406,7 @@ compress_with_blit(GLcontext * ctx,
    memset(&templ, 0, sizeof(templ));
    templ.target = PIPE_TEXTURE_2D;
    templ.format = st_mesa_format_to_pipe_format(mesa_format);
-   pf_get_block(templ.format, &templ.block);
+   util_format_get_block(templ.format, &templ.block);
    templ.width0 = width;
    templ.height0 = height;
    templ.depth0 = 1;
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index dbccee86c1..bd6ee5d71c 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -43,6 +43,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_rect.h"
 #include "util/u_math.h"
 
@@ -104,7 +105,7 @@ st_texture_create(struct st_context *st,
    pt.width0 = width0;
    pt.height0 = height0;
    pt.depth0 = depth0;
-   pf_get_block(format, &pt.block);
+   util_format_get_block(format, &pt.block);
    pt.tex_usage = usage;
 
    newtex = screen->texture_create(screen, &pt);
-- 
cgit v1.2.3


From cceeab39ea541b1be1521114316d660a77769c2a Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 3 Dec 2009 11:17:37 +0100
Subject: Move pf_get_bits/size() to u_format auxiliary module.

---
 src/gallium/auxiliary/util/u_format.h        | 29 ++++++++++++++++++++++
 src/gallium/auxiliary/util/u_gen_mipmap.c    |  5 ++--
 src/gallium/drivers/cell/ppu/cell_texture.c  |  6 +++--
 src/gallium/drivers/r300/r300_emit.c         | 13 +++++-----
 src/gallium/drivers/softpipe/sp_tile_cache.c |  3 ++-
 src/gallium/drivers/svga/svga_state_vs.c     |  3 ++-
 src/gallium/include/pipe/p_format.h          | 36 ----------------------------
 src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c |  5 ++--
 src/gallium/winsys/gdi/gdi_softpipe_winsys.c |  4 ++--
 src/mesa/state_tracker/st_cb_texture.c       |  2 +-
 10 files changed, 53 insertions(+), 53 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 583b62e606..3ac5384024 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -183,6 +183,35 @@ util_format_get_block(enum pipe_format format,
    block->height = desc->block.height;
 }
 
+/**
+ * Return total bits needed for the pixel format.
+ */
+static INLINE uint
+util_format_get_bits(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   assert(format);
+   if (!format) {
+      return 0;
+   }
+
+   return desc->block.bits / (desc->block.width * desc->block.height);
+}
+
+/**
+ * Return bytes per pixel for the given format.
+ */
+static INLINE uint
+util_format_get_size(enum pipe_format format)
+{
+   uint bits = util_format_get_bits(format);
+
+   assert(bits % 8 == 0);
+
+   return bits / 8;
+}
+
 
 /*
  * Format access functions.
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index f67f1e458d..70ec925d15 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -41,6 +41,7 @@
 #include "pipe/p_shader_tokens.h"
 #include "pipe/p_state.h"
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_draw_quad.h"
 #include "util/u_gen_mipmap.h"
@@ -996,7 +997,7 @@ reduce_2d(enum pipe_format pformat,
 {
    enum dtype datatype;
    uint comps;
-   const int bpt = pf_get_size(pformat);
+   const int bpt = util_format_get_size(pformat);
    const ubyte *srcA, *srcB;
    ubyte *dst;
    int row;
@@ -1035,7 +1036,7 @@ reduce_3d(enum pipe_format pformat,
           int dstWidth, int dstHeight, int dstDepth,
           int dstRowStride, ubyte *dstPtr)
 {
-   const int bpt = pf_get_size(pformat);
+   const int bpt = util_format_get_size(pformat);
    const int border = 0;
    int img, row;
    int bytesPerSrcImage, bytesPerDstImage;
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index e6b8a87045..605d53a948 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -35,6 +35,8 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/internal/p_winsys_screen.h"
+
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -408,7 +410,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
 
    if (transfer->usage & PIPE_TRANSFER_READ) {
       /* need to untwiddle the texture to make a linear version */
-      const uint bpp = pf_get_size(ct->base.format);
+      const uint bpp = util_format_get_size(ct->base.format);
       if (bpp == 4) {
          const uint *src = (uint *) (ct->mapped + ctrans->offset);
          uint *dst = ctrans->map;
@@ -451,7 +453,7 @@ cell_transfer_unmap(struct pipe_screen *screen,
       /* The user wrote new texture data into the mapped buffer.
        * We need to convert the new linear data into the twiddled/tiled format.
        */
-      const uint bpp = pf_get_size(ct->base.format);
+      const uint bpp = util_format_get_size(ct->base.format);
       if (bpp == 4) {
          const uint *src = ctrans->map;
          uint *dst = (uint *) (ct->mapped + ctrans->offset);
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 98a39390bf..171859b8e4 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -22,6 +22,7 @@
 
 /* r300_emit: Functions for emitting state. */
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "r300_context.h"
@@ -631,10 +632,10 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
     for (i = 0; i < aos_count - 1; i += 2) {
         int buf_num1 = velem[i].vertex_buffer_index;
         int buf_num2 = velem[i+1].vertex_buffer_index;
-        assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
-        assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_size(velem[i+1].src_format) % 4 == 0);
-        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |
-               (pf_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));
+        assert(vbuf[buf_num1].stride % 4 == 0 && util_format_get_size(velem[i].src_format) % 4 == 0);
+        assert(vbuf[buf_num2].stride % 4 == 0 && util_format_get_size(velem[i+1].src_format) % 4 == 0);
+        OUT_CS((util_format_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |
+               (util_format_get_size(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));
         OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset +
                offset * vbuf[buf_num1].stride);
         OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset +
@@ -642,8 +643,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
     }
     if (aos_count & 1) {
         int buf_num = velem[i].vertex_buffer_index;
-        assert(vbuf[buf_num].stride % 4 == 0 && pf_get_size(velem[i].src_format) % 4 == 0);
-        OUT_CS((pf_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
+        assert(vbuf[buf_num].stride % 4 == 0 && util_format_get_size(velem[i].src_format) % 4 == 0);
+        OUT_CS((util_format_get_size(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
         OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset +
                offset * vbuf[buf_num].stride);
     }
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 65872cecc4..cde2260073 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -33,6 +33,7 @@
  */
 
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_tile.h"
 #include "sp_tile_cache.h"
@@ -238,7 +239,7 @@ clear_tile(struct softpipe_cached_tile *tile,
 {
    uint i, j;
 
-   switch (pf_get_size(format)) {
+   switch (util_format_get_size(format)) {
    case 1:
       memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE);
       break;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index a947745732..9e339577c7 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -25,6 +25,7 @@
 
 #include "pipe/p_inlines.h"
 #include "pipe/p_defines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "translate/translate.h"
 
@@ -210,7 +211,7 @@ static int update_zero_stride( struct svga_context *svga,
          mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, 
                                                vbuffer->buffer,
                                                vel->src_offset,
-                                               pf_get_size(vel->src_format),
+                                               util_format_get_size(vel->src_format),
                                                PIPE_BUFFER_USAGE_CPU_READ);
          translate->set_buffer(translate, vel->vertex_buffer_index,
                                mapped_buffer,
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 3be5b18a25..5fd073c95f 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -422,42 +422,6 @@ static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp )
    return size << (pf_mixed_scale8( format ) * 3);
 }
 
-/**
- * Return total bits needed for the pixel format.
- */
-static INLINE uint pf_get_bits( enum pipe_format format )
-{
-   switch (pf_layout(format)) {
-   case PIPE_FORMAT_LAYOUT_RGBAZS:
-   case PIPE_FORMAT_LAYOUT_MIXED:
-      return
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_0 ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_1 ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_S );
-   case PIPE_FORMAT_LAYOUT_YCBCR:
-      assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV );
-      /* return effective bits per pixel */
-      return 16; 
-   default:
-      assert( 0 );
-      return 0;
-   }
-}
-
-/**
- * Return bytes per pixel for the given format.
- */
-static INLINE uint pf_get_size( enum pipe_format format )
-{
-   assert(pf_get_bits(format) % 8 == 0);
-   return pf_get_bits(format) / 8;
-}
-
 /**
  * Describe accurately the pixel format.
  * 
diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
index e8bc0f55ac..9cb77f3ad3 100644
--- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
@@ -39,6 +39,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "llvmpipe/lp_winsys.h"
@@ -147,8 +148,8 @@ gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys,
    gdt->width = width;
    gdt->height = height;
 
-   bpp = pf_get_bits(format);
-   cpp = pf_get_size(format);
+   bpp = util_format_get_bits(format);
+   cpp = util_format_get_size(format);
    
    gdt->stride = round_up(width * cpp, alignment);
    gdt->size = gdt->stride * height;
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index d9fb2080a1..a58648ddb9 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -284,10 +284,10 @@ gdi_softpipe_present(struct pipe_screen *screen,
 
     memset(&bmi, 0, sizeof(BITMAPINFO));
     bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
-    bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format);
+    bmi.bmiHeader.biWidth = texture->stride[surface->level] / util_format_get_size(surface->format);
     bmi.bmiHeader.biHeight= -(long)surface->height;
     bmi.bmiHeader.biPlanes = 1;
-    bmi.bmiHeader.biBitCount = pf_get_bits(surface->format);
+    bmi.bmiHeader.biBitCount = util_format_get_bits(surface->format);
     bmi.bmiHeader.biCompression = BI_RGB;
     bmi.bmiHeader.biSizeImage = 0;
     bmi.bmiHeader.biXPelsPerMeter = 0;
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 6084ded72d..b41b36f98f 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -834,7 +834,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
    /* copy/pack data into user buffer */
    if (st_equal_formats(stImage->pt->format, format, type)) {
       /* memcpy */
-      const uint bytesPerRow = width * pf_get_size(stImage->pt->format);
+      const uint bytesPerRow = width * util_format_get_size(stImage->pt->format);
       ubyte *map = screen->transfer_map(screen, tex_xfer);
       GLuint row;
       for (row = 0; row < height; row++) {
-- 
cgit v1.2.3


From f5bd93fae2e4f46665eb1f09ca64cb39ff2b8a79 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 3 Dec 2009 11:58:36 +0100
Subject: Move pf_get_component_bits() to u_format auxiliary module.

---
 src/gallium/auxiliary/util/u_format.h            | 44 ++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_state_surface.c  |  7 ++--
 src/gallium/drivers/softpipe/sp_state_surface.c  |  7 ++--
 src/gallium/include/pipe/p_format.h              | 27 ---------------
 src/gallium/state_trackers/vega/vg_tracker.c     |  2 +-
 src/gallium/state_trackers/wgl/stw_framebuffer.c |  5 +--
 src/gallium/state_trackers/wgl/stw_pixelformat.c | 13 +++----
 src/mesa/state_tracker/st_cb_drawpixels.c        |  4 +--
 src/mesa/state_tracker/st_format.c               | 44 ++++++++++--------------
 src/mesa/state_tracker/st_texture.c              |  2 +-
 10 files changed, 87 insertions(+), 68 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 3ac5384024..fa6dc1f55c 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -212,6 +212,50 @@ util_format_get_size(enum pipe_format format)
    return bits / 8;
 }
 
+static INLINE uint
+util_format_get_component_bits(enum pipe_format format,
+                               enum util_format_colorspace colorspace,
+                               uint component)
+{
+   const struct util_format_description *desc = util_format_description(format);
+   enum util_format_colorspace desc_colorspace;
+   uint swizzle;
+
+   assert(format);
+   if (!format) {
+      return 0;
+   }
+
+   assert(component < 4); /* component indexes desc->swizzle[]; callers pass 0..3 */
+
+   /* Treat RGB and SRGB as equivalent. */
+   if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+      colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+   }
+   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+      desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+   } else {
+      desc_colorspace = desc->colorspace;
+   }
+
+   if (desc_colorspace != colorspace) {
+      return 0; /* format's colorspace differs from the one asked about */
+   }
+
+   switch (desc->swizzle[component]) { /* map the swizzle to the channel holding the bits */
+   case UTIL_FORMAT_SWIZZLE_X:
+      return desc->channel[0].size;
+   case UTIL_FORMAT_SWIZZLE_Y:
+      return desc->channel[1].size;
+   case UTIL_FORMAT_SWIZZLE_Z:
+      return desc->channel[2].size;
+   case UTIL_FORMAT_SWIZZLE_W:
+      return desc->channel[3].size;
+   default:
+      return 0; /* swizzle does not select one of the X/Y/Z/W channels */
+   }
+}
+
 
 /*
  * Format access functions.
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index c06ce8b75c..ba970cac98 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -35,6 +35,8 @@
 
 #include "draw/draw_context.h"
 
+#include "util/u_format.h"
+
 
 /**
  * XXX this might get moved someday
@@ -88,8 +90,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
       if (lp->framebuffer.zsbuf) {
          int depth_bits;
          double mrd;
-         depth_bits = pf_get_component_bits(lp->framebuffer.zsbuf->format,
-                                            PIPE_FORMAT_COMP_Z);
+         depth_bits = util_format_get_component_bits(lp->framebuffer.zsbuf->format,
+                                                     UTIL_FORMAT_COLORSPACE_ZS,
+                                                     0);
          if (depth_bits > 16) {
             mrd = 0.0000001;
          }
diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index bc0e201130..a518248bb1 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -35,6 +35,8 @@
 
 #include "draw/draw_context.h"
 
+#include "util/u_format.h"
+
 
 /**
  * XXX this might get moved someday
@@ -80,8 +82,9 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
       if (sp->framebuffer.zsbuf) {
          int depth_bits;
          double mrd;
-         depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format,
-                                            PIPE_FORMAT_COMP_Z);
+         depth_bits = util_format_get_component_bits(sp->framebuffer.zsbuf->format,
+                                                     UTIL_FORMAT_COLORSPACE_ZS,
+                                                     0);
          if (depth_bits > 16) {
             mrd = 0.0000001;
          }
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 5fd073c95f..ec2961e8d7 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -394,33 +394,6 @@ enum pipe_format {
  */
 extern const char *pf_name( enum pipe_format format );
 
-/**
- * Return bits for a particular component.
- * \param comp  component index, starting at 0
- */
-static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp )
-{
-   uint size;
-
-   if (pf_swizzle_x(format) == comp) {
-      size = pf_size_x(format);
-   }
-   else if (pf_swizzle_y(format) == comp) {
-      size = pf_size_y(format);
-   }
-   else if (pf_swizzle_z(format) == comp) {
-      size = pf_size_z(format);
-   }
-   else if (pf_swizzle_w(format) == comp) {
-      size = pf_size_w(format);
-   }
-   else {
-      size = 0;
-   }
-   if (pf_layout( format ) == PIPE_FORMAT_LAYOUT_RGBAZS)
-      return size << pf_exp2( format );
-   return size << (pf_mixed_scale8( format ) * 3);
-}
 
 /**
  * Describe accurately the pixel format.
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index e7b04a8e06..257feda671 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -57,7 +57,7 @@ create_texture(struct pipe_context *pipe, enum pipe_format format,
    templ.depth0 = 1;
    templ.last_level = 0;
 
-   if (pf_get_component_bits(format, PIPE_FORMAT_COMP_S)) {
+   if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
       templ.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL;
    } else {
       templ.tex_usage = (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 8a3e11b6b4..5c3444777a 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -30,6 +30,7 @@
 #include "main/context.h"
 #include "pipe/p_format.h"
 #include "pipe/p_screen.h"
+#include "util/u_format.h"
 #include "state_tracker/st_context.h"
 #include "state_tracker/st_public.h"
 
@@ -270,12 +271,12 @@ stw_framebuffer_allocate(
       
       assert(pf_layout( pfi->depth_stencil_format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
    
-      if(pf_get_component_bits( pfi->depth_stencil_format, PIPE_FORMAT_COMP_Z ))
+      if(util_format_get_component_bits(pfi->depth_stencil_format, UTIL_FORMAT_COLORSPACE_ZS, 0))
          depthFormat = pfi->depth_stencil_format;
       else
          depthFormat = PIPE_FORMAT_NONE;
    
-      if(pf_get_component_bits( pfi->depth_stencil_format, PIPE_FORMAT_COMP_S ))
+      if(util_format_get_component_bits(pfi->depth_stencil_format, UTIL_FORMAT_COLORSPACE_ZS, 1))
          stencilFormat = pfi->depth_stencil_format;
       else
          stencilFormat = PIPE_FORMAT_NONE;
diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index 7abe5d9f7f..5ac833ced6 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -32,6 +32,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
+#include "util/u_format.h"
 #include "util/u_debug.h"
 
 #include "stw_icd.h"
@@ -133,13 +134,13 @@ stw_pixelformat_add(
       return;
 
    assert(pf_layout( color->format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
-   assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_R ) == color->bits.red );
-   assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_G ) == color->bits.green );
-   assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_B ) == color->bits.blue );
-   assert(pf_get_component_bits( color->format, PIPE_FORMAT_COMP_A ) == color->bits.alpha );
+   assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 0) == color->bits.red);
+   assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 1) == color->bits.green);
+   assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 2) == color->bits.blue);
+   assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 3) == color->bits.alpha);
    assert(pf_layout( depth->format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
-   assert(pf_get_component_bits( depth->format, PIPE_FORMAT_COMP_Z ) == depth->bits.depth );
-   assert(pf_get_component_bits( depth->format, PIPE_FORMAT_COMP_S ) == depth->bits.stencil );
+   assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 0) == depth->bits.depth);
+   assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 1) == depth->bits.stencil);
    
    pfi = &stw_dev->pixelformats[stw_dev->pixelformat_extended_count];
    
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 0889cd6d2c..496c1c4f3c 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -648,7 +648,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
    }
 
    if(format != GL_DEPTH_STENCIL && 
-      pf_get_component_bits( strb->format, PIPE_FORMAT_COMP_Z ) != 0)
+      util_format_get_component_bits(strb->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
       usage = PIPE_TRANSFER_READ_WRITE;
    else
       usage = PIPE_TRANSFER_WRITE;
@@ -843,7 +843,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
                           GL_STENCIL_INDEX, GL_UNSIGNED_BYTE,
                           &ctx->DefaultPacking, buffer);
 
-   if(pf_get_component_bits( rbDraw->format, PIPE_FORMAT_COMP_Z ) != 0)
+   if(util_format_get_component_bits(rbDraw->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0)
       usage = PIPE_TRANSFER_READ_WRITE;
    else
       usage = PIPE_TRANSFER_WRITE;
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 02f80057c2..c492d77530 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -42,28 +42,22 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
+#include "util/u_format.h"
 #include "st_context.h"
 #include "st_format.h"
 
-static GLuint
-format_bits(
-   pipe_format_rgbazs_t  info,
-   GLuint comp )
-{
-   return pf_get_component_bits( (enum pipe_format) info, comp );
-}
 
 static GLuint
 format_max_bits(
    pipe_format_rgbazs_t  info )
 {
-   GLuint   size = format_bits( info, PIPE_FORMAT_COMP_R );
+   GLuint size = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0);
 
-   size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_G ) );
-   size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_B ) );
-   size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_A ) );
-   size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_Z ) );
-   size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_S ) );
+   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1));
+   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2));
+   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3));
+   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0));
+   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1));
    return size;
 }
 
@@ -72,12 +66,12 @@ format_size(
    pipe_format_rgbazs_t  info )
 {
    return
-      format_bits( info, PIPE_FORMAT_COMP_R ) +
-      format_bits( info, PIPE_FORMAT_COMP_G ) +
-      format_bits( info, PIPE_FORMAT_COMP_B ) +
-      format_bits( info, PIPE_FORMAT_COMP_A ) +
-      format_bits( info, PIPE_FORMAT_COMP_Z ) +
-      format_bits( info, PIPE_FORMAT_COMP_S );
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0) +
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1) +
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2) +
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3) +
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0) +
+      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1);
 }
 
 /*
@@ -126,12 +120,12 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
       }
 
       /* Component bits */
-      pinfo->red_bits = format_bits( info, PIPE_FORMAT_COMP_R );
-      pinfo->green_bits = format_bits( info, PIPE_FORMAT_COMP_G );
-      pinfo->blue_bits = format_bits( info, PIPE_FORMAT_COMP_B );
-      pinfo->alpha_bits = format_bits( info, PIPE_FORMAT_COMP_A );
-      pinfo->depth_bits = format_bits( info, PIPE_FORMAT_COMP_Z );
-      pinfo->stencil_bits = format_bits( info, PIPE_FORMAT_COMP_S );
+      pinfo->red_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0);
+      pinfo->green_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1);
+      pinfo->blue_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2);
+      pinfo->alpha_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3);
+      pinfo->depth_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0);
+      pinfo->stencil_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1);
       pinfo->luminance_bits = 0;
       pinfo->intensity_bits = 0;
 
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index bd6ee5d71c..b1b515b28a 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -406,7 +406,7 @@ st_bind_texture_surface(struct pipe_surface *ps, int target, int level,
    }
 
    /* map pipe format to base format for now */
-   if (pf_get_component_bits(format, PIPE_FORMAT_COMP_A) > 0)
+   if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3) > 0)
       internalFormat = GL_RGBA;
    else
       internalFormat = GL_RGB;
-- 
cgit v1.2.3


From 72befaaae5fd9555e1f6ccbd6a74c0d640fc2929 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 3 Dec 2009 12:25:49 +0100
Subject: Remove pf_swizzle_* internal macros.

---
 src/gallium/drivers/nv50/nv50_vbo.c           |  4 +++-
 src/gallium/drivers/r300/r300_state_inlines.h | 13 +++++++++----
 src/gallium/include/pipe/p_format.h           |  5 -----
 src/mesa/state_tracker/st_format.c            | 11 +++++++----
 4 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index db54380241..4b8783899e 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -24,6 +24,8 @@
 #include "pipe/p_state.h"
 #include "pipe/p_inlines.h"
 
+#include "util/u_format.h"
+
 #include "nv50_context.h"
 
 static boolean
@@ -131,7 +133,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 		return 0x24e80000;
 	}
 
-	if (pf_swizzle_x(pf) == 2) /* BGRA */
+	if (util_format_description(pf)->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
 		hw_size |= (1 << 31); /* no real swizzle bits :-( */
 
 	return (hw_type | hw_size);
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index e6c1cb54da..ab720f366d 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -28,6 +28,8 @@
 
 #include "pipe/p_format.h"
 
+#include "util/u_format.h"
+
 #include "r300_reg.h"
 
 /* Some maths. These should probably find their way to u_math, if needed. */
@@ -534,6 +536,9 @@ r300_translate_vertex_data_type(enum pipe_format format) {
 
 static INLINE uint16_t
 r300_translate_vertex_data_swizzle(enum pipe_format format) {
+    const struct util_format_description *desc = util_format_description(format);
+
+    assert(format);
 
     if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
         debug_printf("r300: Bad format %s in %s:%d\n",
@@ -541,10 +546,10 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
         return 0;
     }
 
-    return ((pf_swizzle_x(format) << R300_SWIZZLE_SELECT_X_SHIFT) |
-        (pf_swizzle_y(format) << R300_SWIZZLE_SELECT_Y_SHIFT) |
-        (pf_swizzle_z(format) << R300_SWIZZLE_SELECT_Z_SHIFT) |
-        (pf_swizzle_w(format) << R300_SWIZZLE_SELECT_W_SHIFT) |
+    return ((desc->swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
+        (desc->swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
+        (desc->swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
+        (desc->swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT) |
         (0xf << R300_WRITE_ENA_SHIFT));
 }
 
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index 52fdab4570..cc7a8ab8df 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -112,11 +112,6 @@ static INLINE uint pf_get(pipe_format_rgbazs_t f, uint shift, uint mask)
    return (f >> shift) & mask;
 }
 
-#define pf_swizzle_x(f)       pf_get(f, 2, 0x7)  /**< PIPE_FORMAT_COMP_ */
-#define pf_swizzle_y(f)       pf_get(f, 5, 0x7)  /**< PIPE_FORMAT_COMP_ */
-#define pf_swizzle_z(f)       pf_get(f, 8, 0x7)  /**< PIPE_FORMAT_COMP_ */
-#define pf_swizzle_w(f)       pf_get(f, 11, 0x7) /**< PIPE_FORMAT_COMP_ */
-#define pf_swizzle_xyzw(f,i)  pf_get(f, 2+((i)*3), 0x7)
 #define pf_size_x(f)          pf_get(f, 14, 0x7) /**< Size of X */
 #define pf_size_y(f)          pf_get(f, 17, 0x7) /**< Size of Y */
 #define pf_size_z(f)          pf_get(f, 20, 0x7) /**< Size of Z */
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index c492d77530..c9f020c40f 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -81,8 +81,11 @@ GLboolean
 st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
 {
    if (pf_layout(format) == PIPE_FORMAT_LAYOUT_RGBAZS) {
+      const struct util_format_description *desc = util_format_description(format);
       pipe_format_rgbazs_t info;
 
+      assert(desc);
+
       info = format;
 
 #if 0
@@ -133,10 +136,10 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
       pinfo->size = format_size( info ) / 8;
 
       /* Luminance & Intensity bits */
-      if( pf_swizzle_x(info) == PIPE_FORMAT_COMP_R &&
-          pf_swizzle_y(info) == PIPE_FORMAT_COMP_R &&
-          pf_swizzle_z(info) == PIPE_FORMAT_COMP_R ) {
-         if( pf_swizzle_w(info) == PIPE_FORMAT_COMP_R ) {
+      if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
+          desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_X &&
+          desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_X) {
+         if (desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X) {
             pinfo->intensity_bits = pinfo->red_bits;
          }
          else {
-- 
cgit v1.2.3


From 35a15f02634a31c1517363d91aaef8f190e24687 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 3 Dec 2009 23:15:38 +0100
Subject: gallium: fix reference counting functions to be strict-aliasing
 compliant

Historically, parts of mesa code are not strict-aliasing safe, hence
-fno-strict-aliasing is needed to compile (this got forgotten for scons
builds for gallium, which indeed not only caused compiler warnings but also
inexplicable crashes in non-debug builds). However, we should try to eliminate
code not complying with strict-aliasing rules, at least for gallium.
Hence change pipe_reference functions to make them strict-aliasing compliant.
This adds a bit more complexity (especially for derived classes) but is the
right thing to do, and it does in fact fix a segfault.
---
 src/gallium/auxiliary/pipebuffer/pb_buffer.h        |  3 ++-
 src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c |  5 ++++-
 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c  |  3 +++
 src/gallium/drivers/svga/svga_screen_texture.h      |  3 ++-
 src/gallium/include/pipe/p_refcnt.h                 | 18 ++++++++----------
 src/gallium/include/pipe/p_state.h                  |  9 ++++++---
 src/gallium/include/pipe/p_video_state.h            |  3 ++-
 7 files changed, 27 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 4ef372233f..eb7e84be84 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -237,8 +237,9 @@ pb_reference(struct pb_buffer **dst,
 {
    struct pb_buffer *old = *dst;
 
-   if (pipe_reference((struct pipe_reference**)dst, &src->base.reference))
+   if (pipe_reference(&(*dst)->base.reference, &src->base.reference))
       pb_destroy( old );
+   *dst = src;
 }
 
 
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 2f973684f6..a9375abd21 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -243,6 +243,7 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
    struct pb_fence_ops *ops = fenced_list->ops;
    struct list_head *curr, *next;
    struct fenced_buffer *fenced_buf;
+   struct pb_buffer *pb_buf;
    struct pipe_fence_handle *prev_fence = NULL;
 
    curr = fenced_list->delayed.next;
@@ -271,7 +272,9 @@ fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list,
       fenced_buffer_remove_locked(fenced_list, fenced_buf);
       pipe_mutex_unlock(fenced_buf->mutex);
 
-      pb_reference((struct pb_buffer **)&fenced_buf, NULL);
+      pb_buf = &fenced_buf->base;
+      pb_reference(&pb_buf, NULL);
+      
 
       curr = next; 
       next = curr->next;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 57d1ede45a..f0c88a0ccb 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -293,8 +293,11 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr,
    if(buf) {
       LIST_DEL(&buf->head);
       pipe_mutex_unlock(mgr->mutex);
+#if 0
+      /* XXX this didn't do anything right??? */
       /* Increase refcount */
       pb_reference((struct pb_buffer**)&buf, &buf->base);
+#endif
       return &buf->base;
    }
    
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
index 1cc4063e65..727f2c51d2 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.h
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -164,8 +164,9 @@ svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_
 {
    struct svga_sampler_view *old = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &v->reference))
+   if (pipe_reference(&(*ptr)->reference, &v->reference))
       svga_destroy_sampler_view_priv(old);
+   *ptr = v;
 }
 
 extern void
diff --git a/src/gallium/include/pipe/p_refcnt.h b/src/gallium/include/pipe/p_refcnt.h
index 1f9088b3e9..f1875b6b82 100644
--- a/src/gallium/include/pipe/p_refcnt.h
+++ b/src/gallium/include/pipe/p_refcnt.h
@@ -59,30 +59,28 @@ pipe_is_referenced(struct pipe_reference *reference)
 
 
 /**
- * Set 'ptr' to point to 'reference' and update reference counting.
- * The old thing pointed to, if any, will be unreferenced first.
- * 'reference' may be NULL.
+ * Update reference counting.
+ * The old thing pointed to, if any, will be unreferenced.
+ * Both 'ptr' and 'reference' may be NULL.
  */
 static INLINE bool
-pipe_reference(struct pipe_reference **ptr, struct pipe_reference *reference)
+pipe_reference(struct pipe_reference *ptr, struct pipe_reference *reference)
 {
    bool destroy = FALSE;
 
-   if(*ptr != reference) {
+   if(ptr != reference) {
       /* bump the reference.count first */
       if (reference) {
          assert(pipe_is_referenced(reference));
          p_atomic_inc(&reference->count);
       }
    
-      if (*ptr) {
-         assert(pipe_is_referenced(*ptr));
-         if (p_atomic_dec_zero(&(*ptr)->count)) {
+      if (ptr) {
+         assert(pipe_is_referenced(ptr));
+         if (p_atomic_dec_zero(&ptr->count)) {
             destroy = TRUE;
          }
       }
-   
-      *ptr = reference;
    }
 
    return destroy;
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 6de7af6a81..b9dfa1c7d3 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -400,8 +400,9 @@ pipe_buffer_reference(struct pipe_buffer **ptr, struct pipe_buffer *buf)
 {
    struct pipe_buffer *old_buf = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &buf->reference))
+   if (pipe_reference(&(*ptr)->reference, &buf->reference))
       old_buf->screen->buffer_destroy(old_buf);
+   *ptr = buf;
 }
 
 static INLINE void
@@ -409,8 +410,9 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
 {
    struct pipe_surface *old_surf = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &surf->reference))
+   if (pipe_reference(&(*ptr)->reference, &surf->reference))
       old_surf->texture->screen->tex_surface_destroy(old_surf);
+   *ptr = surf;
 }
 
 static INLINE void
@@ -418,8 +420,9 @@ pipe_texture_reference(struct pipe_texture **ptr, struct pipe_texture *tex)
 {
    struct pipe_texture *old_tex = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &tex->reference))
+   if (pipe_reference(&(*ptr)->reference, &tex->reference))
       old_tex->screen->texture_destroy(old_tex);
+   *ptr = tex;
 }
 
 
diff --git a/src/gallium/include/pipe/p_video_state.h b/src/gallium/include/pipe/p_video_state.h
index 4da26d608c..b85f01c2b0 100644
--- a/src/gallium/include/pipe/p_video_state.h
+++ b/src/gallium/include/pipe/p_video_state.h
@@ -56,8 +56,9 @@ pipe_video_surface_reference(struct pipe_video_surface **ptr, struct pipe_video_
 {
    struct pipe_video_surface *old_surf = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &surf->reference))
+   if (pipe_reference(&(*ptr)->reference, &surf->reference))
       old_surf->screen->video_surface_destroy(old_surf);
+   *ptr = surf;
 }
 
 struct pipe_video_rect
-- 
cgit v1.2.3


From 4153ec547cfb7fcb26bbeb09ac9ef19fe88d3e4e Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Thu, 3 Dec 2009 23:58:30 +0100
Subject: gallium: fix remaining users of pipe_reference function

---
 src/gallium/drivers/nouveau/nouveau_stateobj.h     | 3 ++-
 src/gallium/state_trackers/python/st_device.c      | 3 ++-
 src/gallium/winsys/drm/intel/gem/intel_drm_fence.c | 3 ++-
 src/gallium/winsys/drm/vmware/core/vmw_surface.c   | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index b595405357..62990f9b6a 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -48,13 +48,14 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
 	struct nouveau_stateobj *so = *pso;
 	int i;
 
-        if (pipe_reference((struct pipe_reference**)pso, &ref->reference)) {
+        if (pipe_reference(&(*pso)->reference, &ref->reference)) {
 		free(so->push);
 		for (i = 0; i < so->cur_reloc; i++)
 			nouveau_bo_ref(NULL, &so->reloc[i].bo);
 		free(so->reloc);
 		free(so);
 	}
+	*pso = ref;
 }
 
 static INLINE void
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index a791113aba..f19cf4b577 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -62,8 +62,9 @@ st_device_reference(struct st_device **ptr, struct st_device *st_dev)
 {
    struct st_device *old_dev = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &st_dev->reference))
+   if (pipe_reference(&(*ptr)->reference, &st_dev->reference))
       st_device_really_destroy(old_dev);
+   *ptr = st_dev;
 }
 
 
diff --git a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c
index e70bfe7b44..b6248a3bcf 100644
--- a/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c
+++ b/src/gallium/winsys/drm/intel/gem/intel_drm_fence.c
@@ -39,11 +39,12 @@ intel_drm_fence_reference(struct intel_winsys *iws,
    struct intel_drm_fence *old = (struct intel_drm_fence *)*ptr;
    struct intel_drm_fence *f = (struct intel_drm_fence *)fence;
 
-   if (pipe_reference((struct pipe_reference**)ptr, &f->reference)) {
+   if (pipe_reference(&(*ptr)->reference, &f->reference)) {
       if (old->bo)
          drm_intel_bo_unreference(old->bo);
       FREE(old);
    }
+   *ptr = fence;
 }
 
 static int
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
index 64eb32f8b9..5f1b9ad577 100644
--- a/src/gallium/winsys/drm/vmware/core/vmw_surface.c
+++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
@@ -47,7 +47,7 @@ vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
    src_ref = src ? &src->refcnt : NULL;
    dst_ref = dst ? &dst->refcnt : NULL;
 
-   if (pipe_reference(&dst_ref, src_ref)) {
+   if (pipe_reference(dst_ref, src_ref)) {
       vmw_ioctl_surface_destroy(dst->screen, dst->sid);
 #ifdef DEBUG
       /* to detect dangling pointers */
-- 
cgit v1.2.3


From a4b3bb12d7627a0bb39dd625e7646c9ef9ccd7fb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 4 Dec 2009 11:49:42 +0000
Subject: softpipe: fix double-minify in texture layout

---
 src/gallium/drivers/softpipe/sp_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index bd653216c0..4f946ccfcf 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -67,7 +67,7 @@ softpipe_texture_layout(struct pipe_screen *screen,
 
       spt->level_offset[level] = buffer_size;
 
-      buffer_size += (pf_get_nblocksy(pt->format, u_minify(height, level)) *
+      buffer_size += (pf_get_nblocksy(pt->format, height) *
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       spt->stride[level]);
 
-- 
cgit v1.2.3


From 6bb415f862fec94b82915f806beb3a7427bd4bb8 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Fri, 4 Dec 2009 14:15:21 +0000
Subject: softpipe: dont claim to support PIPE_FORMAT_NONE

---
 src/gallium/drivers/softpipe/sp_screen.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 6bf3df8e6a..bd3532de4f 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -149,6 +149,7 @@ softpipe_is_format_supported( struct pipe_screen *screen,
    case PIPE_FORMAT_B6UG5SR5S_NORM:
    case PIPE_FORMAT_X8UB8UG8SR8S_NORM:
    case PIPE_FORMAT_A8B8G8R8_SNORM:
+   case PIPE_FORMAT_NONE:
       return FALSE;
    default:
       return TRUE;
-- 
cgit v1.2.3


From 225bc70b77fcf107dd8abc93be27a15c27743071 Mon Sep 17 00:00:00 2001
From: Coleman Kane <Coleman Kane>
Date: Fri, 4 Dec 2009 08:44:57 -0700
Subject: r300g: use $(MAKE) variable

Fixes bug 24501
---
 src/gallium/drivers/r300/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index d7a2c8c462..8d0c6e33bb 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -38,4 +38,4 @@ include ../../Makefile.template
 .PHONY : $(COMPILER_ARCHIVE)
 
 $(COMPILER_ARCHIVE):
-	cd $(TOP)/src/mesa/drivers/dri/r300/compiler; make
+	$(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
-- 
cgit v1.2.3


From c977dd9c7716b0a086eeb0c07f2da148065c3b18 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Fri, 4 Dec 2009 18:23:35 +0100
Subject: svga: fix another pipe_reference strict aliasing violation

---
 src/gallium/drivers/svga/svga_screen_buffer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
index 1f8a889672..58a1aba464 100644
--- a/src/gallium/drivers/svga/svga_screen_buffer.c
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -356,7 +356,8 @@ svga_buffer_upload_flush(struct svga_context *svga,
    sbuf->hw.boxes = NULL;
 
    /* Decrement reference count */
-   pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL);
+   pipe_reference(&(sbuf->base.reference), NULL);
+   sbuf = NULL;
 }
 
 
-- 
cgit v1.2.3


From 3da8265cd3233e2b22ab0f8a28fbba892984e399 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 4 Dec 2009 16:06:16 +0100
Subject: r300g: fix warnings

---
 src/gallium/drivers/r300/r300_context.c            | 4 ++--
 src/gallium/drivers/r300/r300_winsys.h             | 2 ++
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 2 ++
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 68a17dcb63..5b337f03ac 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -36,8 +36,8 @@
 #include "r300_screen.h"
 #include "r300_state_derived.h"
 #include "r300_state_invariant.h"
-
-#include "radeon_winsys.h"
+#include "r300_texture.h"
+#include "r300_winsys.h"
 
 static enum pipe_error r300_clear_hash_table(void* key, void* value,
                                              void* data)
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
index f86985841f..1ae6de70fe 100644
--- a/src/gallium/drivers/r300/r300_winsys.h
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -35,6 +35,8 @@ extern "C" {
 #include "pipe/p_state.h"
 #include "pipe/internal/p_winsys_screen.h"
 
+#include "radeon_winsys.h"
+
 struct pipe_context* r300_create_context(struct pipe_screen* screen,
                                          struct radeon_winsys* radeon_winsys);
 
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 65f7babff2..0ca7b39255 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -35,7 +35,9 @@
 #include "radeon_bo_gem.h"
 #include "softpipe/sp_texture.h"
 #include "r300_context.h"
+#include "util/u_math.h"
 #include <X11/Xutil.h>
+
 struct radeon_vl_context
 {
     Display *display;
-- 
cgit v1.2.3


From 7679447b5835fd73ab44b3d77b12a034c95af5c5 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 2 Dec 2009 17:15:27 +0100
Subject: r300g, radeong: fix the CS overflow

---
 src/gallium/drivers/r300/r300_cs.h               | 2 +-
 src/gallium/drivers/r300/r300_emit.c             | 9 ++++++++-
 src/gallium/winsys/drm/radeon/core/radeon_r300.c | 5 +++--
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 8b100375fd..9fcf3ab538 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -55,7 +55,7 @@
     int cs_count = 0;
 
 #define CHECK_CS(size) \
-    cs_winsys->check_cs(cs_winsys, (size))
+    assert(cs_winsys->check_cs(cs_winsys, (size)))
 
 #define BEGIN_CS(size) do { \
     CHECK_CS(size); \
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index a479842f9e..3bb42f9e43 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -871,10 +871,17 @@ void r300_emit_dirty_state(struct r300_context* r300)
         return;
     }
 
+    /* Check size of CS. */
+    /* Make sure we have at least 8*1024 spare dwords. */
+    /* XXX It would be nice to know the number of dwords we really need to
+     * XXX emit. */
+    if (!r300->winsys->check_cs(r300->winsys, 8*1024)) {
+        r300->context.flush(&r300->context, 0, NULL);
+    }
+
     /* Clean out BOs. */
     r300->winsys->reset_bos(r300->winsys);
 
-    /* XXX check size */
 validate:
     /* Color buffers... */
     for (i = 0; i < r300->framebuffer_state.nr_cbufs; i++) {
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_r300.c b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
index 7362279b77..ba0596c30d 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_r300.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_r300.c
@@ -52,8 +52,9 @@ static boolean radeon_validate(struct radeon_winsys* winsys)
 
 static boolean radeon_check_cs(struct radeon_winsys* winsys, int size)
 {
-    /* XXX check size here, lazy ass! */
-    return radeon_validate(winsys);
+    struct radeon_cs* cs = winsys->priv->cs;
+
+    return radeon_validate(winsys) && cs->cdw + size <= cs->ndw;
 }
 
 static void radeon_begin_cs(struct radeon_winsys* winsys,
-- 
cgit v1.2.3


From 042b524d48ebb15215430149b9b1653f4b46dee3 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 4 Dec 2009 15:54:29 +0100
Subject: radeong: flush CS if a buffer being mapped is referenced by it

Also, overlapping occlusion queries seem to work now.
---
 src/gallium/drivers/r300/r300_emit.c               | 2 --
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 3bb42f9e43..60be03f54f 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -382,8 +382,6 @@ static void r300_emit_query_start(struct r300_context *r300)
     if (!query)
 	return;
 
-    /* XXX This will almost certainly not return good results
-     * for overlapping queries. */
     BEGIN_CS(4);
     if (caps->family == CHIP_FAMILY_RV530) {
         OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 0ca7b39255..2a8daed051 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -140,10 +140,15 @@ static void *radeon_buffer_map(struct pipe_winsys *ws,
                                struct pipe_buffer *buffer,
                                unsigned flags)
 {
+    struct radeon_winsys_priv *priv = ((struct radeon_winsys *)ws)->priv;
     struct radeon_pipe_buffer *radeon_buffer =
         (struct radeon_pipe_buffer*)buffer;
     int write = 0;
 
+    if (radeon_bo_is_referenced_by_cs(radeon_buffer->bo, priv->cs)) {
+        priv->cs->space_flush_fn(priv->cs->space_flush_data);
+    }
+
     if (flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
         uint32_t domain;
 
-- 
cgit v1.2.3


From 7d9b2edb97419b562a542b5cd701724c009421d4 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Fri, 4 Dec 2009 18:34:52 +0100
Subject: identity: fix copy&paste error

---
 src/gallium/drivers/identity/id_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index 4509c7b1e5..bedab56f59 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -742,7 +742,7 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.set_polygon_stipple = identity_set_polygon_stipple;
    id_pipe->base.set_scissor_state = identity_set_scissor_state;
    id_pipe->base.set_viewport_state = identity_set_viewport_state;
-   id_pipe->base.set_fragment_sampler_textures = identity_set_vertex_sampler_textures;
+   id_pipe->base.set_fragment_sampler_textures = identity_set_fragment_sampler_textures;
    id_pipe->base.set_vertex_sampler_textures = identity_set_vertex_sampler_textures;
    id_pipe->base.set_vertex_buffers = identity_set_vertex_buffers;
    id_pipe->base.set_vertex_elements = identity_set_vertex_elements;
-- 
cgit v1.2.3


From b00b06b6e486a87dd88a695ae122863df13ad84e Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 4 Dec 2009 18:59:24 +0000
Subject: llvmpipe: Remove debug printf.

---
 src/gallium/drivers/llvmpipe/lp_tex_cache.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 5dbc597d2c..a6d9a2c1ac 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -155,7 +155,6 @@ lp_tex_tile_cache_validate_texture(struct llvmpipe_tex_tile_cache *tc)
       if (lpt->timestamp != tc->timestamp) {
          /* texture was modified, invalidate all cached tiles */
          uint i;
-         debug_printf("INV %d %d\n", tc->timestamp, lpt->timestamp);
          for (i = 0; i < NUM_ENTRIES; i++) {
             tc->entries[i].addr.bits.invalid = 1;
          }
-- 
cgit v1.2.3


From a312e76468435fc1eb7ec5fe0a98601a7fdfec53 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 4 Dec 2009 21:16:14 +0000
Subject: llvmpipe: Ensure transfers are mapped.

This shouldn't happen, but it does for some mysterious reason. Fail the
assertion, but at least do not segfault on release builds.
---
 src/gallium/drivers/llvmpipe/lp_tile_cache.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index 50891c4227..e83210f93b 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -290,6 +290,10 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
    
    assert(tc->surface);
    assert(tc->transfer);
+   assert(tc->transfer_map);
+
+   if(!tc->transfer_map)
+      lp_tile_cache_map_transfers(tc);
 
    switch(tile->status) {
    case LP_TILE_STATUS_CLEAR:
-- 
cgit v1.2.3


From c0a13bbae15a471fea278e37b92b874fed1f6b3b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 4 Dec 2009 21:25:40 +0000
Subject: llvmpipe: Port vertex sampler support from softpipe.

Just enough boilerplate code to avoid segfaulting.
---
 src/gallium/drivers/llvmpipe/lp_context.c       | 20 +++++++--
 src/gallium/drivers/llvmpipe/lp_context.h       |  5 +++
 src/gallium/drivers/llvmpipe/lp_screen.c        |  4 +-
 src/gallium/drivers/llvmpipe/lp_state.h         |  9 ++++
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 12 +++--
 src/gallium/drivers/llvmpipe/lp_state_sampler.c | 59 +++++++++++++++++++++++++
 6 files changed, 101 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index c081f6de03..679e244274 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -118,6 +118,11 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
       pipe_texture_reference(&llvmpipe->texture[i], NULL);
    }
 
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      lp_destroy_tex_tile_cache(llvmpipe->vertex_tex_cache[i]);
+      pipe_texture_reference(&llvmpipe->vertex_textures[i], NULL);
+   }
+
    for (i = 0; i < Elements(llvmpipe->constants); i++) {
       if (llvmpipe->constants[i].buffer) {
          pipe_buffer_reference(&llvmpipe->constants[i].buffer, NULL);
@@ -145,6 +150,11 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
          llvmpipe->framebuffer.zsbuf->texture == texture)
          return PIPE_REFERENCED_FOR_WRITE;
    }
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      if (llvmpipe->vertex_tex_cache[i] &&
+          llvmpipe->vertex_tex_cache[i]->texture == texture)
+         return PIPE_REFERENCED_FOR_READ;
+   }
    
    return PIPE_UNREFERENCED;
 }
@@ -181,6 +191,7 @@ llvmpipe_create( struct pipe_screen *screen )
 
    llvmpipe->pipe.create_sampler_state = llvmpipe_create_sampler_state;
    llvmpipe->pipe.bind_fragment_sampler_states  = llvmpipe_bind_sampler_states;
+   llvmpipe->pipe.bind_vertex_sampler_states  = llvmpipe_bind_vertex_sampler_states;
    llvmpipe->pipe.delete_sampler_state = llvmpipe_delete_sampler_state;
 
    llvmpipe->pipe.create_depth_stencil_alpha_state = llvmpipe_create_depth_stencil_state;
@@ -206,6 +217,7 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.set_polygon_stipple = llvmpipe_set_polygon_stipple;
    llvmpipe->pipe.set_scissor_state = llvmpipe_set_scissor_state;
    llvmpipe->pipe.set_fragment_sampler_textures = llvmpipe_set_sampler_textures;
+   llvmpipe->pipe.set_vertex_sampler_textures = llvmpipe_set_vertex_sampler_textures;
    llvmpipe->pipe.set_viewport_state = llvmpipe_set_viewport_state;
 
    llvmpipe->pipe.set_vertex_buffers = llvmpipe_set_vertex_buffers;
@@ -234,13 +246,15 @@ llvmpipe_create( struct pipe_screen *screen )
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++)
+      llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen);
 
 
    /* vertex shader samplers */
-   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
       llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
       llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX;
-      llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->tex_cache[i];
+      llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i];
       llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i];
    }
 
@@ -260,7 +274,7 @@ llvmpipe_create( struct pipe_screen *screen )
       goto fail;
 
    draw_texture_samplers(llvmpipe->draw,
-                         PIPE_MAX_SAMPLERS,
+                         PIPE_MAX_VERTEX_SAMPLERS,
                          (struct tgsi_sampler **)
                             llvmpipe->tgsi.vert_samplers_list);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 3ad95d0bfc..cc4d5ad5fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -55,6 +55,7 @@ struct llvmpipe_context {
    /** Constant state objects */
    const struct pipe_blend_state *blend;
    const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+   struct pipe_sampler_state *vertex_samplers[PIPE_MAX_VERTEX_SAMPLERS];
    const struct pipe_depth_stencil_alpha_state *depth_stencil;
    const struct pipe_rasterizer_state *rasterizer;
    struct lp_fragment_shader *fs;
@@ -68,12 +69,15 @@ struct llvmpipe_context {
    struct pipe_poly_stipple poly_stipple;
    struct pipe_scissor_state scissor;
    struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+   struct pipe_texture *vertex_textures[PIPE_MAX_VERTEX_SAMPLERS];
    struct pipe_viewport_state viewport;
    struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
    struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS];
 
    unsigned num_samplers;
    unsigned num_textures;
+   unsigned num_vertex_samplers;
+   unsigned num_vertex_textures;
    unsigned num_vertex_elements;
    unsigned num_vertex_buffers;
 
@@ -136,6 +140,7 @@ struct llvmpipe_context {
 
    unsigned tex_timestamp;
    struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
+   struct llvmpipe_tex_tile_cache *vertex_tex_cache[PIPE_MAX_VERTEX_SAMPLERS];
 
    unsigned no_rast : 1;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index a6ecaa0b2b..19fe2850fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -59,7 +59,9 @@ llvmpipe_get_param(struct pipe_screen *screen, int param)
    case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
       return PIPE_MAX_SAMPLERS;
    case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-      return 0;
+      return PIPE_MAX_VERTEX_SAMPLERS;
+   case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+      return PIPE_MAX_SAMPLERS + PIPE_MAX_VERTEX_SAMPLERS;
    case PIPE_CAP_NPOT_TEXTURES:
       return 1;
    case PIPE_CAP_TWO_SIDED_STENCIL:
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 7b26ce61a3..d1c74ab07b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -126,6 +126,10 @@ void *
 llvmpipe_create_sampler_state(struct pipe_context *,
                               const struct pipe_sampler_state *);
 void llvmpipe_bind_sampler_states(struct pipe_context *, unsigned, void **);
+void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *,
+                                    unsigned num_samplers,
+                                    void **samplers);
 void llvmpipe_delete_sampler_state(struct pipe_context *, void *);
 
 void *
@@ -172,6 +176,11 @@ void llvmpipe_set_sampler_textures( struct pipe_context *,
                                     unsigned num,
                                     struct pipe_texture ** );
 
+void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *,
+                                     unsigned num_textures,
+                                     struct pipe_texture **);
+
 void llvmpipe_set_viewport_state( struct pipe_context *,
                                   const struct pipe_viewport_state * );
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index c753b183c0..e703964aaa 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -198,10 +198,14 @@ update_tgsi_samplers( struct llvmpipe_context *llvmpipe )
    unsigned i;
 
    /* vertex shader samplers */
-   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->sampler[i];
-      llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->texture[i];
-      llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i];
+      llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i];
+      llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
+   }
+
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] );
    }
 
    /* fragment shader samplers */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index 8333805a3f..d382f9ca87 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -77,6 +77,34 @@ llvmpipe_bind_sampler_states(struct pipe_context *pipe,
 }
 
 
+void
+llvmpipe_bind_vertex_sampler_states(struct pipe_context *pipe,
+                                    unsigned num_samplers,
+                                    void **samplers)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   unsigned i;
+
+   assert(num_samplers <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_samplers == llvmpipe->num_vertex_samplers &&
+       !memcmp(llvmpipe->vertex_samplers, samplers, num_samplers * sizeof(void *)))
+      return;
+
+   draw_flush(llvmpipe->draw);
+
+   for (i = 0; i < num_samplers; ++i)
+      llvmpipe->vertex_samplers[i] = samplers[i];
+   for (i = num_samplers; i < PIPE_MAX_VERTEX_SAMPLERS; ++i)
+      llvmpipe->vertex_samplers[i] = NULL;
+
+   llvmpipe->num_vertex_samplers = num_samplers;
+
+   llvmpipe->dirty |= LP_NEW_SAMPLER;
+}
+
+
 void
 llvmpipe_set_sampler_textures(struct pipe_context *pipe,
                               unsigned num, struct pipe_texture **texture)
@@ -116,6 +144,37 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
 }
 
 
+void
+llvmpipe_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                     unsigned num_textures,
+                                     struct pipe_texture **textures)
+{
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   uint i;
+
+   assert(num_textures <= PIPE_MAX_VERTEX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num_textures == llvmpipe->num_vertex_textures &&
+       !memcmp(llvmpipe->vertex_textures, textures, num_textures * sizeof(struct pipe_texture *))) {
+      return;
+   }
+
+   draw_flush(llvmpipe->draw);
+
+   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
+      struct pipe_texture *tex = i < num_textures ? textures[i] : NULL;
+
+      pipe_texture_reference(&llvmpipe->vertex_textures[i], tex);
+      lp_tex_tile_cache_set_texture(llvmpipe->vertex_tex_cache[i], tex);
+   }
+
+   llvmpipe->num_vertex_textures = num_textures;
+
+   llvmpipe->dirty |= LP_NEW_TEXTURE;
+}
+
+
 void
 llvmpipe_delete_sampler_state(struct pipe_context *pipe,
                               void *sampler)
-- 
cgit v1.2.3


From dd51b4f9091abf762e470f0cd4c802215a108290 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 5 Dec 2009 05:43:10 +0000
Subject: llvmpipe: Stop disassembling when an unsupported opcode is found.

Otherwise the terminal gets full of garbage.
---
 src/gallium/drivers/llvmpipe/lp_bld_debug.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 59d8f492e6..354b3af49e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -115,7 +115,8 @@ lp_disassemble(const void* func)
          }
       }
 
-      if (ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret)
+      if ((ud_insn_off(&ud_obj) >= max_jmp_pc && ud_obj.mnemonic == UD_Iret) ||
+           ud_obj.mnemonic == UD_Iinvalid)
          break;
    }
    debug_printf("\n");
-- 
cgit v1.2.3


From 501989bbcd159f8b44148a22151bb46c4800d298 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 5 Dec 2009 05:43:53 +0000
Subject: llvmpipe: Tweak disassembly to match gdb.

Helps verifying udis86 output.
---
 src/gallium/drivers/llvmpipe/lp_bld_debug.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_debug.c b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
index 354b3af49e..39dfc51e50 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_debug.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_debug.c
@@ -77,10 +77,10 @@ lp_disassemble(const void* func)
    while (ud_disassemble(&ud_obj)) {
 
 #ifdef PIPE_ARCH_X86
-      debug_printf("%08lx: ", (unsigned long)ud_insn_off(&ud_obj));
+      debug_printf("0x%08lx:\t", (unsigned long)ud_insn_off(&ud_obj));
 #endif
 #ifdef PIPE_ARCH_X86_64
-      debug_printf("%016llx: ", (unsigned long long)ud_insn_off(&ud_obj));
+      debug_printf("0x%016llx:\t", (unsigned long long)ud_insn_off(&ud_obj));
 #endif
 
 #if 0
@@ -119,6 +119,12 @@ lp_disassemble(const void* func)
            ud_obj.mnemonic == UD_Iinvalid)
          break;
    }
+
+#if 0
+   /* Print GDB command, useful to verify udis86 output */
+   debug_printf("disassemble %p %p\n", func, (void*)(uintptr_t)ud_obj.pc);
+#endif
+
    debug_printf("\n");
 #else
    (void)func;
-- 
cgit v1.2.3


From 781d8fccba1bdaadbae042d23bf1d17e25c800fd Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 5 Dec 2009 06:05:56 +0000
Subject: svga: Use _debug_printf, so that output may be dumped in release
 builds too.

The dump calls should be wrapped in #ifdef DEBUG .. #endif.
---
 src/gallium/drivers/svga/svgadump/svga_dump.c      | 1122 ++++++++++----------
 src/gallium/drivers/svga/svgadump/svga_dump.py     |   22 +-
 .../drivers/svga/svgadump/svga_shader_dump.c       |  230 ++--
 3 files changed, 687 insertions(+), 687 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index 910afa2528..18e0eb5139 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -42,554 +42,554 @@ dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd)
 {
    switch((*cmd).identity.type) {
    case SVGA3D_DECLTYPE_FLOAT1:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n");
       break;
    case SVGA3D_DECLTYPE_FLOAT2:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n");
       break;
    case SVGA3D_DECLTYPE_FLOAT3:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n");
       break;
    case SVGA3D_DECLTYPE_FLOAT4:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n");
       break;
    case SVGA3D_DECLTYPE_D3DCOLOR:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n");
       break;
    case SVGA3D_DECLTYPE_UBYTE4:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n");
       break;
    case SVGA3D_DECLTYPE_SHORT2:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n");
       break;
    case SVGA3D_DECLTYPE_SHORT4:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n");
       break;
    case SVGA3D_DECLTYPE_UBYTE4N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n");
       break;
    case SVGA3D_DECLTYPE_SHORT2N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n");
       break;
    case SVGA3D_DECLTYPE_SHORT4N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n");
       break;
    case SVGA3D_DECLTYPE_USHORT2N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n");
       break;
    case SVGA3D_DECLTYPE_USHORT4N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n");
       break;
    case SVGA3D_DECLTYPE_UDEC3:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n");
       break;
    case SVGA3D_DECLTYPE_DEC3N:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n");
       break;
    case SVGA3D_DECLTYPE_FLOAT16_2:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n");
       break;
    case SVGA3D_DECLTYPE_FLOAT16_4:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n");
       break;
    case SVGA3D_DECLTYPE_MAX:
-      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n");
+      _debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type);
+      _debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type);
       break;
    }
    switch((*cmd).identity.method) {
    case SVGA3D_DECLMETHOD_DEFAULT:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n");
       break;
    case SVGA3D_DECLMETHOD_PARTIALU:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n");
       break;
    case SVGA3D_DECLMETHOD_PARTIALV:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n");
       break;
    case SVGA3D_DECLMETHOD_CROSSUV:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n");
       break;
    case SVGA3D_DECLMETHOD_UV:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n");
       break;
    case SVGA3D_DECLMETHOD_LOOKUP:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n");
       break;
    case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED:
-      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n");
+      _debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n");
       break;
    default:
-      debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method);
+      _debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method);
       break;
    }
    switch((*cmd).identity.usage) {
    case SVGA3D_DECLUSAGE_POSITION:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n");
       break;
    case SVGA3D_DECLUSAGE_BLENDWEIGHT:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n");
       break;
    case SVGA3D_DECLUSAGE_BLENDINDICES:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n");
       break;
    case SVGA3D_DECLUSAGE_NORMAL:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n");
       break;
    case SVGA3D_DECLUSAGE_PSIZE:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n");
       break;
    case SVGA3D_DECLUSAGE_TEXCOORD:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n");
       break;
    case SVGA3D_DECLUSAGE_TANGENT:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n");
       break;
    case SVGA3D_DECLUSAGE_BINORMAL:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n");
       break;
    case SVGA3D_DECLUSAGE_TESSFACTOR:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n");
       break;
    case SVGA3D_DECLUSAGE_POSITIONT:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n");
       break;
    case SVGA3D_DECLUSAGE_COLOR:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n");
       break;
    case SVGA3D_DECLUSAGE_FOG:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n");
       break;
    case SVGA3D_DECLUSAGE_DEPTH:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n");
       break;
    case SVGA3D_DECLUSAGE_SAMPLE:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n");
       break;
    case SVGA3D_DECLUSAGE_MAX:
-      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n");
+      _debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage);
+      _debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage);
       break;
    }
-   debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex);
-   debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId);
-   debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset);
-   debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride);
-   debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first);
-   debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last);
+   _debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex);
+   _debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId);
+   _debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset);
+   _debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride);
+   _debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first);
+   _debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last);
 }
 
 static void
 dump_SVGA3dTextureState(const SVGA3dTextureState *cmd)
 {
-   debug_printf("\t\t.stage = %u\n", (*cmd).stage);
+   _debug_printf("\t\t.stage = %u\n", (*cmd).stage);
    switch((*cmd).name) {
    case SVGA3D_TS_INVALID:
-      debug_printf("\t\t.name = SVGA3D_TS_INVALID\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_INVALID\n");
       break;
    case SVGA3D_TS_BIND_TEXTURE:
-      debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n");
       break;
    case SVGA3D_TS_COLOROP:
-      debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n");
       break;
    case SVGA3D_TS_COLORARG1:
-      debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n");
       break;
    case SVGA3D_TS_COLORARG2:
-      debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n");
       break;
    case SVGA3D_TS_ALPHAOP:
-      debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n");
       break;
    case SVGA3D_TS_ALPHAARG1:
-      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n");
       break;
    case SVGA3D_TS_ALPHAARG2:
-      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n");
       break;
    case SVGA3D_TS_ADDRESSU:
-      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n");
       break;
    case SVGA3D_TS_ADDRESSV:
-      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n");
       break;
    case SVGA3D_TS_MIPFILTER:
-      debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n");
       break;
    case SVGA3D_TS_MAGFILTER:
-      debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n");
       break;
    case SVGA3D_TS_MINFILTER:
-      debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n");
       break;
    case SVGA3D_TS_BORDERCOLOR:
-      debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n");
       break;
    case SVGA3D_TS_TEXCOORDINDEX:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n");
       break;
    case SVGA3D_TS_TEXTURETRANSFORMFLAGS:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n");
       break;
    case SVGA3D_TS_TEXCOORDGEN:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n");
       break;
    case SVGA3D_TS_BUMPENVMAT00:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n");
       break;
    case SVGA3D_TS_BUMPENVMAT01:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n");
       break;
    case SVGA3D_TS_BUMPENVMAT10:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n");
       break;
    case SVGA3D_TS_BUMPENVMAT11:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n");
       break;
    case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n");
       break;
    case SVGA3D_TS_TEXTURE_LOD_BIAS:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n");
       break;
    case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL:
-      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n");
       break;
    case SVGA3D_TS_ADDRESSW:
-      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n");
       break;
    case SVGA3D_TS_GAMMA:
-      debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n");
       break;
    case SVGA3D_TS_BUMPENVLSCALE:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n");
       break;
    case SVGA3D_TS_BUMPENVLOFFSET:
-      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n");
       break;
    case SVGA3D_TS_COLORARG0:
-      debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n");
       break;
    case SVGA3D_TS_ALPHAARG0:
-      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n");
       break;
    case SVGA3D_TS_MAX:
-      debug_printf("\t\t.name = SVGA3D_TS_MAX\n");
+      _debug_printf("\t\t.name = SVGA3D_TS_MAX\n");
       break;
    default:
-      debug_printf("\t\t.name = %i\n", (*cmd).name);
+      _debug_printf("\t\t.name = %i\n", (*cmd).name);
       break;
    }
-   debug_printf("\t\t.value = %u\n", (*cmd).value);
-   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+   _debug_printf("\t\t.value = %u\n", (*cmd).value);
+   _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
 }
 
 static void
 dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd)
 {
-   debug_printf("\t\t.x = %u\n", (*cmd).x);
-   debug_printf("\t\t.y = %u\n", (*cmd).y);
-   debug_printf("\t\t.z = %u\n", (*cmd).z);
-   debug_printf("\t\t.w = %u\n", (*cmd).w);
-   debug_printf("\t\t.h = %u\n", (*cmd).h);
-   debug_printf("\t\t.d = %u\n", (*cmd).d);
-   debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
-   debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
-   debug_printf("\t\t.srcz = %u\n", (*cmd).srcz);
+   _debug_printf("\t\t.x = %u\n", (*cmd).x);
+   _debug_printf("\t\t.y = %u\n", (*cmd).y);
+   _debug_printf("\t\t.z = %u\n", (*cmd).z);
+   _debug_printf("\t\t.w = %u\n", (*cmd).w);
+   _debug_printf("\t\t.h = %u\n", (*cmd).h);
+   _debug_printf("\t\t.d = %u\n", (*cmd).d);
+   _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+   _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+   _debug_printf("\t\t.srcz = %u\n", (*cmd).srcz);
 }
 
 static void
 dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.index = %u\n", (*cmd).index);
-   debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]);
-   debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]);
-   debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]);
-   debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.index = %u\n", (*cmd).index);
+   _debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]);
+   _debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]);
+   _debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]);
+   _debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]);
 }
 
 static void
 dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_QUERYTYPE_OCCLUSION:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
       break;
    case SVGA3D_QUERYTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
-   debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
-   debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+   _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+   _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
 }
 
 static void
 dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_RT_DEPTH:
-      debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n");
+      _debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n");
       break;
    case SVGA3D_RT_STENCIL:
-      debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n");
+      _debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n");
       break;
    default:
-      debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0);
+      _debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0);
       break;
    }
-   debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid);
-   debug_printf("\t\t.target.face = %u\n", (*cmd).target.face);
-   debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap);
+   _debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid);
+   _debug_printf("\t\t.target.face = %u\n", (*cmd).target.face);
+   _debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap);
 }
 
 static void
 dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
 }
 
 static void
 dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd)
 {
-   debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
-   debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
-   debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
-   debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
-   debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
-   debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+   _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+   _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+   _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+   _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+   _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+   _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
 }
 
 static void
 dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).face) {
    case SVGA3D_FACE_INVALID:
-      debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n");
       break;
    case SVGA3D_FACE_NONE:
-      debug_printf("\t\t.face = SVGA3D_FACE_NONE\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_NONE\n");
       break;
    case SVGA3D_FACE_FRONT:
-      debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n");
       break;
    case SVGA3D_FACE_BACK:
-      debug_printf("\t\t.face = SVGA3D_FACE_BACK\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_BACK\n");
       break;
    case SVGA3D_FACE_FRONT_BACK:
-      debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n");
       break;
    case SVGA3D_FACE_MAX:
-      debug_printf("\t\t.face = SVGA3D_FACE_MAX\n");
+      _debug_printf("\t\t.face = SVGA3D_FACE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.face = %i\n", (*cmd).face);
+      _debug_printf("\t\t.face = %i\n", (*cmd).face);
       break;
    }
-   debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]);
-   debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]);
-   debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]);
-   debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]);
-   debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]);
-   debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]);
-   debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]);
-   debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]);
-   debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]);
-   debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]);
-   debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]);
-   debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]);
-   debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]);
-   debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]);
-   debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]);
-   debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]);
-   debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess);
+   _debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]);
+   _debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]);
+   _debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]);
+   _debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]);
+   _debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]);
+   _debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]);
+   _debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]);
+   _debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]);
+   _debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]);
+   _debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]);
+   _debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]);
+   _debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]);
+   _debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]);
+   _debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]);
+   _debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]);
+   _debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]);
+   _debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess);
 }
 
 static void
 dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.index = %u\n", (*cmd).index);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.index = %u\n", (*cmd).index);
    switch((*cmd).data.type) {
    case SVGA3D_LIGHTTYPE_INVALID:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n");
       break;
    case SVGA3D_LIGHTTYPE_POINT:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n");
       break;
    case SVGA3D_LIGHTTYPE_SPOT1:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n");
       break;
    case SVGA3D_LIGHTTYPE_SPOT2:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n");
       break;
    case SVGA3D_LIGHTTYPE_DIRECTIONAL:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n");
       break;
    case SVGA3D_LIGHTTYPE_MAX:
-      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n");
+      _debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.data.type = %i\n", (*cmd).data.type);
+      _debug_printf("\t\t.data.type = %i\n", (*cmd).data.type);
       break;
    }
-   debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace);
-   debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]);
-   debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]);
-   debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]);
-   debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]);
-   debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]);
-   debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]);
-   debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]);
-   debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]);
-   debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]);
-   debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]);
-   debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]);
-   debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]);
-   debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]);
-   debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]);
-   debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]);
-   debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]);
-   debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]);
-   debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]);
-   debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]);
-   debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]);
-   debug_printf("\t\t.data.range = %f\n", (*cmd).data.range);
-   debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff);
-   debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0);
-   debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1);
-   debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2);
-   debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta);
-   debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi);
+   _debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace);
+   _debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]);
+   _debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]);
+   _debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]);
+   _debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]);
+   _debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]);
+   _debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]);
+   _debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]);
+   _debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]);
+   _debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]);
+   _debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]);
+   _debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]);
+   _debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]);
+   _debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]);
+   _debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]);
+   _debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]);
+   _debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]);
+   _debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]);
+   _debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]);
+   _debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]);
+   _debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]);
+   _debug_printf("\t\t.data.range = %f\n", (*cmd).data.range);
+   _debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff);
+   _debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0);
+   _debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1);
+   _debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2);
+   _debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta);
+   _debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi);
 }
 
 static void
 dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
-   debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
-   debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
-   debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+   _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+   _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+   _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
 }
 
 static void
 dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
-   debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
-   debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
-   debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+   _debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+   _debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+   _debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
 }
 
 static void
 dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd)
 {
-   debug_printf("\t\t.x = %u\n", (*cmd).x);
-   debug_printf("\t\t.y = %u\n", (*cmd).y);
-   debug_printf("\t\t.w = %u\n", (*cmd).w);
-   debug_printf("\t\t.h = %u\n", (*cmd).h);
-   debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
-   debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+   _debug_printf("\t\t.x = %u\n", (*cmd).x);
+   _debug_printf("\t\t.y = %u\n", (*cmd).y);
+   _debug_printf("\t\t.w = %u\n", (*cmd).w);
+   _debug_printf("\t\t.h = %u\n", (*cmd).h);
+   _debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+   _debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
 }
 
 static void
 dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_SHADERTYPE_COMPILED_DX8:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
       break;
    case SVGA3D_SHADERTYPE_VS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
       break;
    case SVGA3D_SHADERTYPE_PS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
       break;
    case SVGA3D_SHADERTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
-   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+   _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
 }
 
 static void
 dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_QUERYTYPE_OCCLUSION:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
       break;
    case SVGA3D_QUERYTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
-   debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
-   debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+   _debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+   _debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
 }
 
 static void
 dump_SVGA3dSize(const SVGA3dSize *cmd)
 {
-   debug_printf("\t\t.width = %u\n", (*cmd).width);
-   debug_printf("\t\t.height = %u\n", (*cmd).height);
-   debug_printf("\t\t.depth = %u\n", (*cmd).depth);
+   _debug_printf("\t\t.width = %u\n", (*cmd).width);
+   _debug_printf("\t\t.height = %u\n", (*cmd).height);
+   _debug_printf("\t\t.depth = %u\n", (*cmd).depth);
 }
 
 static void
 dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd)
 {
-   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+   _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
 }
 
 static void
 dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
 }
 
 static void
 dump_SVGA3dRect(const SVGA3dRect *cmd)
 {
-   debug_printf("\t\t.x = %u\n", (*cmd).x);
-   debug_printf("\t\t.y = %u\n", (*cmd).y);
-   debug_printf("\t\t.w = %u\n", (*cmd).w);
-   debug_printf("\t\t.h = %u\n", (*cmd).h);
+   _debug_printf("\t\t.x = %u\n", (*cmd).x);
+   _debug_printf("\t\t.y = %u\n", (*cmd).y);
+   _debug_printf("\t\t.w = %u\n", (*cmd).w);
+   _debug_printf("\t\t.h = %u\n", (*cmd).h);
 }
 
 static void
 dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_QUERYTYPE_OCCLUSION:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
       break;
    case SVGA3D_QUERYTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
 }
@@ -599,336 +599,336 @@ dump_SVGA3dRenderState(const SVGA3dRenderState *cmd)
 {
    switch((*cmd).state) {
    case SVGA3D_RS_INVALID:
-      debug_printf("\t\t.state = SVGA3D_RS_INVALID\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_INVALID\n");
       break;
    case SVGA3D_RS_ZENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n");
       break;
    case SVGA3D_RS_ZWRITEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n");
       break;
    case SVGA3D_RS_ALPHATESTENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n");
       break;
    case SVGA3D_RS_DITHERENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n");
       break;
    case SVGA3D_RS_BLENDENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n");
       break;
    case SVGA3D_RS_FOGENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n");
       break;
    case SVGA3D_RS_SPECULARENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n");
       break;
    case SVGA3D_RS_STENCILENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n");
       break;
    case SVGA3D_RS_LIGHTINGENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n");
       break;
    case SVGA3D_RS_NORMALIZENORMALS:
-      debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n");
       break;
    case SVGA3D_RS_POINTSPRITEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n");
       break;
    case SVGA3D_RS_POINTSCALEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n");
       break;
    case SVGA3D_RS_STENCILREF:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n");
       break;
    case SVGA3D_RS_STENCILMASK:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n");
       break;
    case SVGA3D_RS_STENCILWRITEMASK:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n");
       break;
    case SVGA3D_RS_FOGSTART:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n");
       break;
    case SVGA3D_RS_FOGEND:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n");
       break;
    case SVGA3D_RS_FOGDENSITY:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n");
       break;
    case SVGA3D_RS_POINTSIZE:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n");
       break;
    case SVGA3D_RS_POINTSIZEMIN:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n");
       break;
    case SVGA3D_RS_POINTSIZEMAX:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n");
       break;
    case SVGA3D_RS_POINTSCALE_A:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n");
       break;
    case SVGA3D_RS_POINTSCALE_B:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n");
       break;
    case SVGA3D_RS_POINTSCALE_C:
-      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n");
       break;
    case SVGA3D_RS_FOGCOLOR:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n");
       break;
    case SVGA3D_RS_AMBIENT:
-      debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n");
       break;
    case SVGA3D_RS_CLIPPLANEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n");
       break;
    case SVGA3D_RS_FOGMODE:
-      debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n");
       break;
    case SVGA3D_RS_FILLMODE:
-      debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n");
       break;
    case SVGA3D_RS_SHADEMODE:
-      debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n");
       break;
    case SVGA3D_RS_LINEPATTERN:
-      debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n");
       break;
    case SVGA3D_RS_SRCBLEND:
-      debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n");
       break;
    case SVGA3D_RS_DSTBLEND:
-      debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n");
       break;
    case SVGA3D_RS_BLENDEQUATION:
-      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n");
       break;
    case SVGA3D_RS_CULLMODE:
-      debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n");
       break;
    case SVGA3D_RS_ZFUNC:
-      debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n");
       break;
    case SVGA3D_RS_ALPHAFUNC:
-      debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n");
       break;
    case SVGA3D_RS_STENCILFUNC:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n");
       break;
    case SVGA3D_RS_STENCILFAIL:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n");
       break;
    case SVGA3D_RS_STENCILZFAIL:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n");
       break;
    case SVGA3D_RS_STENCILPASS:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n");
       break;
    case SVGA3D_RS_ALPHAREF:
-      debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n");
       break;
    case SVGA3D_RS_FRONTWINDING:
-      debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n");
       break;
    case SVGA3D_RS_COORDINATETYPE:
-      debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n");
       break;
    case SVGA3D_RS_ZBIAS:
-      debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n");
       break;
    case SVGA3D_RS_RANGEFOGENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n");
       break;
    case SVGA3D_RS_COLORWRITEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n");
       break;
    case SVGA3D_RS_VERTEXMATERIALENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n");
       break;
    case SVGA3D_RS_DIFFUSEMATERIALSOURCE:
-      debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n");
       break;
    case SVGA3D_RS_SPECULARMATERIALSOURCE:
-      debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n");
       break;
    case SVGA3D_RS_AMBIENTMATERIALSOURCE:
-      debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n");
       break;
    case SVGA3D_RS_EMISSIVEMATERIALSOURCE:
-      debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n");
       break;
    case SVGA3D_RS_TEXTUREFACTOR:
-      debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n");
       break;
    case SVGA3D_RS_LOCALVIEWER:
-      debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n");
       break;
    case SVGA3D_RS_SCISSORTESTENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n");
       break;
    case SVGA3D_RS_BLENDCOLOR:
-      debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n");
       break;
    case SVGA3D_RS_STENCILENABLE2SIDED:
-      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n");
       break;
    case SVGA3D_RS_CCWSTENCILFUNC:
-      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n");
       break;
    case SVGA3D_RS_CCWSTENCILFAIL:
-      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n");
       break;
    case SVGA3D_RS_CCWSTENCILZFAIL:
-      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n");
       break;
    case SVGA3D_RS_CCWSTENCILPASS:
-      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n");
       break;
    case SVGA3D_RS_VERTEXBLEND:
-      debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n");
       break;
    case SVGA3D_RS_SLOPESCALEDEPTHBIAS:
-      debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n");
       break;
    case SVGA3D_RS_DEPTHBIAS:
-      debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n");
       break;
    case SVGA3D_RS_OUTPUTGAMMA:
-      debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n");
       break;
    case SVGA3D_RS_ZVISIBLE:
-      debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n");
       break;
    case SVGA3D_RS_LASTPIXEL:
-      debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n");
       break;
    case SVGA3D_RS_CLIPPING:
-      debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n");
       break;
    case SVGA3D_RS_WRAP0:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n");
       break;
    case SVGA3D_RS_WRAP1:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n");
       break;
    case SVGA3D_RS_WRAP2:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n");
       break;
    case SVGA3D_RS_WRAP3:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n");
       break;
    case SVGA3D_RS_WRAP4:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n");
       break;
    case SVGA3D_RS_WRAP5:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n");
       break;
    case SVGA3D_RS_WRAP6:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n");
       break;
    case SVGA3D_RS_WRAP7:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n");
       break;
    case SVGA3D_RS_WRAP8:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n");
       break;
    case SVGA3D_RS_WRAP9:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n");
       break;
    case SVGA3D_RS_WRAP10:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n");
       break;
    case SVGA3D_RS_WRAP11:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n");
       break;
    case SVGA3D_RS_WRAP12:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n");
       break;
    case SVGA3D_RS_WRAP13:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n");
       break;
    case SVGA3D_RS_WRAP14:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n");
       break;
    case SVGA3D_RS_WRAP15:
-      debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n");
       break;
    case SVGA3D_RS_MULTISAMPLEANTIALIAS:
-      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n");
       break;
    case SVGA3D_RS_MULTISAMPLEMASK:
-      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n");
       break;
    case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n");
       break;
    case SVGA3D_RS_TWEENFACTOR:
-      debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n");
       break;
    case SVGA3D_RS_ANTIALIASEDLINEENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n");
       break;
    case SVGA3D_RS_COLORWRITEENABLE1:
-      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n");
       break;
    case SVGA3D_RS_COLORWRITEENABLE2:
-      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n");
       break;
    case SVGA3D_RS_COLORWRITEENABLE3:
-      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n");
       break;
    case SVGA3D_RS_SEPARATEALPHABLENDENABLE:
-      debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n");
       break;
    case SVGA3D_RS_SRCBLENDALPHA:
-      debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n");
       break;
    case SVGA3D_RS_DSTBLENDALPHA:
-      debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n");
       break;
    case SVGA3D_RS_BLENDEQUATIONALPHA:
-      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n");
       break;
    case SVGA3D_RS_MAX:
-      debug_printf("\t\t.state = SVGA3D_RS_MAX\n");
+      _debug_printf("\t\t.state = SVGA3D_RS_MAX\n");
       break;
    default:
-      debug_printf("\t\t.state = %i\n", (*cmd).state);
+      _debug_printf("\t\t.state = %i\n", (*cmd).state);
       break;
    }
-   debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue);
-   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+   _debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue);
+   _debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
 }
 
 static void
 dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd)
 {
-   debug_printf("\t\t.value = %u\n", (*cmd).value);
-   debug_printf("\t\t.count = %u\n", (*cmd).count);
-   debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData);
-   debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData);
+   _debug_printf("\t\t.value = %u\n", (*cmd).value);
+   _debug_printf("\t\t.count = %u\n", (*cmd).count);
+   _debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData);
+   _debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData);
 }
 
 static void
 dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
    switch((*cmd).type) {
    case SVGA3D_SHADERTYPE_COMPILED_DX8:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
       break;
    case SVGA3D_SHADERTYPE_VS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
       break;
    case SVGA3D_SHADERTYPE_PS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
       break;
    case SVGA3D_SHADERTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
 }
@@ -936,53 +936,53 @@ dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd)
 static void
 dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.reg = %u\n", (*cmd).reg);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.reg = %u\n", (*cmd).reg);
    switch((*cmd).type) {
    case SVGA3D_SHADERTYPE_COMPILED_DX8:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
       break;
    case SVGA3D_SHADERTYPE_VS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
       break;
    case SVGA3D_SHADERTYPE_PS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
       break;
    case SVGA3D_SHADERTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
    switch((*cmd).ctype) {
    case SVGA3D_CONST_TYPE_FLOAT:
-      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n");
-      debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]);
-      debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]);
-      debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]);
-      debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]);
+      _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n");
+      _debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]);
+      _debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]);
+      _debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]);
+      _debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]);
       break;
    case SVGA3D_CONST_TYPE_INT:
-      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n");
-      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
-      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
-      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
-      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n");
+      _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
       break;
    case SVGA3D_CONST_TYPE_BOOL:
-      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n");
-      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
-      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
-      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
-      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      _debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n");
+      _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
       break;
    default:
-      debug_printf("\t\t.ctype = %i\n", (*cmd).ctype);
-      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
-      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
-      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
-      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      _debug_printf("\t\t.ctype = %i\n", (*cmd).ctype);
+      _debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      _debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      _debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      _debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
       break;
    }
 }
@@ -990,25 +990,25 @@ dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd)
 static void
 dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min);
-   debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min);
+   _debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max);
 }
 
 static void
 dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls);
-   debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls);
+   _debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges);
 }
 
 static void
 dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.index = %u\n", (*cmd).index);
-   debug_printf("\t\t.enabled = %u\n", (*cmd).enabled);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.index = %u\n", (*cmd).index);
+   _debug_printf("\t\t.enabled = %u\n", (*cmd).enabled);
 }
 
 static void
@@ -1016,86 +1016,86 @@ dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd)
 {
    switch((*cmd).primType) {
    case SVGA3D_PRIMITIVE_INVALID:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n");
       break;
    case SVGA3D_PRIMITIVE_TRIANGLELIST:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n");
       break;
    case SVGA3D_PRIMITIVE_POINTLIST:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n");
       break;
    case SVGA3D_PRIMITIVE_LINELIST:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n");
       break;
    case SVGA3D_PRIMITIVE_LINESTRIP:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n");
       break;
    case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n");
       break;
    case SVGA3D_PRIMITIVE_TRIANGLEFAN:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n");
       break;
    case SVGA3D_PRIMITIVE_MAX:
-      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n");
+      _debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.primType = %i\n", (*cmd).primType);
+      _debug_printf("\t\t.primType = %i\n", (*cmd).primType);
       break;
    }
-   debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount);
-   debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId);
-   debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset);
-   debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride);
-   debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth);
-   debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias);
+   _debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount);
+   _debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId);
+   _debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset);
+   _debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride);
+   _debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth);
+   _debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias);
 }
 
 static void
 dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd)
 {
-   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+   _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
 }
 
 static void
 dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
 }
 
 static void
 dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd)
 {
-   debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
-   debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
-   debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
-   debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
-   debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
-   debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
-   debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x);
-   debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y);
-   debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z);
-   debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w);
-   debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h);
-   debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d);
-   debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x);
-   debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y);
-   debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z);
-   debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w);
-   debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h);
-   debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d);
+   _debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+   _debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+   _debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+   _debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+   _debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+   _debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+   _debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x);
+   _debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y);
+   _debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z);
+   _debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w);
+   _debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h);
+   _debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d);
+   _debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x);
+   _debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y);
+   _debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z);
+   _debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w);
+   _debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h);
+   _debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d);
    switch((*cmd).mode) {
    case SVGA3D_STRETCH_BLT_POINT:
-      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n");
+      _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n");
       break;
    case SVGA3D_STRETCH_BLT_LINEAR:
-      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n");
+      _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n");
       break;
    case SVGA3D_STRETCH_BLT_MAX:
-      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n");
+      _debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n");
       break;
    default:
-      debug_printf("\t\t.mode = %i\n", (*cmd).mode);
+      _debug_printf("\t\t.mode = %i\n", (*cmd).mode);
       break;
    }
 }
@@ -1103,21 +1103,21 @@ dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd)
 static void
 dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd)
 {
-   debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId);
-   debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset);
-   debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch);
-   debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid);
-   debug_printf("\t\t.host.face = %u\n", (*cmd).host.face);
-   debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap);
+   _debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId);
+   _debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset);
+   _debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch);
+   _debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid);
+   _debug_printf("\t\t.host.face = %u\n", (*cmd).host.face);
+   _debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap);
    switch((*cmd).transfer) {
    case SVGA3D_WRITE_HOST_VRAM:
-      debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n");
+      _debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n");
       break;
    case SVGA3D_READ_HOST_VRAM:
-      debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n");
+      _debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n");
       break;
    default:
-      debug_printf("\t\t.transfer = %i\n", (*cmd).transfer);
+      _debug_printf("\t\t.transfer = %i\n", (*cmd).transfer);
       break;
    }
 }
@@ -1125,107 +1125,107 @@ dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd)
 static void
 dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd)
 {
-   debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize);
-   debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset);
-   debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard);
-   debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized);
+   _debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize);
+   _debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset);
+   _debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard);
+   _debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized);
 }
 
 static void
 dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).type) {
    case SVGA3D_TRANSFORM_INVALID:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n");
       break;
    case SVGA3D_TRANSFORM_WORLD:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n");
       break;
    case SVGA3D_TRANSFORM_VIEW:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n");
       break;
    case SVGA3D_TRANSFORM_PROJECTION:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE0:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE1:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE2:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE3:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE4:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE5:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE6:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n");
       break;
    case SVGA3D_TRANSFORM_TEXTURE7:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n");
       break;
    case SVGA3D_TRANSFORM_WORLD1:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n");
       break;
    case SVGA3D_TRANSFORM_WORLD2:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n");
       break;
    case SVGA3D_TRANSFORM_WORLD3:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n");
       break;
    case SVGA3D_TRANSFORM_MAX:
-      debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
-   debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]);
-   debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]);
-   debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]);
-   debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]);
-   debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]);
-   debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]);
-   debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]);
-   debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]);
-   debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]);
-   debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]);
-   debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]);
-   debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]);
-   debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]);
-   debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]);
-   debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]);
-   debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]);
+   _debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]);
+   _debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]);
+   _debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]);
+   _debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]);
+   _debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]);
+   _debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]);
+   _debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]);
+   _debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]);
+   _debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]);
+   _debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]);
+   _debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]);
+   _debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]);
+   _debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]);
+   _debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]);
+   _debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]);
+   _debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]);
 }
 
 static void
 dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
-   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.shid = %u\n", (*cmd).shid);
    switch((*cmd).type) {
    case SVGA3D_SHADERTYPE_COMPILED_DX8:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
       break;
    case SVGA3D_SHADERTYPE_VS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
       break;
    case SVGA3D_SHADERTYPE_PS:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
       break;
    case SVGA3D_SHADERTYPE_MAX:
-      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      _debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
       break;
    default:
-      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      _debug_printf("\t\t.type = %i\n", (*cmd).type);
       break;
    }
 }
@@ -1233,187 +1233,187 @@ dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd)
 static void
 dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
 }
 
 static void
 dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd)
 {
-   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   _debug_printf("\t\t.cid = %u\n", (*cmd).cid);
    switch((*cmd).clearFlag) {
    case SVGA3D_CLEAR_COLOR:
-      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n");
+      _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n");
       break;
    case SVGA3D_CLEAR_DEPTH:
-      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n");
+      _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n");
       break;
    case SVGA3D_CLEAR_STENCIL:
-      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n");
+      _debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n");
       break;
    default:
-      debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag);
+      _debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag);
       break;
    }
-   debug_printf("\t\t.color = %u\n", (*cmd).color);
-   debug_printf("\t\t.depth = %f\n", (*cmd).depth);
-   debug_printf("\t\t.stencil = %u\n", (*cmd).stencil);
+   _debug_printf("\t\t.color = %u\n", (*cmd).color);
+   _debug_printf("\t\t.depth = %f\n", (*cmd).depth);
+   _debug_printf("\t\t.stencil = %u\n", (*cmd).stencil);
 }
 
 static void
 dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
 {
-   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+   _debug_printf("\t\t.sid = %u\n", (*cmd).sid);
    switch((*cmd).surfaceFlags) {
    case SVGA3D_SURFACE_CUBEMAP:
-      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n");
+      _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n");
       break;
    case SVGA3D_SURFACE_HINT_STATIC:
-      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n");
+      _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n");
       break;
    case SVGA3D_SURFACE_HINT_DYNAMIC:
-      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n");
+      _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n");
       break;
    case SVGA3D_SURFACE_HINT_INDEXBUFFER:
-      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n");
+      _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n");
       break;
    case SVGA3D_SURFACE_HINT_VERTEXBUFFER:
-      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n");
+      _debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n");
       break;
    default:
-      debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags);
+      _debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags);
       break;
    }
    switch((*cmd).format) {
    case SVGA3D_FORMAT_INVALID:
-      debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n");
+      _debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n");
       break;
    case SVGA3D_X8R8G8B8:
-      debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n");
+      _debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n");
       break;
    case SVGA3D_A8R8G8B8:
-      debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n");
+      _debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n");
       break;
    case SVGA3D_R5G6B5:
-      debug_printf("\t\t.format = SVGA3D_R5G6B5\n");
+      _debug_printf("\t\t.format = SVGA3D_R5G6B5\n");
       break;
    case SVGA3D_X1R5G5B5:
-      debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n");
+      _debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n");
       break;
    case SVGA3D_A1R5G5B5:
-      debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n");
+      _debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n");
       break;
    case SVGA3D_A4R4G4B4:
-      debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n");
+      _debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n");
       break;
    case SVGA3D_Z_D32:
-      debug_printf("\t\t.format = SVGA3D_Z_D32\n");
+      _debug_printf("\t\t.format = SVGA3D_Z_D32\n");
       break;
    case SVGA3D_Z_D16:
-      debug_printf("\t\t.format = SVGA3D_Z_D16\n");
+      _debug_printf("\t\t.format = SVGA3D_Z_D16\n");
       break;
    case SVGA3D_Z_D24S8:
-      debug_printf("\t\t.format = SVGA3D_Z_D24S8\n");
+      _debug_printf("\t\t.format = SVGA3D_Z_D24S8\n");
       break;
    case SVGA3D_Z_D15S1:
-      debug_printf("\t\t.format = SVGA3D_Z_D15S1\n");
+      _debug_printf("\t\t.format = SVGA3D_Z_D15S1\n");
       break;
    case SVGA3D_LUMINANCE8:
-      debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n");
+      _debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n");
       break;
    case SVGA3D_LUMINANCE4_ALPHA4:
-      debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n");
+      _debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n");
       break;
    case SVGA3D_LUMINANCE16:
-      debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n");
+      _debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n");
       break;
    case SVGA3D_LUMINANCE8_ALPHA8:
-      debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n");
+      _debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n");
       break;
    case SVGA3D_DXT1:
-      debug_printf("\t\t.format = SVGA3D_DXT1\n");
+      _debug_printf("\t\t.format = SVGA3D_DXT1\n");
       break;
    case SVGA3D_DXT2:
-      debug_printf("\t\t.format = SVGA3D_DXT2\n");
+      _debug_printf("\t\t.format = SVGA3D_DXT2\n");
       break;
    case SVGA3D_DXT3:
-      debug_printf("\t\t.format = SVGA3D_DXT3\n");
+      _debug_printf("\t\t.format = SVGA3D_DXT3\n");
       break;
    case SVGA3D_DXT4:
-      debug_printf("\t\t.format = SVGA3D_DXT4\n");
+      _debug_printf("\t\t.format = SVGA3D_DXT4\n");
       break;
    case SVGA3D_DXT5:
-      debug_printf("\t\t.format = SVGA3D_DXT5\n");
+      _debug_printf("\t\t.format = SVGA3D_DXT5\n");
       break;
    case SVGA3D_BUMPU8V8:
-      debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n");
+      _debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n");
       break;
    case SVGA3D_BUMPL6V5U5:
-      debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
+      _debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
       break;
    case SVGA3D_BUMPX8L8V8U8:
-      debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
       break;
    case SVGA3D_BUMPL8V8U8:
-      debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
       break;
    case SVGA3D_ARGB_S10E5:
-      debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
+      _debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
       break;
    case SVGA3D_ARGB_S23E8:
-      debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n");
+      _debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n");
       break;
    case SVGA3D_A2R10G10B10:
-      debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n");
+      _debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n");
       break;
    case SVGA3D_V8U8:
-      debug_printf("\t\t.format = SVGA3D_V8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_V8U8\n");
       break;
    case SVGA3D_Q8W8V8U8:
-      debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n");
       break;
    case SVGA3D_CxV8U8:
-      debug_printf("\t\t.format = SVGA3D_CxV8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_CxV8U8\n");
       break;
    case SVGA3D_X8L8V8U8:
-      debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n");
+      _debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n");
       break;
    case SVGA3D_A2W10V10U10:
-      debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n");
+      _debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n");
       break;
    case SVGA3D_ALPHA8:
-      debug_printf("\t\t.format = SVGA3D_ALPHA8\n");
+      _debug_printf("\t\t.format = SVGA3D_ALPHA8\n");
       break;
    case SVGA3D_R_S10E5:
-      debug_printf("\t\t.format = SVGA3D_R_S10E5\n");
+      _debug_printf("\t\t.format = SVGA3D_R_S10E5\n");
       break;
    case SVGA3D_R_S23E8:
-      debug_printf("\t\t.format = SVGA3D_R_S23E8\n");
+      _debug_printf("\t\t.format = SVGA3D_R_S23E8\n");
       break;
    case SVGA3D_RG_S10E5:
-      debug_printf("\t\t.format = SVGA3D_RG_S10E5\n");
+      _debug_printf("\t\t.format = SVGA3D_RG_S10E5\n");
       break;
    case SVGA3D_RG_S23E8:
-      debug_printf("\t\t.format = SVGA3D_RG_S23E8\n");
+      _debug_printf("\t\t.format = SVGA3D_RG_S23E8\n");
       break;
    case SVGA3D_BUFFER:
-      debug_printf("\t\t.format = SVGA3D_BUFFER\n");
+      _debug_printf("\t\t.format = SVGA3D_BUFFER\n");
       break;
    case SVGA3D_Z_D24X8:
-      debug_printf("\t\t.format = SVGA3D_Z_D24X8\n");
+      _debug_printf("\t\t.format = SVGA3D_Z_D24X8\n");
       break;
    case SVGA3D_FORMAT_MAX:
-      debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n");
+      _debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n");
       break;
    default:
-      debug_printf("\t\t.format = %i\n", (*cmd).format);
+      _debug_printf("\t\t.format = %i\n", (*cmd).format);
       break;
    }
-   debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels);
-   debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels);
-   debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels);
-   debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels);
-   debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels);
-   debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels);
+   _debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels);
+   _debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels);
+   _debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels);
+   _debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels);
+   _debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels);
+   _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels);
 }
 
 
@@ -1438,7 +1438,7 @@ svga_dump_commands(const void *commands, uint32_t size)
 
          switch(cmd_id) {
          case SVGA_3D_CMD_SURFACE_DEFINE:
-            debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+            _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
             {
                const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
                dump_SVGA3dCmdDefineSurface(cmd);
@@ -1450,7 +1450,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SURFACE_DESTROY:
-            debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+            _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
             {
                const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
                dump_SVGA3dCmdDestroySurface(cmd);
@@ -1458,7 +1458,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SURFACE_COPY:
-            debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+            _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
             {
                const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
                dump_SVGA3dCmdSurfaceCopy(cmd);
@@ -1470,7 +1470,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SURFACE_STRETCHBLT:
-            debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+            _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
             {
                const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
                dump_SVGA3dCmdSurfaceStretchBlt(cmd);
@@ -1478,7 +1478,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SURFACE_DMA:
-            debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+            _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
             {
                const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
                dump_SVGA3dCmdSurfaceDMA(cmd);
@@ -1494,7 +1494,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_CONTEXT_DEFINE:
-            debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+            _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
             {
                const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
                dump_SVGA3dCmdDefineContext(cmd);
@@ -1502,7 +1502,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_CONTEXT_DESTROY:
-            debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+            _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
             {
                const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
                dump_SVGA3dCmdDestroyContext(cmd);
@@ -1510,7 +1510,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETTRANSFORM:
-            debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+            _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
             {
                const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
                dump_SVGA3dCmdSetTransform(cmd);
@@ -1518,7 +1518,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETZRANGE:
-            debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+            _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
             {
                const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
                dump_SVGA3dCmdSetZRange(cmd);
@@ -1526,7 +1526,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETRENDERSTATE:
-            debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+            _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
             {
                const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
                dump_SVGA3dCmdSetRenderState(cmd);
@@ -1538,7 +1538,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETRENDERTARGET:
-            debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+            _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
             {
                const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
                dump_SVGA3dCmdSetRenderTarget(cmd);
@@ -1546,7 +1546,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETTEXTURESTATE:
-            debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+            _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
             {
                const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
                dump_SVGA3dCmdSetTextureState(cmd);
@@ -1558,7 +1558,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETMATERIAL:
-            debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+            _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
             {
                const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
                dump_SVGA3dCmdSetMaterial(cmd);
@@ -1566,7 +1566,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETLIGHTDATA:
-            debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+            _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
             {
                const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
                dump_SVGA3dCmdSetLightData(cmd);
@@ -1574,7 +1574,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETLIGHTENABLED:
-            debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+            _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
             {
                const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
                dump_SVGA3dCmdSetLightEnabled(cmd);
@@ -1582,7 +1582,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETVIEWPORT:
-            debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+            _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
             {
                const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
                dump_SVGA3dCmdSetViewport(cmd);
@@ -1590,7 +1590,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETCLIPPLANE:
-            debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+            _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
             {
                const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
                dump_SVGA3dCmdSetClipPlane(cmd);
@@ -1598,7 +1598,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_CLEAR:
-            debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+            _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
             {
                const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
                dump_SVGA3dCmdClear(cmd);
@@ -1610,7 +1610,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_PRESENT:
-            debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+            _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
             {
                const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
                dump_SVGA3dCmdPresent(cmd);
@@ -1622,7 +1622,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SHADER_DEFINE:
-            debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+            _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
             {
                const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
                dump_SVGA3dCmdDefineShader(cmd);
@@ -1634,7 +1634,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SHADER_DESTROY:
-            debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+            _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
             {
                const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
                dump_SVGA3dCmdDestroyShader(cmd);
@@ -1642,7 +1642,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SET_SHADER:
-            debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+            _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
             {
                const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
                dump_SVGA3dCmdSetShader(cmd);
@@ -1650,7 +1650,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SET_SHADER_CONST:
-            debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+            _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
             {
                const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
                dump_SVGA3dCmdSetShaderConst(cmd);
@@ -1658,7 +1658,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_DRAW_PRIMITIVES:
-            debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+            _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
             {
                const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
                unsigned i, j;
@@ -1679,7 +1679,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_SETSCISSORRECT:
-            debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+            _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
             {
                const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
                dump_SVGA3dCmdSetScissorRect(cmd);
@@ -1687,7 +1687,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_BEGIN_QUERY:
-            debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+            _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
             {
                const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
                dump_SVGA3dCmdBeginQuery(cmd);
@@ -1695,7 +1695,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_END_QUERY:
-            debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+            _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
             {
                const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
                dump_SVGA3dCmdEndQuery(cmd);
@@ -1703,7 +1703,7 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          case SVGA_3D_CMD_WAIT_FOR_QUERY:
-            debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+            _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
             {
                const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
                dump_SVGA3dCmdWaitForQuery(cmd);
@@ -1711,24 +1711,24 @@ svga_dump_commands(const void *commands, uint32_t size)
             }
             break;
          default:
-            debug_printf("\t0x%08x\n", cmd_id);
+            _debug_printf("\t0x%08x\n", cmd_id);
             break;
          }
 
          while(body + sizeof(uint32_t) <= next) {
-            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
             body += sizeof(uint32_t);
          }
          while(body + sizeof(uint32_t) <= next)
-            debug_printf("\t\t0x%02x\n", *body++);
+            _debug_printf("\t\t0x%02x\n", *body++);
       }
       else if(cmd_id == SVGA_CMD_FENCE) {
-         debug_printf("\tSVGA_CMD_FENCE\n");
-         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         _debug_printf("\tSVGA_CMD_FENCE\n");
+         _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
          next += 2*sizeof(uint32_t);
       }
       else {
-         debug_printf("\t0x%08x\n", cmd_id);
+         _debug_printf("\t0x%08x\n", cmd_id);
          next += sizeof(uint32_t);
       }
    }
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index 288e753296..dc5f3267e2 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -71,14 +71,14 @@ class decl_dumper_t(decl_visitor.decl_visitor_t):
             print '   switch(%s) {' % ("(*cmd)" + self._instance,)
             for name, value in self.decl.values:
                 print '   case %s:' % (name,)
-                print '      debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name)
+                print '      _debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name)
                 print '      break;'
             print '   default:'
-            print '      debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+            print '      _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
             print '      break;'
             print '   }'
         else:
-            print '   debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+            print '   _debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
 
 
 def dump_decl(instance, decl):
@@ -154,7 +154,7 @@ class type_dumper_t(type_visitor.type_visitor_t):
         dump_decl(self.instance, decl)
 
     def print_instance(self, format):
-        print '   debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance)
+        print '   _debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance)
 
 
 def dump_type(instance, type_):
@@ -230,7 +230,7 @@ svga_dump_commands(const void *commands, uint32_t size)
     indexes = 'ijklmn'
     for id, header, body, footer in cmds:
         print '         case %s:' % id
-        print '            debug_printf("\\t%s\\n");' % id
+        print '            _debug_printf("\\t%s\\n");' % id
         print '            {'
         print '               const %s *cmd = (const %s *)body;' % (header, header)
         if len(body):
@@ -255,25 +255,25 @@ svga_dump_commands(const void *commands, uint32_t size)
         print '            }'
         print '            break;'
     print '         default:'
-    print '            debug_printf("\\t0x%08x\\n", cmd_id);'
+    print '            _debug_printf("\\t0x%08x\\n", cmd_id);'
     print '            break;'
     print '         }'
             
     print r'''
          while(body + sizeof(uint32_t) <= next) {
-            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
             body += sizeof(uint32_t);
          }
          while(body + sizeof(uint32_t) <= next)
-            debug_printf("\t\t0x%02x\n", *body++);
+            _debug_printf("\t\t0x%02x\n", *body++);
       }
       else if(cmd_id == SVGA_CMD_FENCE) {
-         debug_printf("\tSVGA_CMD_FENCE\n");
-         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         _debug_printf("\tSVGA_CMD_FENCE\n");
+         _debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
          next += 2*sizeof(uint32_t);
       }
       else {
-         debug_printf("\t0x%08x\n", cmd_id);
+         _debug_printf("\t0x%08x\n", cmd_id);
          next += sizeof(uint32_t);
       }
    }
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
index b0e7fdf378..70e27d86d3 100644
--- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -50,16 +50,16 @@ static void dump_op( struct sh_op op, const char *mnemonic )
    assert( op.is_reg == 0 );
 
    if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
+      _debug_printf( "+" );
+   _debug_printf( "%s", mnemonic );
    switch (op.control) {
    case 0:
       break;
    case SVGA3DOPCONT_PROJECT:
-      debug_printf( "p" );
+      _debug_printf( "p" );
       break;
    case SVGA3DOPCONT_BIAS:
-      debug_printf( "b" );
+      _debug_printf( "b" );
       break;
    default:
       assert( 0 );
@@ -72,28 +72,28 @@ static void dump_comp_op( struct sh_op op, const char *mnemonic )
    assert( op.is_reg == 0 );
 
    if (op.coissue)
-      debug_printf( "+" );
-   debug_printf( "%s", mnemonic );
+      _debug_printf( "+" );
+   _debug_printf( "%s", mnemonic );
    switch (op.control) {
    case SVGA3DOPCOMP_RESERVED0:
       break;
    case SVGA3DOPCOMP_GT:
-      debug_printf("_gt");
+      _debug_printf("_gt");
       break;
    case SVGA3DOPCOMP_EQ:
-      debug_printf("_eq");
+      _debug_printf("_eq");
       break;
    case SVGA3DOPCOMP_GE:
-      debug_printf("_ge");
+      _debug_printf("_ge");
       break;
    case SVGA3DOPCOMP_LT:
-      debug_printf("_lt");
+      _debug_printf("_lt");
       break;
    case SVGA3DOPCOMPC_NE:
-      debug_printf("_ne");
+      _debug_printf("_ne");
       break;
    case SVGA3DOPCOMP_LE:
-      debug_printf("_le");
+      _debug_printf("_le");
       break;
    case SVGA3DOPCOMP_RESERVED1:
    default:
@@ -109,93 +109,93 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct
 
    switch (sh_reg_type( reg )) {
    case SVGA3DREG_TEMP:
-      debug_printf( "r%u", reg.number );
+      _debug_printf( "r%u", reg.number );
       break;
 
    case SVGA3DREG_INPUT:
-      debug_printf( "v%u", reg.number );
+      _debug_printf( "v%u", reg.number );
       break;
 
    case SVGA3DREG_CONST:
       if (reg.relative) {
          if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
-            debug_printf( "c[aL+%u]", reg.number );
+            _debug_printf( "c[aL+%u]", reg.number );
          else
-            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+            _debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
       }
       else
-         debug_printf( "c%u", reg.number );
+         _debug_printf( "c%u", reg.number );
       break;
 
    case SVGA3DREG_ADDR:    /* VS */
    /* SVGA3DREG_TEXTURE */ /* PS */
       if (di->is_ps)
-         debug_printf( "t%u", reg.number );
+         _debug_printf( "t%u", reg.number );
       else
-         debug_printf( "a%u", reg.number );
+         _debug_printf( "a%u", reg.number );
       break;
 
    case SVGA3DREG_RASTOUT:
       switch (reg.number) {
       case 0 /*POSITION*/:
-         debug_printf( "oPos" );
+         _debug_printf( "oPos" );
          break;
       case 1 /*FOG*/:
-         debug_printf( "oFog" );
+         _debug_printf( "oFog" );
          break;
       case 2 /*POINT_SIZE*/:
-         debug_printf( "oPts" );
+         _debug_printf( "oPts" );
          break;
       default:
          assert( 0 );
-         debug_printf( "???" );
+         _debug_printf( "???" );
       }
       break;
 
    case SVGA3DREG_ATTROUT:
       assert( reg.number < 2 );
-      debug_printf( "oD%u", reg.number );
+      _debug_printf( "oD%u", reg.number );
       break;
 
    case SVGA3DREG_TEXCRDOUT:
    /* SVGA3DREG_OUTPUT */
-      debug_printf( "oT%u", reg.number );
+      _debug_printf( "oT%u", reg.number );
       break;
 
    case SVGA3DREG_COLOROUT:
-      debug_printf( "oC%u", reg.number );
+      _debug_printf( "oC%u", reg.number );
       break;
 
    case SVGA3DREG_DEPTHOUT:
-      debug_printf( "oD%u", reg.number );
+      _debug_printf( "oD%u", reg.number );
       break;
 
    case SVGA3DREG_SAMPLER:
-      debug_printf( "s%u", reg.number );
+      _debug_printf( "s%u", reg.number );
       break;
 
    case SVGA3DREG_CONSTBOOL:
       assert( !reg.relative );
-      debug_printf( "b%u", reg.number );
+      _debug_printf( "b%u", reg.number );
       break;
 
    case SVGA3DREG_CONSTINT:
       assert( !reg.relative );
-      debug_printf( "i%u", reg.number );
+      _debug_printf( "i%u", reg.number );
       break;
 
    case SVGA3DREG_LOOP:
       assert( reg.number == 0 );
-      debug_printf( "aL" );
+      _debug_printf( "aL" );
       break;
 
    case SVGA3DREG_MISCTYPE:
       switch (reg.number) {
       case SVGA3DMISCREG_POSITION:
-         debug_printf( "vPos" );
+         _debug_printf( "vPos" );
          break;
       case SVGA3DMISCREG_FACE:
-         debug_printf( "vFace" );
+         _debug_printf( "vFace" );
          break;
       default:
          assert(0);
@@ -204,46 +204,46 @@ static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct
       break;
 
    case SVGA3DREG_LABEL:
-      debug_printf( "l%u", reg.number );
+      _debug_printf( "l%u", reg.number );
       break;
 
    case SVGA3DREG_PREDICATE:
-      debug_printf( "p%u", reg.number );
+      _debug_printf( "p%u", reg.number );
       break;
 
 
    default:
       assert( 0 );
-      debug_printf( "???" );
+      _debug_printf( "???" );
    }
 }
 
 static void dump_cdata( struct sh_cdata cdata )
 {
-   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+   _debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
 }
 
 static void dump_idata( struct sh_idata idata )
 {
-   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+   _debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
 }
 
 static void dump_bdata( boolean bdata )
 {
-   debug_printf( bdata ? "TRUE" : "FALSE" );
+   _debug_printf( bdata ? "TRUE" : "FALSE" );
 }
 
 static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
 {
    switch (sampleinfo.texture_type) {
    case SVGA3DSAMP_2D:
-      debug_printf( "_2d" );
+      _debug_printf( "_2d" );
       break;
    case SVGA3DSAMP_CUBE:
-      debug_printf( "_cube" );
+      _debug_printf( "_cube" );
       break;
    case SVGA3DSAMP_VOLUME:
-      debug_printf( "_volume" );
+      _debug_printf( "_volume" );
       break;
    default:
       assert( 0 );
@@ -255,46 +255,46 @@ static void dump_usageinfo( struct vs_semantic semantic )
 {
    switch (semantic.usage) {
    case SVGA3D_DECLUSAGE_POSITION:
-      debug_printf("_position" );
+      _debug_printf("_position" );
       break;
    case SVGA3D_DECLUSAGE_BLENDWEIGHT:
-      debug_printf("_blendweight" );
+      _debug_printf("_blendweight" );
       break;
    case SVGA3D_DECLUSAGE_BLENDINDICES:
-      debug_printf("_blendindices" );
+      _debug_printf("_blendindices" );
       break;
    case SVGA3D_DECLUSAGE_NORMAL:
-      debug_printf("_normal" );
+      _debug_printf("_normal" );
       break;
    case SVGA3D_DECLUSAGE_PSIZE:
-      debug_printf("_psize" );
+      _debug_printf("_psize" );
       break;
    case SVGA3D_DECLUSAGE_TEXCOORD:
-      debug_printf("_texcoord");
+      _debug_printf("_texcoord");
       break;
    case SVGA3D_DECLUSAGE_TANGENT:
-      debug_printf("_tangent" );
+      _debug_printf("_tangent" );
       break;
    case SVGA3D_DECLUSAGE_BINORMAL:
-      debug_printf("_binormal" );
+      _debug_printf("_binormal" );
       break;
    case SVGA3D_DECLUSAGE_TESSFACTOR:
-      debug_printf("_tessfactor" );
+      _debug_printf("_tessfactor" );
       break;
    case SVGA3D_DECLUSAGE_POSITIONT:
-      debug_printf("_positiont" );
+      _debug_printf("_positiont" );
       break;
    case SVGA3D_DECLUSAGE_COLOR:
-      debug_printf("_color" );
+      _debug_printf("_color" );
       break;
    case SVGA3D_DECLUSAGE_FOG:
-      debug_printf("_fog" );
+      _debug_printf("_fog" );
       break;
    case SVGA3D_DECLUSAGE_DEPTH:
-      debug_printf("_depth" );
+      _debug_printf("_depth" );
       break;
    case SVGA3D_DECLUSAGE_SAMPLE:
-      debug_printf("_sample");
+      _debug_printf("_sample");
       break;
    default:
       assert( 0 );
@@ -302,7 +302,7 @@ static void dump_usageinfo( struct vs_semantic semantic )
    }
 
    if (semantic.usage_index != 0) {
-      debug_printf("%d", semantic.usage_index );
+      _debug_printf("%d", semantic.usage_index );
    }
 }
 
@@ -316,47 +316,47 @@ static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
    assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
 
    if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
-      debug_printf( "_sat" );
+      _debug_printf( "_sat" );
    if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
-      debug_printf( "_pp" );
+      _debug_printf( "_pp" );
    switch (dstreg.shift_scale) {
    case 0:
       break;
    case 1:
-      debug_printf( "_x2" );
+      _debug_printf( "_x2" );
       break;
    case 2:
-      debug_printf( "_x4" );
+      _debug_printf( "_x4" );
       break;
    case 3:
-      debug_printf( "_x8" );
+      _debug_printf( "_x8" );
       break;
    case 13:
-      debug_printf( "_d8" );
+      _debug_printf( "_d8" );
       break;
    case 14:
-      debug_printf( "_d4" );
+      _debug_printf( "_d4" );
       break;
    case 15:
-      debug_printf( "_d2" );
+      _debug_printf( "_d2" );
       break;
    default:
       assert( 0 );
    }
-   debug_printf( " " );
+   _debug_printf( " " );
 
    u.dstreg = dstreg;
    dump_reg( u.reg, NULL, di );
    if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) {
-      debug_printf( "." );
+      _debug_printf( "." );
       if (dstreg.write_mask & SVGA3DWRITEMASK_0)
-         debug_printf( "x" );
+         _debug_printf( "x" );
       if (dstreg.write_mask & SVGA3DWRITEMASK_1)
-         debug_printf( "y" );
+         _debug_printf( "y" );
       if (dstreg.write_mask & SVGA3DWRITEMASK_2)
-         debug_printf( "z" );
+         _debug_printf( "z" );
       if (dstreg.write_mask & SVGA3DWRITEMASK_3)
-         debug_printf( "w" );
+         _debug_printf( "w" );
    }
 }
 
@@ -372,19 +372,19 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons
    case SVGA3DSRCMOD_BIASNEG:
    case SVGA3DSRCMOD_SIGNNEG:
    case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "-" );
+      _debug_printf( "-" );
       break;
    case SVGA3DSRCMOD_ABS:
-      debug_printf( "|" );
+      _debug_printf( "|" );
       break;
    case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "-|" );
+      _debug_printf( "-|" );
       break;
    case SVGA3DSRCMOD_COMP:
-      debug_printf( "1-" );
+      _debug_printf( "1-" );
       break;
    case SVGA3DSRCMOD_NOT:
-      debug_printf( "!" );
+      _debug_printf( "!" );
    }
 
    u.srcreg = srcreg;
@@ -397,39 +397,39 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons
       break;
    case SVGA3DSRCMOD_ABS:
    case SVGA3DSRCMOD_ABSNEG:
-      debug_printf( "|" );
+      _debug_printf( "|" );
       break;
    case SVGA3DSRCMOD_BIAS:
    case SVGA3DSRCMOD_BIASNEG:
-      debug_printf( "_bias" );
+      _debug_printf( "_bias" );
       break;
    case SVGA3DSRCMOD_SIGN:
    case SVGA3DSRCMOD_SIGNNEG:
-      debug_printf( "_bx2" );
+      _debug_printf( "_bx2" );
       break;
    case SVGA3DSRCMOD_X2:
    case SVGA3DSRCMOD_X2NEG:
-      debug_printf( "_x2" );
+      _debug_printf( "_x2" );
       break;
    case SVGA3DSRCMOD_DZ:
-      debug_printf( "_dz" );
+      _debug_printf( "_dz" );
       break;
    case SVGA3DSRCMOD_DW:
-      debug_printf( "_dw" );
+      _debug_printf( "_dw" );
       break;
    default:
       assert( 0 );
    }
    if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) {
-      debug_printf( "." );
+      _debug_printf( "." );
       if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+         _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
       }
       else {
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
-         debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
+         _debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+         _debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
+         _debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
+         _debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
       }
    }
 }
@@ -447,15 +447,15 @@ svga_shader_dump(
 
    if (do_binary) {
       for (i = 0; i < dwords; i++) 
-         debug_printf("  0x%08x,\n", assem[i]);
+         _debug_printf("  0x%08x,\n", assem[i]);
       
-      debug_printf("\n\n");
+      _debug_printf("\n\n");
    }
 
    di.version.value = *assem++;
    di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
 
-   debug_printf(
+   _debug_printf(
       "%s_%u_%u\n",
       di.is_ps ? "ps" : "vs",
       di.version.major,
@@ -465,7 +465,7 @@ svga_shader_dump(
       struct sh_op op = *(struct sh_op *) assem;
 
       if (assem - start >= dwords) {
-         debug_printf("... ran off end of buffer\n");
+         _debug_printf("... ran off end of buffer\n");
          assert(0);
          return;
       }
@@ -475,7 +475,7 @@ svga_shader_dump(
          {
             struct sh_dcl dcl = *(struct sh_dcl *) assem;
 
-            debug_printf( "dcl" );
+            _debug_printf( "dcl" );
             if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
                dump_sampleinfo( dcl.u.ps.sampleinfo );
             else if (di.is_ps) {
@@ -486,7 +486,7 @@ svga_shader_dump(
             else
                dump_usageinfo( dcl.u.vs.semantic );
             dump_dstreg( dcl.reg, &di );
-            debug_printf( "\n" );
+            _debug_printf( "\n" );
             assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
          }
          break;
@@ -495,11 +495,11 @@ svga_shader_dump(
          {
             struct sh_defb defb = *(struct sh_defb *) assem;
 
-            debug_printf( "defb " );
+            _debug_printf( "defb " );
             dump_reg( defb.reg, NULL, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_bdata( defb.data );
-            debug_printf( "\n" );
+            _debug_printf( "\n" );
             assem += sizeof( struct sh_defb ) / sizeof( unsigned );
          }
          break;
@@ -508,11 +508,11 @@ svga_shader_dump(
          {
             struct sh_defi defi = *(struct sh_defi *) assem;
 
-            debug_printf( "defi " );
+            _debug_printf( "defi " );
             dump_reg( defi.reg, NULL, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_idata( defi.idata );
-            debug_printf( "\n" );
+            _debug_printf( "\n" );
             assem += sizeof( struct sh_defi ) / sizeof( unsigned );
          }
          break;
@@ -528,11 +528,11 @@ svga_shader_dump(
          else {
             struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
             dump_dstreg( unaryop.dst, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_srcreg( unaryop.src, NULL, &di );
             assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
          }
-         debug_printf( "\n" );
+         _debug_printf( "\n" );
          break;
 
       case SVGA3DOP_TEX:
@@ -549,7 +549,7 @@ svga_shader_dump(
                struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
 
                dump_dstreg( unaryop.dst, &di );
-               debug_printf( ", " );
+               _debug_printf( ", " );
                dump_srcreg( unaryop.src, NULL, &di );
                assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
             }
@@ -559,30 +559,30 @@ svga_shader_dump(
 
             dump_op( op, "texld" );
             dump_dstreg( binaryop.dst, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_srcreg( binaryop.src0, NULL, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_srcreg( binaryop.src1, NULL, &di );
             assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
          }
-         debug_printf( "\n" );
+         _debug_printf( "\n" );
          break;
 
       case SVGA3DOP_DEF:
          {
             struct sh_def def = *(struct sh_def *) assem;
 
-            debug_printf( "def " );
+            _debug_printf( "def " );
             dump_reg( def.reg, NULL, &di );
-            debug_printf( ", " );
+            _debug_printf( ", " );
             dump_cdata( def.cdata );
-            debug_printf( "\n" );
+            _debug_printf( "\n" );
             assem += sizeof( struct sh_def ) / sizeof( unsigned );
          }
          break;
 
       case SVGA3DOP_PHASE:
-         debug_printf( "phase\n" );
+         _debug_printf( "phase\n" );
          assem += sizeof( struct sh_op ) / sizeof( unsigned );
          break;
 
@@ -596,12 +596,12 @@ svga_shader_dump(
          break;
 
       case SVGA3DOP_RET:
-         debug_printf( "ret\n" );
+         _debug_printf( "ret\n" );
          assem += sizeof( struct sh_op ) / sizeof( unsigned );
          break;
 
       case SVGA3DOP_END:
-         debug_printf( "end\n" );
+         _debug_printf( "end\n" );
          finished = TRUE;
          break;
 
@@ -640,14 +640,14 @@ svga_shader_dump(
                }
 
                if (not_first_arg)
-                  debug_printf( ", " );
+                  _debug_printf( ", " );
                else
-                  debug_printf( " " );
+                  _debug_printf( " " );
                dump_srcreg( srcreg, &indreg, &di );
                not_first_arg = TRUE;
             }
 
-            debug_printf( "\n" );
+            _debug_printf( "\n" );
          }
       }
    }
-- 
cgit v1.2.3


From 5b1a7843f841b2bfdd54538a2eaad9dadae3e09d Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 5 Dec 2009 06:34:59 +0000
Subject: svga: Dump SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN commands.

---
 src/gallium/drivers/svga/svgadump/svga_dump.c  | 38 ++++++++++++++++++++++++++
 src/gallium/drivers/svga/svgadump/svga_dump.py |  9 +++---
 2 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index 18e0eb5139..e6d4a74e86 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1416,6 +1416,32 @@ dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
    _debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels);
 }
 
+static void
+dump_SVGASignedRect(const SVGASignedRect *cmd)
+{
+   _debug_printf("\t\t.left = %i\n", (*cmd).left);
+   _debug_printf("\t\t.top = %i\n", (*cmd).top);
+   _debug_printf("\t\t.right = %i\n", (*cmd).right);
+   _debug_printf("\t\t.bottom = %i\n", (*cmd).bottom);
+}
+
+static void
+dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd)
+{
+   _debug_printf("\t\t.srcImage.sid = %u\n", (*cmd).srcImage.sid);
+   _debug_printf("\t\t.srcImage.face = %u\n", (*cmd).srcImage.face);
+   _debug_printf("\t\t.srcImage.mipmap = %u\n", (*cmd).srcImage.mipmap);
+   _debug_printf("\t\t.srcRect.left = %i\n", (*cmd).srcRect.left);
+   _debug_printf("\t\t.srcRect.top = %i\n", (*cmd).srcRect.top);
+   _debug_printf("\t\t.srcRect.right = %i\n", (*cmd).srcRect.right);
+   _debug_printf("\t\t.srcRect.bottom = %i\n", (*cmd).srcRect.bottom);
+   _debug_printf("\t\t.destScreenId = %u\n", (*cmd).destScreenId);
+   _debug_printf("\t\t.destRect.left = %i\n", (*cmd).destRect.left);
+   _debug_printf("\t\t.destRect.top = %i\n", (*cmd).destRect.top);
+   _debug_printf("\t\t.destRect.right = %i\n", (*cmd).destRect.right);
+   _debug_printf("\t\t.destRect.bottom = %i\n", (*cmd).destRect.bottom);
+}
+
 
 void            
 svga_dump_commands(const void *commands, uint32_t size)
@@ -1710,6 +1736,18 @@ svga_dump_commands(const void *commands, uint32_t size)
                body = (const uint8_t *)&cmd[1];
             }
             break;
+         case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
+            _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
+            {
+               const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
+               dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGASignedRect) <= next) {
+                  dump_SVGASignedRect((const SVGASignedRect *)body);
+                  body += sizeof(SVGASignedRect);
+               }
+            }
+            break;
          default:
             _debug_printf("\t0x%08x\n", cmd_id);
             break;
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index dc5f3267e2..a1ada29ef8 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -202,6 +202,7 @@ cmds = [
     ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None),
     ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None),
     #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None),
+    ('SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN', 'SVGA3dCmdBlitSurfaceToScreen', (), 'SVGASignedRect'),
 ]
 
 def dump_cmds():
@@ -294,18 +295,18 @@ def main():
     print '#include "svga_shader_dump.h"'
     print '#include "svga3d_reg.h"'
     print
-    print '#include "pipe/p_debug.h"'
+    print '#include "util/u_debug.h"'
     print '#include "svga_dump.h"'
     print
 
     config = parser.config_t(
-        include_paths = ['include'],
+        include_paths = ['../../../include', '../include'],
         compiler = 'gcc',
     )
 
     headers = [
-        'include/svga_types.h', 
-        'include/svga3d_reg.h', 
+        'svga_types.h', 
+        'svga3d_reg.h', 
     ]
 
     decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
-- 
cgit v1.2.3


From 56a4342a0493ad1d502d4791ab941ef171d36e60 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <daenzer@vmware.com>
Date: Sat, 5 Dec 2009 17:48:00 +0100
Subject: r300g: Need to emit a hardware scissor rectangle even if scissor is
 disabled.

Just make it cover the whole framebuffer in that case. Otherwise the kernel CS
checker may complain, e.g. running progs/demos/gearbox. That runs fast now
here, but doesn't look right yet.
---
 src/gallium/drivers/r300/r300_context.h |  2 ++
 src/gallium/drivers/r300/r300_emit.c    |  9 +++++++--
 src/gallium/drivers/r300/r300_state.c   | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index dd3f6ac143..11cd9f855f 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -101,6 +101,8 @@ struct r300_sampler_state {
 struct r300_scissor_state {
     uint32_t scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
     uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+    uint32_t no_scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
+    uint32_t no_scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
 };
 
 struct r300_texture_state {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 60be03f54f..04dca29216 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -570,8 +570,13 @@ void r300_emit_scissor_state(struct r300_context* r300,
 
     BEGIN_CS(3);
     OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
-    OUT_CS(scissor->scissor_top_left);
-    OUT_CS(scissor->scissor_bottom_right);
+    if (r300->rs_state->rs.scissor) {
+       OUT_CS(scissor->scissor_top_left);
+       OUT_CS(scissor->scissor_bottom_right);
+    } else {
+       OUT_CS(scissor->no_scissor_top_left);
+       OUT_CS(scissor->no_scissor_bottom_right);
+    }
     END_CS;
 }
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 442af70e14..2bc2b79c02 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -302,6 +302,25 @@ static void
     r300->framebuffer_state = *state;
 
     r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+
+    if (r300_screen(r300->context.screen)->caps->is_r500) {
+        r300->scissor_state->no_scissor_top_left =
+            (0 << R300_SCISSORS_X_SHIFT) |
+            (0 << R300_SCISSORS_Y_SHIFT);
+        r300->scissor_state->no_scissor_bottom_right =
+            ((state->width - 1) << R300_SCISSORS_X_SHIFT) |
+            ((state->height - 1) << R300_SCISSORS_Y_SHIFT);
+    } else {
+        /* Offset of 1440 in non-R500 chipsets. */
+        r300->scissor_state->no_scissor_top_left =
+            ((0 + 1440) << R300_SCISSORS_X_SHIFT) |
+            ((0 + 1440) << R300_SCISSORS_Y_SHIFT);
+        r300->scissor_state->no_scissor_bottom_right =
+            (((state->width - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
+            (((state->height - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
+    }
+
+    r300->dirty_state |= R300_NEW_SCISSOR;
 }
 
 /* Create fragment shader state. */
-- 
cgit v1.2.3


From e1380cae885df37d4a211d0271f59487d9f2db78 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 5 Dec 2009 19:17:20 +0100
Subject: r300g: remove redundant code and clean up

---
 src/gallium/drivers/r300/r300_context.h | 11 +++--
 src/gallium/drivers/r300/r300_emit.c    | 19 +++++----
 src/gallium/drivers/r300/r300_state.c   | 73 +++++++++++++++++----------------
 3 files changed, 57 insertions(+), 46 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 11cd9f855f..23ea32c57e 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -98,11 +98,14 @@ struct r300_sampler_state {
     unsigned min_lod, max_lod;
 };
 
+struct r300_scissor_regs {
+    uint32_t top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
+    uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+};
+
 struct r300_scissor_state {
-    uint32_t scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
-    uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
-    uint32_t no_scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
-    uint32_t no_scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+    struct r300_scissor_regs framebuffer;
+    struct r300_scissor_regs scissor;
 };
 
 struct r300_texture_state {
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 04dca29216..dbf316a9b5 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -563,21 +563,26 @@ void r300_emit_rs_block_state(struct r300_context* r300,
     END_CS;
 }
 
-void r300_emit_scissor_state(struct r300_context* r300,
-                             struct r300_scissor_state* scissor)
+static void r300_emit_scissor_regs(struct r300_context* r300,
+                                   struct r300_scissor_regs* scissor)
 {
     CS_LOCALS(r300);
 
     BEGIN_CS(3);
     OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_CS(scissor->top_left);
+    OUT_CS(scissor->bottom_right);
+    END_CS;
+}
+
+void r300_emit_scissor_state(struct r300_context* r300,
+                             struct r300_scissor_state* scissor)
+{
     if (r300->rs_state->rs.scissor) {
-       OUT_CS(scissor->scissor_top_left);
-       OUT_CS(scissor->scissor_bottom_right);
+        r300_emit_scissor_regs(r300, &scissor->scissor);
     } else {
-       OUT_CS(scissor->no_scissor_top_left);
-       OUT_CS(scissor->no_scissor_bottom_right);
+        r300_emit_scissor_regs(r300, &scissor->framebuffer);
     }
-    END_CS;
 }
 
 void r300_emit_texture(struct r300_context* r300,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 2bc2b79c02..d3233557ce 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -289,11 +289,34 @@ static void r300_set_edgeflags(struct pipe_context* pipe,
     /* XXX and even worse, I have no idea WTF the bitfield is */
 }
 
+static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
+                                  struct r300_scissor_regs *scissor,
+                                  boolean is_r500)
+{
+    if (is_r500) {
+        scissor->top_left =
+            (state->minx << R300_SCISSORS_X_SHIFT) |
+            (state->miny << R300_SCISSORS_Y_SHIFT);
+        scissor->bottom_right =
+            ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
+            ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
+    } else {
+        /* Offset of 1440 in non-R500 chipsets. */
+        scissor->top_left =
+            ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
+            ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
+        scissor->bottom_right =
+            (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
+            (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
+    }
+}
+
 static void
     r300_set_framebuffer_state(struct pipe_context* pipe,
                                const struct pipe_framebuffer_state* state)
 {
     struct r300_context* r300 = r300_context(pipe);
+    struct pipe_scissor_state scissor;
 
     if (r300->draw) {
         draw_flush(r300->draw);
@@ -301,26 +324,17 @@ static void
 
     r300->framebuffer_state = *state;
 
-    r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+    scissor.minx = scissor.miny = 0;
+    scissor.maxx = state->width;
+    scissor.maxy = state->height;
+    r300_set_scissor_regs(&scissor, &r300->scissor_state->framebuffer,
+                          r300_screen(r300->context.screen)->caps->is_r500);
 
-    if (r300_screen(r300->context.screen)->caps->is_r500) {
-        r300->scissor_state->no_scissor_top_left =
-            (0 << R300_SCISSORS_X_SHIFT) |
-            (0 << R300_SCISSORS_Y_SHIFT);
-        r300->scissor_state->no_scissor_bottom_right =
-            ((state->width - 1) << R300_SCISSORS_X_SHIFT) |
-            ((state->height - 1) << R300_SCISSORS_Y_SHIFT);
-    } else {
-        /* Offset of 1440 in non-R500 chipsets. */
-        r300->scissor_state->no_scissor_top_left =
-            ((0 + 1440) << R300_SCISSORS_X_SHIFT) |
-            ((0 + 1440) << R300_SCISSORS_Y_SHIFT);
-        r300->scissor_state->no_scissor_bottom_right =
-            (((state->width - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
-            (((state->height - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
+    /* Don't rely on the order of states being set for the first time. */
+    if (!r300->rs_state || !r300->rs_state->rs.scissor) {
+        r300->dirty_state |= R300_NEW_SCISSOR;
     }
-
-    r300->dirty_state |= R300_NEW_SCISSOR;
+    r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
 }
 
 /* Create fragment shader state. */
@@ -642,24 +656,13 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    if (r300_screen(r300->context.screen)->caps->is_r500) {
-        r300->scissor_state->scissor_top_left =
-            (state->minx << R300_SCISSORS_X_SHIFT) |
-            (state->miny << R300_SCISSORS_Y_SHIFT);
-        r300->scissor_state->scissor_bottom_right =
-            ((state->maxx - 1) << R300_SCISSORS_X_SHIFT) |
-            ((state->maxy - 1) << R300_SCISSORS_Y_SHIFT);
-    } else {
-        /* Offset of 1440 in non-R500 chipsets. */
-        r300->scissor_state->scissor_top_left =
-            ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) |
-            ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT);
-        r300->scissor_state->scissor_bottom_right =
-            (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
-            (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
-    }
+    r300_set_scissor_regs(state, &r300->scissor_state->scissor,
+                          r300_screen(r300->context.screen)->caps->is_r500);
 
-    r300->dirty_state |= R300_NEW_SCISSOR;
+    /* Don't rely on the order of states being set for the first time. */
+    if (!r300->rs_state || r300->rs_state->rs.scissor) {
+        r300->dirty_state |= R300_NEW_SCISSOR;
+    }
 }
 
 static void r300_set_viewport_state(struct pipe_context* pipe,
-- 
cgit v1.2.3


From 07487643515edb731c6abc3e931c329a89dd9293 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 5 Dec 2009 20:39:11 +0100
Subject: r300g: don't render if everything is culled by scissoring

Otherwise a CS is refused by kernel 2.6.31 (and maybe all later
versions, not sure).
---
 src/gallium/drivers/r300/r300_context.h |  3 +++
 src/gallium/drivers/r300/r300_render.c  | 23 +++++++++++++++++++++++
 src/gallium/drivers/r300/r300_state.c   |  3 +++
 3 files changed, 29 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 23ea32c57e..0be190392a 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -101,6 +101,9 @@ struct r300_sampler_state {
 struct r300_scissor_regs {
     uint32_t top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
     uint32_t bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+
+    /* Whether everything is culled by scissoring. */
+    boolean empty_area;
 };
 
 struct r300_scissor_state {
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4c5fb405c6..35b335df6a 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -70,6 +70,12 @@ uint32_t r300_translate_primitive(unsigned prim)
     }
 }
 
+static boolean r300_nothing_to_draw(struct r300_context *r300)
+{
+    return r300->rs_state->rs.scissor &&
+           r300->scissor_state->scissor.empty_area;
+}
+
 static void r300_emit_draw_arrays(struct r300_context *r300,
                                   unsigned mode,
                                   unsigned count)
@@ -173,10 +179,15 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
+
     if (count > 65535) {
         return FALSE;
     }
 
+    if (r300_nothing_to_draw(r300)) {
+        return TRUE;
+    }
+
     r300_update_derived_state(r300);
 
     if (!r300_setup_vertex_buffers(r300)) {
@@ -218,6 +229,10 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
         return FALSE;
     }
 
+    if (r300_nothing_to_draw(r300)) {
+        return TRUE;
+    }
+
     r300_update_derived_state(r300);
 
     if (!r300_setup_vertex_buffers(r300)) {
@@ -251,6 +266,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
         return FALSE;
     }
 
+    if (r300_nothing_to_draw(r300)) {
+        return TRUE;
+    }
+
     for (i = 0; i < r300->vertex_buffer_count; i++) {
         void* buf = pipe_buffer_map(pipe->screen,
                                     r300->vertex_buffer[i].buffer,
@@ -292,6 +311,10 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
+    if (r300_nothing_to_draw(r300)) {
+        return TRUE;
+    }
+
     for (i = 0; i < r300->vertex_buffer_count; i++) {
         void* buf = pipe_buffer_map(pipe->screen,
                                     r300->vertex_buffer[i].buffer,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index d3233557ce..8ef0b3b268 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -309,6 +309,9 @@ static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
             (((state->maxx - 1) + 1440) << R300_SCISSORS_X_SHIFT) |
             (((state->maxy - 1) + 1440) << R300_SCISSORS_Y_SHIFT);
     }
+
+    scissor->empty_area = state->minx >= state->maxx ||
+                          state->miny >= state->maxy;
 }
 
 static void
-- 
cgit v1.2.3


From c574f515f0aa20ccc3841cf61a6124bc5996e7b2 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Sun, 6 Dec 2009 12:26:55 -0500
Subject: nouveau: Work around nv04-nv40 miptrees not matching nouveau_miptree.

Thanks to Bob Gleitsmann for the patch.

I'll clean this up in a better way later if no one else beats me to it.
---
 src/gallium/drivers/nv04/nv04_miptree.c | 3 ++-
 src/gallium/drivers/nv04/nv04_state.h   | 1 +
 src/gallium/drivers/nv10/nv10_miptree.c | 2 ++
 src/gallium/drivers/nv10/nv10_state.h   | 1 +
 src/gallium/drivers/nv20/nv20_miptree.c | 2 ++
 src/gallium/drivers/nv20/nv20_state.h   | 1 +
 src/gallium/drivers/nv30/nv30_miptree.c | 2 ++
 src/gallium/drivers/nv30/nv30_state.h   | 1 +
 src/gallium/drivers/nv40/nv40_miptree.c | 5 ++++-
 src/gallium/drivers/nv40/nv40_state.h   | 1 +
 10 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
index eeab6dfa30..e0a6948aeb 100644
--- a/src/gallium/drivers/nv04/nv04_miptree.c
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -55,7 +55,7 @@ nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 		FREE(mt);
 		return NULL;
 	}
-	
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
@@ -81,6 +81,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
 
 	pipe_buffer_reference(&mt->buffer, pb);
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h
index 399f750dbe..81d1d2ebaa 100644
--- a/src/gallium/drivers/nv04/nv04_state.h
+++ b/src/gallium/drivers/nv04/nv04_state.h
@@ -31,6 +31,7 @@ struct nv04_rasterizer_state {
 
 struct nv04_miptree {
 	struct pipe_texture base;
+	struct nouveau_bo *bo;
 
 	struct pipe_buffer *buffer;
 	uint total_size;
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index 439beeccc3..6a52b6af36 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -67,6 +67,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
 
 	pipe_buffer_reference(&mt->buffer, pb);
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
@@ -90,6 +91,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
 		FREE(mt);
 		return NULL;
 	}
+	mt->bo = nouveau_bo(mt->buffer);
 	
 	return &mt->base;
 }
diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h
index 3a3fd0d4f4..2524ac02e2 100644
--- a/src/gallium/drivers/nv10/nv10_state.h
+++ b/src/gallium/drivers/nv10/nv10_state.h
@@ -126,6 +126,7 @@ struct nv10_depth_stencil_alpha_state {
 
 struct nv10_miptree {
 	struct pipe_texture base;
+	struct nouveau_bo *bo;
 
 	struct pipe_buffer *buffer;
 	uint total_size;
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index 2bde9fb75b..e2e01bd849 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -77,6 +77,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
 
 	pipe_buffer_reference(&mt->buffer, pb);
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
@@ -132,6 +133,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
 		FREE(mt);
 		return NULL;
 	}
+	mt->bo = nouveau_bo(mt->buffer);
 	
 	return &mt->base;
 }
diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h
index 34f402fdcb..dde4106568 100644
--- a/src/gallium/drivers/nv20/nv20_state.h
+++ b/src/gallium/drivers/nv20/nv20_state.h
@@ -126,6 +126,7 @@ struct nv20_depth_stencil_alpha_state {
 
 struct nv20_miptree {
 	struct pipe_texture base;
+	struct nouveau_bo *bo;
 
 	struct pipe_buffer *buffer;
 	uint total_size;
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 9e50a7cf6b..920fe64c32 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -115,6 +115,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 		FREE(mt);
 		return NULL;
 	}
+	mt->bo = nouveau_bo(mt->buffer);
 
 	return &mt->base;
 }
@@ -144,6 +145,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 
 	pipe_buffer_reference(&mt->buffer, pb);
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h
index e6f23bf166..e42e872de7 100644
--- a/src/gallium/drivers/nv30/nv30_state.h
+++ b/src/gallium/drivers/nv30/nv30_state.h
@@ -72,6 +72,7 @@ struct nv30_fragment_program {
 
 struct nv30_miptree {
 	struct pipe_texture base;
+	struct nouveau_bo *bo;
 
 	struct pipe_buffer *buffer;
 	uint total_size;
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 8779c5572b..89ddf373e9 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -5,6 +5,8 @@
 
 #include "nv40_context.h"
 
+
+
 static void
 nv40_miptree_layout(struct nv40_miptree *mt)
 {
@@ -109,7 +111,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 		FREE(mt);
 		return NULL;
 	}
-
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
@@ -138,6 +140,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 	mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 
 	pipe_buffer_reference(&mt->buffer, pb);
+	mt->bo = nouveau_bo(mt->buffer);
 	return &mt->base;
 }
 
diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h
index 8a9d8c8fdf..192074e747 100644
--- a/src/gallium/drivers/nv40/nv40_state.h
+++ b/src/gallium/drivers/nv40/nv40_state.h
@@ -75,6 +75,7 @@ struct nv40_fragment_program {
 
 struct nv40_miptree {
 	struct pipe_texture base;
+	struct nouveau_bo *bo;
 
 	struct pipe_buffer *buffer;
 	uint total_size;
-- 
cgit v1.2.3


From 7091afed789dbba8364deaea0b7a5a99a12ff25e Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 5 Dec 2009 01:27:59 +0100
Subject: r300g: enhance ZTOP conditions

---
 src/gallium/drivers/r300/r300_state_derived.c | 37 ++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index cd969d633b..d448866ef0 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -462,6 +462,31 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
     r300->dirty_state |= R300_NEW_RS_BLOCK;
 }
 
+static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
+{
+    /* We are interested only in the cases when a new depth or stencil value
+     * can be written and changed. */
+
+    /* We might optionally check for [Z func: never] and inspect the stencil
+     * state in a similar fashion, but it's not terribly important. */
+    return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE)
+           ||
+           (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK)
+           ||
+           ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
+            (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
+}
+
+static boolean r300_dsa_alpha_test_enabled(struct r300_dsa_state* dsa)
+{
+    /* We are interested only in the cases when alpha testing can kill
+     * a fragment. */
+    uint32_t af = dsa->alpha_function;
+
+    return (af & R300_FG_ALPHA_FUNC_ENABLE) &&
+           (af & R300_FG_ALPHA_FUNC_ALWAYS) != R300_FG_ALPHA_FUNC_ALWAYS;
+}
+
 static void r300_update_ztop(struct r300_context* r300)
 {
     r300->ztop_state.z_buffer_top = R300_ZTOP_ENABLE;
@@ -484,13 +509,13 @@ static void r300_update_ztop(struct r300_context* r300)
      *
      * ~C.
      */
-    if (r300->dsa_state->alpha_function) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300->fs->info.uses_kill) {
-        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300_fragment_shader_writes_depth(r300->fs)) {
+
+    if (r300->query_current ||
+        r300_fragment_shader_writes_depth(r300->fs)) {
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300->query_current) {
+    } else if (r300_dsa_writes_depth_stencil(r300->dsa_state) &&
+               (r300->fs->info.uses_kill ||
+                r300_dsa_alpha_test_enabled(r300->dsa_state))) {
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     }
 }
-- 
cgit v1.2.3


From c99fb991a3b46c5978248b00eef0efd742127a44 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 6 Dec 2009 23:33:41 -0800
Subject: r300g: Clean up previous commit.

If *I* can't read it, there's a strong possibility others can't,
either.
---
 src/gallium/drivers/r300/r300_state_derived.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index d448866ef0..6af49888b9 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -469,10 +469,8 @@ static boolean r300_dsa_writes_depth_stencil(struct r300_dsa_state* dsa)
 
     /* We might optionally check for [Z func: never] and inspect the stencil
      * state in a similar fashion, but it's not terribly important. */
-    return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE)
-           ||
-           (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK)
-           ||
+    return (dsa->z_buffer_control & R300_Z_WRITE_ENABLE) ||
+           (dsa->stencil_ref_mask & R300_STENCILWRITEMASK_MASK) ||
            ((dsa->z_buffer_control & R500_STENCIL_REFMASK_FRONT_BACK) &&
             (dsa->stencil_ref_bf & R300_STENCILWRITEMASK_MASK));
 }
@@ -503,19 +501,25 @@ static void r300_update_ztop(struct r300_context* r300)
      * The docs claim that for the first three cases, if no ZS writes happen,
      * then ZTOP can be used.
      *
+     * (3) will never apply since we do not support chroma-keyed operations.
+     * (4) will need to be re-examined (and this comment updated) if/when
+     * Hyper-Z becomes supported.
+     *
      * Additionally, the following conditions require disabled ZTOP:
-     * ~) Depth writes in fragment shader
-     * ~) Outstanding occlusion queries
+     * 5) Depth writes in fragment shader
+     * 6) Outstanding occlusion queries
      *
      * ~C.
      */
 
-    if (r300->query_current ||
-        r300_fragment_shader_writes_depth(r300->fs)) {
+    /* ZS writes */
+    if (r300_dsa_writes_depth_stencil(r300->dsa_state) &&
+           (r300_dsa_alpha_test_enabled(r300->dsa_state) ||   /* (1) */
+            r300->fs->info.uses_kill)) {                      /* (2) */
+        r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
+    } else if (r300_fragment_shader_writes_depth(r300->fs)) { /* (5) */
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
-    } else if (r300_dsa_writes_depth_stencil(r300->dsa_state) &&
-               (r300->fs->info.uses_kill ||
-                r300_dsa_alpha_test_enabled(r300->dsa_state))) {
+    } else if (r300->query_current) {                         /* (6) */
         r300->ztop_state.z_buffer_top = R300_ZTOP_DISABLE;
     }
 }
-- 
cgit v1.2.3


From d8d8b0d244d9abfd16f99de7f2f30c635033f66f Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 6 Dec 2009 23:49:02 -0800
Subject: softpipe: sp_winsys.h should define/include what it needs.

---
 src/gallium/drivers/softpipe/sp_winsys.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h
index 9e571862b7..f203ded29e 100644
--- a/src/gallium/drivers/softpipe/sp_winsys.h
+++ b/src/gallium/drivers/softpipe/sp_winsys.h
@@ -34,15 +34,17 @@
 #ifndef SP_WINSYS_H
 #define SP_WINSYS_H
 
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#include "pipe/p_defines.h"
 
 struct pipe_screen;
 struct pipe_winsys;
 struct pipe_context;
+struct pipe_texture;
+struct pipe_buffer;
 
 
 struct pipe_context *softpipe_create( struct pipe_screen * );
-- 
cgit v1.2.3


From 3456f9149b3009fcfce80054759d05883d3c4ee5 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 7 Dec 2009 20:35:42 +0100
Subject: gallium/util: fix util_color_[un]pack[-ub] to be strict aliasing safe

Use a pointer to a union instead of a void pointer.
gcc complained a lot, depending on what the pointer originally was.
It looks like it may in fact be legal to cast, for instance, uint pointers to
union pointers as long as the union contains a uint member, hence do this in
some callers; others just use union util_color in the first place.
---
 src/gallium/auxiliary/util/u_clear.h           |   8 +-
 src/gallium/auxiliary/util/u_pack_color.h      | 147 +++++++++++--------------
 src/gallium/drivers/cell/ppu/cell_clear.c      |   6 +-
 src/gallium/drivers/llvmpipe/lp_clear.c        |   5 +-
 src/gallium/drivers/r300/r300_state.c          |   4 +-
 src/gallium/drivers/softpipe/sp_clear.c        |   7 +-
 src/gallium/drivers/svga/svga_pipe_clear.c     |   6 +-
 src/gallium/drivers/svga/svga_pipe_sampler.c   |   2 +-
 src/gallium/state_trackers/vega/vg_translate.c |  54 ++++-----
 src/mesa/state_tracker/st_atom_pixeltransfer.c |   2 +-
 10 files changed, 113 insertions(+), 128 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_clear.h b/src/gallium/auxiliary/util/u_clear.h
index 1e65a035ae..2c32db6175 100644
--- a/src/gallium/auxiliary/util/u_clear.h
+++ b/src/gallium/auxiliary/util/u_clear.h
@@ -46,13 +46,13 @@ util_clear(struct pipe_context *pipe,
 {
    if (buffers & PIPE_CLEAR_COLOR) {
       struct pipe_surface *ps = framebuffer->cbufs[0];
-      unsigned color;
+      union util_color uc;
 
-      util_pack_color(rgba, ps->format, &color);
+      util_pack_color(rgba, ps->format, &uc);
       if (pipe->surface_fill) {
-         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
       } else {
-         util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color);
+         util_surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
       }
    }
 
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 9dacc6d83d..a2e0f26686 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -40,101 +40,97 @@
 #include "util/u_math.h"
 
 
+
+union util_color {
+   ubyte ub;
+   ushort us;
+   uint ui;
+   float f[4];
+};
+
 /**
  * Pack ubyte R,G,B,A into dest pixel.
  */
 static INLINE void
 util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
-                   enum pipe_format format, void *dest)
+                   enum pipe_format format, union util_color *uc)
 {
    switch (format) {
    case PIPE_FORMAT_R8G8B8A8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (r << 24) | (g << 16) | (b << 8) | a;
+         uc->ui = (r << 24) | (g << 16) | (b << 8) | a;
       }
       return;
    case PIPE_FORMAT_R8G8B8X8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (r << 24) | (g << 16) | (b << 8) | 0xff;
+         uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff;
       }
       return;
    case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (a << 24) | (r << 16) | (g << 8) | b;
+         uc->ui = (a << 24) | (r << 16) | (g << 8) | b;
       }
       return;
    case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (0xff << 24) | (r << 16) | (g << 8) | b;
+         uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b;
       }
       return;
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (b << 24) | (g << 16) | (r << 8) | a;
+         uc->ui = (b << 24) | (g << 16) | (r << 8) | a;
       }
       return;
    case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (b << 24) | (g << 16) | (r << 8) | 0xff;
+         uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff;
       }
       return;
    case PIPE_FORMAT_R5G6B5_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
+         uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
       }
       return;
    case PIPE_FORMAT_A1R5G5B5_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+         uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
       }
       return;
    case PIPE_FORMAT_A4R4G4B4_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+         uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
       }
       return;
    case PIPE_FORMAT_A8_UNORM:
       {
-         ubyte *d = (ubyte *) dest;
-         *d = a;
+         uc->ub = a;
       }
       return;
    case PIPE_FORMAT_L8_UNORM:
    case PIPE_FORMAT_I8_UNORM:
       {
-         ubyte *d = (ubyte *) dest;
-         *d = r;
+         uc->ub = r;
       }
       return;
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       {
-         float *d = (float *) dest;
-         d[0] = (float)r / 255.0f;
-         d[1] = (float)g / 255.0f;
-         d[2] = (float)b / 255.0f;
-         d[3] = (float)a / 255.0f;
+         uc->f[0] = (float)r / 255.0f;
+         uc->f[1] = (float)g / 255.0f;
+         uc->f[2] = (float)b / 255.0f;
+         uc->f[3] = (float)a / 255.0f;
       }
       return;
    case PIPE_FORMAT_R32G32B32_FLOAT:
       {
-         float *d = (float *) dest;
-         d[0] = (float)r / 255.0f;
-         d[1] = (float)g / 255.0f;
-         d[2] = (float)b / 255.0f;
+         uc->f[0] = (float)r / 255.0f;
+         uc->f[1] = (float)g / 255.0f;
+         uc->f[2] = (float)b / 255.0f;
       }
       return;
 
    /* XXX lots more cases to add */
    default:
+      uc->ui = 0; /* keep compiler happy */
       debug_print_format("gallium: unhandled format in util_pack_color_ub()", format);
       assert(0);
    }
@@ -145,13 +141,13 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a,
  * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255].
  */
 static INLINE void
-util_unpack_color_ub(enum pipe_format format, const void *src,
+util_unpack_color_ub(enum pipe_format format, union util_color *uc,
                      ubyte *r, ubyte *g, ubyte *b, ubyte *a)
 {
    switch (format) {
    case PIPE_FORMAT_R8G8B8A8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >> 24) & 0xff);
          *g = (ubyte) ((p >> 16) & 0xff);
          *b = (ubyte) ((p >>  8) & 0xff);
@@ -160,7 +156,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_R8G8B8X8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >> 24) & 0xff);
          *g = (ubyte) ((p >> 16) & 0xff);
          *b = (ubyte) ((p >>  8) & 0xff);
@@ -169,7 +165,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >> 16) & 0xff);
          *g = (ubyte) ((p >>  8) & 0xff);
          *b = (ubyte) ((p >>  0) & 0xff);
@@ -178,7 +174,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >> 16) & 0xff);
          *g = (ubyte) ((p >>  8) & 0xff);
          *b = (ubyte) ((p >>  0) & 0xff);
@@ -187,7 +183,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >>  8) & 0xff);
          *g = (ubyte) ((p >> 16) & 0xff);
          *b = (ubyte) ((p >> 24) & 0xff);
@@ -196,7 +192,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
-         uint p = ((const uint *) src)[0];
+         uint p = uc->ui;
          *r = (ubyte) ((p >>  8) & 0xff);
          *g = (ubyte) ((p >> 16) & 0xff);
          *b = (ubyte) ((p >> 24) & 0xff);
@@ -205,7 +201,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_R5G6B5_UNORM:
       {
-         ushort p = ((const ushort *) src)[0];
+         ushort p = uc->us;
          *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7));
          *g = (ubyte) (((p >> 3) & 0xfc) | ((p >>  9) & 0x3));
          *b = (ubyte) (((p << 3) & 0xf8) | ((p >>  2) & 0x7));
@@ -214,7 +210,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_A1R5G5B5_UNORM:
       {
-         ushort p = ((const ushort *) src)[0];
+         ushort p = uc->us;
          *r = (ubyte) (((p >>  7) & 0xf8) | ((p >> 12) & 0x7));
          *g = (ubyte) (((p >>  2) & 0xf8) | ((p >>  7) & 0x7));
          *b = (ubyte) (((p <<  3) & 0xf8) | ((p >>  2) & 0x7));
@@ -223,7 +219,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_A4R4G4B4_UNORM:
       {
-         ushort p = ((const ushort *) src)[0];
+         ushort p = uc->us;
          *r = (ubyte) (((p >> 4) & 0xf0) | ((p >>  8) & 0xf));
          *g = (ubyte) (((p >> 0) & 0xf0) | ((p >>  4) & 0xf));
          *b = (ubyte) (((p << 4) & 0xf0) | ((p >>  0) & 0xf));
@@ -232,27 +228,27 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_A8_UNORM:
       {
-         ubyte p = ((const ubyte *) src)[0];
+         ubyte p = uc->ub;
          *r = *g = *b = (ubyte) 0xff;
          *a = p;
       }
       return;
    case PIPE_FORMAT_L8_UNORM:
       {
-         ubyte p = ((const ubyte *) src)[0];
+         ubyte p = uc->ub;
          *r = *g = *b = p;
          *a = (ubyte) 0xff;
       }
       return;
    case PIPE_FORMAT_I8_UNORM:
       {
-         ubyte p = ((const ubyte *) src)[0];
+         ubyte p = uc->ub;
          *r = *g = *b = *a = p;
       }
       return;
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       {
-         const float *p = (const float *) src;
+         const float *p = &uc->f[0];
          *r = float_to_ubyte(p[0]);
          *g = float_to_ubyte(p[1]);
          *b = float_to_ubyte(p[2]);
@@ -261,7 +257,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       return;
    case PIPE_FORMAT_R32G32B32_FLOAT:
       {
-         const float *p = (const float *) src;
+         const float *p = &uc->f[0];
          *r = float_to_ubyte(p[0]);
          *g = float_to_ubyte(p[1]);
          *b = float_to_ubyte(p[2]);
@@ -271,7 +267,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
 
    case PIPE_FORMAT_R32G32_FLOAT:
       {
-         const float *p = (const float *) src;
+         const float *p = &uc->f[0];
          *r = float_to_ubyte(p[0]);
          *g = float_to_ubyte(p[1]);
          *b = *a = (ubyte) 0xff;
@@ -280,7 +276,7 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
 
    case PIPE_FORMAT_R32_FLOAT:
       {
-         const float *p = (const float *) src;
+         const float *p = &uc->f[0];
          *r = float_to_ubyte(p[0]);
          *g = *b = *a = (ubyte) 0xff;
       }
@@ -293,14 +289,13 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
       assert(0);
    }
 }
- 
 
 
 /**
  * Note rgba outside [0,1] will be clamped for int pixel formats.
  */
 static INLINE void
-util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
+util_pack_color(const float rgba[4], enum pipe_format format, union util_color *uc)
 {
    ubyte r = 0;
    ubyte g = 0;
@@ -318,90 +313,78 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
    switch (format) {
    case PIPE_FORMAT_R8G8B8A8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (r << 24) | (g << 16) | (b << 8) | a;
+         uc->ui = (r << 24) | (g << 16) | (b << 8) | a;
       }
       return;
    case PIPE_FORMAT_R8G8B8X8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (r << 24) | (g << 16) | (b << 8) | 0xff;
+         uc->ui = (r << 24) | (g << 16) | (b << 8) | 0xff;
       }
       return;
    case PIPE_FORMAT_A8R8G8B8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (a << 24) | (r << 16) | (g << 8) | b;
+         uc->ui = (a << 24) | (r << 16) | (g << 8) | b;
       }
       return;
    case PIPE_FORMAT_X8R8G8B8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (0xff << 24) | (r << 16) | (g << 8) | b;
+         uc->ui = (0xff << 24) | (r << 16) | (g << 8) | b;
       }
       return;
    case PIPE_FORMAT_B8G8R8A8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (b << 24) | (g << 16) | (r << 8) | a;
+         uc->ui = (b << 24) | (g << 16) | (r << 8) | a;
       }
       return;
    case PIPE_FORMAT_B8G8R8X8_UNORM:
       {
-         uint *d = (uint *) dest;
-         *d = (b << 24) | (g << 16) | (r << 8) | 0xff;
+         uc->ui = (b << 24) | (g << 16) | (r << 8) | 0xff;
       }
       return;
    case PIPE_FORMAT_R5G6B5_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
+         uc->us = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3);
       }
       return;
    case PIPE_FORMAT_A1R5G5B5_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
+         uc->us = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3);
       }
       return;
    case PIPE_FORMAT_A4R4G4B4_UNORM:
       {
-         ushort *d = (ushort *) dest;
-         *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
+         uc->us = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4);
       }
       return;
    case PIPE_FORMAT_A8_UNORM:
       {
-         ubyte *d = (ubyte *) dest;
-         *d = a;
+         uc->ub = a;
       }
       return;
    case PIPE_FORMAT_L8_UNORM:
    case PIPE_FORMAT_I8_UNORM:
       {
-         ubyte *d = (ubyte *) dest;
-         *d = r;
+         uc->ub = r;
       }
       return;
    case PIPE_FORMAT_R32G32B32A32_FLOAT:
       {
-         float *d = (float *) dest;
-         d[0] = rgba[0];
-         d[1] = rgba[1];
-         d[2] = rgba[2];
-         d[3] = rgba[3];
+         uc->f[0] = rgba[0];
+         uc->f[1] = rgba[1];
+         uc->f[2] = rgba[2];
+         uc->f[3] = rgba[3];
       }
       return;
    case PIPE_FORMAT_R32G32B32_FLOAT:
       {
-         float *d = (float *) dest;
-         d[0] = rgba[0];
-         d[1] = rgba[1];
-         d[2] = rgba[2];
+         uc->f[0] = rgba[0];
+         uc->f[1] = rgba[1];
+         uc->f[2] = rgba[2];
       }
       return;
    /* XXX lots more cases to add */
    default:
+      uc->ui = 0; /* keep compiler happy */
       debug_print_format("gallium: unhandled format in util_pack_color()", format);
       assert(0);
    }
diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c
index 79ad687ea9..3a3f968a49 100644
--- a/src/gallium/drivers/cell/ppu/cell_clear.c
+++ b/src/gallium/drivers/cell/ppu/cell_clear.c
@@ -59,9 +59,9 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
 
    if (buffers & PIPE_CLEAR_COLOR) {
       uint surfIndex = 0;
-      uint clearValue;
+      union util_color uc;
 
-      util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &clearValue);
+      util_pack_color(rgba, cell->framebuffer.cbufs[0]->format, &uc);
 
       /* Build a CLEAR command and place it in the current batch buffer */
       STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0);
@@ -70,7 +70,7 @@ cell_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
          cell_batch_alloc16(cell, sizeof(*clr));
       clr->opcode[0] = CELL_CMD_CLEAR_SURFACE;
       clr->surface = surfIndex;
-      clr->value = clearValue;
+      clr->value = uc.ui;
    }
 
    if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index bdcff94b9b..08d9f2e273 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -50,6 +50,7 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
                double depth, unsigned stencil)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   union util_color uc;
    unsigned cv;
    uint i;
 
@@ -64,8 +65,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
       for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
          struct pipe_surface *ps = llvmpipe->framebuffer.cbufs[i];
 
-         util_pack_color(rgba, ps->format, &cv);
-         lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, cv);
+         util_pack_color(rgba, ps->format, &uc);
+         lp_tile_cache_clear(llvmpipe->cbuf_cache[i], rgba, uc.ui);
       }
       llvmpipe->dirty_render_cache = TRUE;
    }
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 442af70e14..4ddbb357b6 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -153,7 +153,7 @@ static void r300_set_blend_color(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM,
-            &r300->blend_color_state->blend_color);
+            (union util_color *)&r300->blend_color_state->blend_color);
 
     /* XXX if FP16 blending is enabled, we should use the FP16 format */
     r300->blend_color_state->blend_color_red_alpha =
@@ -535,7 +535,7 @@ static void*
     sampler->filter1 |= r300_anisotropy(state->max_anisotropy);
 
     util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM,
-                    &sampler->border_color);
+                    (union util_color *)&sampler->border_color);
 
     /* R500-specific fixups and optimizations */
     if (r300_screen(r300->context.screen)->caps->is_r500) {
diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index 8fac8e6e05..f98087deb8 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -48,6 +48,7 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
                double depth, unsigned stencil)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
+   union util_color uc;
    unsigned cv;
    uint i;
 
@@ -62,12 +63,12 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
       for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
          struct pipe_surface *ps = softpipe->framebuffer.cbufs[i];
 
-         util_pack_color(rgba, ps->format, &cv);
-         sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, cv);
+         util_pack_color(rgba, ps->format, &uc);
+         sp_tile_cache_clear(softpipe->cbuf_cache[i], rgba, uc.ui);
 
 #if !TILE_CLEAR_OPTIMIZATION
          /* non-cached surface */
-         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv);
+         pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, uc.ui);
 #endif
       }
    }
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
index 6195c3897e..409b3b41cb 100644
--- a/src/gallium/drivers/svga/svga_pipe_clear.c
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -46,7 +46,7 @@ try_clear(struct svga_context *svga,
    boolean restore_viewport = FALSE;
    SVGA3dClearFlag flags = 0;
    struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
-   unsigned color = 0;
+   union util_color uc;
 
    ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR);
    if (ret)
@@ -54,7 +54,7 @@ try_clear(struct svga_context *svga,
 
    if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) {
       flags |= SVGA3D_CLEAR_COLOR;
-      util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color);
+      util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
 
       rect.w = fb->cbufs[0]->width;
       rect.h = fb->cbufs[0]->height;
@@ -77,7 +77,7 @@ try_clear(struct svga_context *svga,
          return ret;
    }
 
-   ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil,
+   ret = SVGA3D_ClearRect(svga->swc, flags, uc.ui, depth, stencil,
                           rect.x, rect.y, rect.w, rect.h);
    if (ret != PIPE_OK)
       return ret;
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index b4e57c5d15..7f530083d6 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -122,7 +122,7 @@ svga_create_sampler_state(struct pipe_context *pipe,
 
       util_pack_color_ub( r, g, b, a,
                           PIPE_FORMAT_B8G8R8A8_UNORM,
-                          &cso->bordercolor );
+                          (union util_color *)&cso->bordercolor );
    }
 
    /* No SVGA3D support for:
diff --git a/src/gallium/state_trackers/vega/vg_translate.c b/src/gallium/state_trackers/vega/vg_translate.c
index 00e0764706..5051d83831 100644
--- a/src/gallium/state_trackers/vega/vg_translate.c
+++ b/src/gallium/state_trackers/vega/vg_translate.c
@@ -487,7 +487,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -503,7 +503,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -520,7 +520,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -537,7 +537,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[3] = 1.f;
 
          util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         rgba[i]);
+                         (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -553,7 +553,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[3] = ((*src >>  0) & 1)/1.;
 
          util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         rgba[i]);
+                         (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -569,7 +569,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[3] = ((*src >>  0) & 15)/15.;
 
          util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         rgba[i]);
+                         (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -579,7 +579,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       src += offset;
       for (i = 0; i < n; ++i) {
          util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         rgba[i]);
+                         (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -595,7 +595,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -611,7 +611,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -628,7 +628,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -639,7 +639,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       src += offset;
       for (i = 0; i < n; ++i) {
          util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         rgba[i]);
+                         (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -649,7 +649,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       src += offset;
       for (i = 0; i < n; ++i) {
          util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
    }
@@ -668,7 +668,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[3] = 1.f;
 
             util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i+j]);
+                            (union util_color *)rgba[i+j]);
          }
          ++src;
       }
@@ -689,7 +689,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[3] = (((*src) & (1<<shift)) >> shift);
 
             util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i+j]);
+                            (union util_color *)rgba[i+j]);
          }
          ++src;
       }
@@ -716,7 +716,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[3] = ((*src) & (bitter)) >> shift;
 
             util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i +j]);
+                            (union util_color *)rgba[i +j]);
          }
          ++src;
       }
@@ -736,7 +736,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -753,7 +753,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -776,7 +776,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -793,7 +793,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -812,7 +812,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -829,7 +829,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -854,7 +854,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -871,7 +871,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          a = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -890,7 +890,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -907,7 +907,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -930,7 +930,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
@@ -947,7 +947,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  0) & 0xff;
 
          util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            rgba[i]);
+                            (union util_color *)rgba[i]);
          ++src;
       }
       return;
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 4b35f59cc2..5e2ae1bb36 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -167,7 +167,7 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
          ubyte g = ctx->PixelMaps.GtoG.Map8[i * gSize / texSize];
          ubyte b = ctx->PixelMaps.BtoB.Map8[j * bSize / texSize];
          ubyte a = ctx->PixelMaps.AtoA.Map8[i * aSize / texSize];
-         util_pack_color_ub(r, g, b, a, pt->format, dest + k);
+         util_pack_color_ub(r, g, b, a, pt->format, (union util_color *)(dest + k));
       }
    }
 
-- 
cgit v1.2.3


From add6dfbba64260c9b314b4a95c8def084e05bd3b Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 7 Dec 2009 19:04:07 -0800
Subject: llvmpipe: Initialize variables in emit_instruction.

---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index d4d18febec..f588bde983 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -496,9 +496,9 @@ emit_instruction(
       if (IS_DST0_CHANNEL_ENABLED( inst, CHAN_X ) ||
           IS_DST0_CHANNEL_ENABLED( inst, CHAN_Y ) ||
           IS_DST0_CHANNEL_ENABLED( inst, CHAN_Z )) {
-         LLVMValueRef *p_floor_log2;
-         LLVMValueRef *p_exp;
-         LLVMValueRef *p_log2;
+         LLVMValueRef *p_floor_log2 = NULL;
+         LLVMValueRef *p_exp = NULL;
+         LLVMValueRef *p_log2 = NULL;
 
          src0 = emit_fetch( bld, inst, 0, CHAN_X );
          src0 = lp_build_abs( &bld->base, src0 );
-- 
cgit v1.2.3


From 7e93e06781d2f3e0c737c7654c3fb0d83e31e45a Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 8 Dec 2009 00:37:35 -0800
Subject: i915g: Add missing break statement in i915_debug_packet.

---
 src/gallium/drivers/i915simple/i915_debug.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c
index ce92d1af9a..521b516470 100644
--- a/src/gallium/drivers/i915simple/i915_debug.c
+++ b/src/gallium/drivers/i915simple/i915_debug.c
@@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream )
       default:
 	 return debug(stream, "", 0);
       }
+      break;
    default:
       assert(0);
       return 0;
-- 
cgit v1.2.3


From 1de1deffce9c7120a167af8553b606eec82e60a3 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 8 Dec 2009 00:43:38 -0800
Subject: i915g: Fix memory leak when pci id is unknown.

---
 src/gallium/drivers/i915simple/i915_screen.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c
index 9f017a14cc..9557c80ce1 100644
--- a/src/gallium/drivers/i915simple/i915_screen.c
+++ b/src/gallium/drivers/i915simple/i915_screen.c
@@ -273,6 +273,7 @@ i915_create_screen(struct intel_winsys *iws, uint pci_id)
    default:
       debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", 
                    __FUNCTION__, pci_id);
+      FREE(is);
       return NULL;
    }
 
-- 
cgit v1.2.3


From 2aebc5e01fbab6046f80c881d30717f788a390bc Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Tue, 8 Dec 2009 13:11:09 +0000
Subject: move assert to avoid crash in debug build.

---
 src/gallium/drivers/llvmpipe/lp_tile_cache.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_tile_cache.c b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
index e83210f93b..7a1ecf5107 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_cache.c
@@ -290,11 +290,12 @@ lp_get_cached_tile(struct llvmpipe_tile_cache *tc,
    
    assert(tc->surface);
    assert(tc->transfer);
-   assert(tc->transfer_map);
 
    if(!tc->transfer_map)
       lp_tile_cache_map_transfers(tc);
 
+   assert(tc->transfer_map);
+
    switch(tile->status) {
    case LP_TILE_STATUS_CLEAR:
       /* don't get tile from framebuffer, just clear it */
-- 
cgit v1.2.3


From 6fd8b9b550713302566bb4c28e49c219870ccfec Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 8 Dec 2009 15:30:10 +0100
Subject: Remove remaining pipe format utility functions.

Deprecate pf_type(), pf_size_*(), pf_layout() and pf_exp2().

Map deprecated PIPE_FORMAT_TYPE to new UTIL_FORMAT_ values:

UNKNOWN = TYPE_VOID
UNORM   = TYPE_UNSIGNED + LAYOUT_ARITH
SNORM   = TYPE_SIGNED + LAYOUT_ARITH
FIXED   = TYPE_FIXED
FLOAT   = TYPE_FLOAT
USCALED = TYPE_UNSIGNED + LAYOUT_ARRAY
SSCALED = TYPE_SIGNED + LAYOUT_ARRAY
SRGB    = TYPE_COLORSPACE_SRGB
---
 src/gallium/auxiliary/util/u_pack_color.h        |  3 +-
 src/gallium/drivers/nv50/nv50_tex.c              |  8 ++-
 src/gallium/drivers/nv50/nv50_vbo.c              | 68 +++++++++++++++++-------
 src/gallium/drivers/r300/r300_state_inlines.h    | 62 ++++++++++-----------
 src/gallium/include/pipe/p_format.h              | 18 -------
 src/gallium/state_trackers/wgl/stw_framebuffer.c |  4 +-
 src/gallium/state_trackers/wgl/stw_pixelformat.c |  2 -
 src/mesa/state_tracker/st_format.c               | 65 +++++++++++-----------
 8 files changed, 122 insertions(+), 108 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index 9dacc6d83d..30cc7969e9 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -37,6 +37,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 
@@ -307,7 +308,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
    ubyte b = 0;
    ubyte a = 0;
 
-   if (pf_size_x(format) <= 8) {
+   if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) <= 8) {
       /* format uses 8-bit components or less */
       r = float_to_ubyte(rgba[0]);
       g = float_to_ubyte(rgba[1]);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 417d367942..55a601deb8 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -25,6 +25,8 @@
 
 #include "nouveau/nouveau_stateobj.h"
 
+#include "util/u_format.h"
+
 #define _MIXED(pf, t0, t1, t2, t3, cr, cg, cb, ca, f)		\
 {                                                       	\
 	PIPE_FORMAT_##pf,					\
@@ -89,6 +91,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 {
 	unsigned i;
 	uint32_t mode;
+	const struct util_format_description *desc;
 
 	for (i = 0; i < NV50_TEX_FORMAT_LIST_SIZE; i++)
 		if (nv50_tex_format_list[i].pf == mt->base.base.format)
@@ -106,7 +109,10 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	mode |= ((mt->base.bo->tile_mode & 0x0f) << 22) |
 		((mt->base.bo->tile_mode & 0xf0) << 21);
 
-	if (pf_type(mt->base.base.format) == PIPE_FORMAT_TYPE_SRGB)
+	desc = util_format_description(mt->base.base.format);
+	assert(desc);
+
+	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
 		mode |= 0x0400;
 
 	switch (mt->base.base.target) {
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 4b8783899e..f3472f266e 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -64,19 +64,36 @@ nv50_prim(unsigned mode)
 }
 
 static INLINE uint32_t
-nv50_vbo_type_to_hw(unsigned type)
+nv50_vbo_type_to_hw(enum pipe_format format)
 {
-	switch (type) {
-	case PIPE_FORMAT_TYPE_FLOAT:
+	const struct util_format_description *desc;
+
+	desc = util_format_description(format);
+	assert(desc);
+
+	switch (desc->type) {
+	case UTIL_FORMAT_TYPE_FLOAT:
 		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
-	case PIPE_FORMAT_TYPE_UNORM:
-		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
-	case PIPE_FORMAT_TYPE_SNORM:
-		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
-	case PIPE_FORMAT_TYPE_USCALED:
-		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
-	case PIPE_FORMAT_TYPE_SSCALED:
-		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
+	case UTIL_FORMAT_TYPE_UNSIGNED:
+		switch (desc->layout) {
+		case UTIL_FORMAT_LAYOUT_ARITH:
+			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
+		case UTIL_FORMAT_LAYOUT_ARRAY:
+			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
+		default:
+			return 0;
+		}
+		break;
+	case UTIL_FORMAT_TYPE_SIGNED:
+		switch (desc->layout) {
+		case UTIL_FORMAT_LAYOUT_ARITH:
+			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
+		case UTIL_FORMAT_LAYOUT_ARRAY:
+			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
+		default:
+			return 0;
+		}
+		break;
 	/*
 	case PIPE_FORMAT_TYPE_UINT:
 		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
@@ -122,9 +139,15 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 {
 	uint32_t hw_type, hw_size;
 	enum pipe_format pf = ve->src_format;
-	unsigned size = pf_size_x(pf) << pf_exp2(pf);
+	const struct util_format_description *desc;
+	unsigned size;
+
+	desc = util_format_description(pf);
+	assert(desc);
 
-	hw_type = nv50_vbo_type_to_hw(pf_type(pf));
+	size = util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0);
+
+	hw_type = nv50_vbo_type_to_hw(pf);
 	hw_size = nv50_vbo_size_to_hw(size, ve->nr_components);
 
 	if (!hw_type || !hw_size) {
@@ -133,7 +156,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 		return 0x24e80000;
 	}
 
-	if (util_format_description(pf)->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
+	if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_Z) /* BGRA */
 		hw_size |= (1 << 31); /* no real swizzle bits :-( */
 
 	return (hw_type | hw_size);
@@ -321,9 +344,13 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 	float *v;
 	int ret;
 	enum pipe_format pf = ve->src_format;
+	const struct util_format_description *desc;
+
+	desc = util_format_description(pf);
+	assert(desc);
 
-	if ((pf_type(pf) != PIPE_FORMAT_TYPE_FLOAT) ||
-	    (pf_size_x(pf) << pf_exp2(pf)) != 32)
+	if ((desc->type != UTIL_FORMAT_TYPE_FLOAT) ||
+	    util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32)
 		return FALSE;
 
 	ret = nouveau_bo_map(bo, NOUVEAU_BO_RD);
@@ -611,7 +638,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 	for (i = 0; i < nv50->vtxelt_nr; ++i) {
 		struct pipe_vertex_element *ve;
 		struct pipe_vertex_buffer *vb;
-		unsigned n, type, size;
+		unsigned n, size;
+		const struct util_format_description *desc;
 
 		ve = &nv50->vtxelt[i];
 		vb = &nv50->vtxbuf[ve->vertex_buffer_index];
@@ -623,8 +651,10 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 		emit->map[n] = nouveau_bo(vb->buffer)->map +
 			(start * vb->stride + ve->src_offset);
 
-		type = pf_type(ve->src_format);
-		size = pf_size_x(ve->src_format) << pf_exp2(ve->src_format);
+		desc = util_format_description(ve->src_format);
+		assert(desc);
+
+		size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
 
 		assert(ve->nr_components > 0 && ve->nr_components <= 4);
 
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index ab720f366d..5c58655da1 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -445,20 +445,22 @@ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
 static INLINE unsigned pf_component_count(enum pipe_format format) {
     unsigned count = 0;
 
-    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
-        return count;
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+        count++;
     }
-
-    if (pf_size_x(format)) {
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1)) {
+        count++;
+    }
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2)) {
         count++;
     }
-    if (pf_size_y(format)) {
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3)) {
         count++;
     }
-    if (pf_size_z(format)) {
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0)) {
         count++;
     }
-    if (pf_size_w(format)) {
+    if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1)) {
         count++;
     }
 
@@ -469,19 +471,23 @@ static INLINE unsigned pf_component_count(enum pipe_format format) {
 static INLINE uint16_t
 r300_translate_vertex_data_type(enum pipe_format format) {
     uint32_t result = 0;
+    const struct util_format_description *desc;
     unsigned components = pf_component_count(format);
 
-    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
+    desc = util_format_description(format);
+
+    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
         debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
             __FUNCTION__, __LINE__);
         assert(0);
     }
 
-    switch (pf_type(format)) {
+    switch (desc->type) {
         /* Half-floats, floats, doubles */
-        case PIPE_FORMAT_TYPE_FLOAT:
-            switch (pf_size_x(format)) {
-                case 4:
+        case UTIL_FORMAT_TYPE_FLOAT:
+            switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+                case 32:
                     result = R300_DATA_TYPE_FLOAT_1 + (components - 1);
                     break;
                 default:
@@ -490,19 +496,15 @@ r300_translate_vertex_data_type(enum pipe_format format) {
                     assert(0);
             }
             break;
-        /* Normalized unsigned ints */
-        case PIPE_FORMAT_TYPE_UNORM:
-        /* Normalized signed ints */
-        case PIPE_FORMAT_TYPE_SNORM:
-        /* Non-normalized unsigned ints */
-        case PIPE_FORMAT_TYPE_USCALED:
-        /* Non-normalized signed ints */
-        case PIPE_FORMAT_TYPE_SSCALED:
-            switch (pf_size_x(format)) {
-                case 1:
+        /* Unsigned ints */
+        case UTIL_FORMAT_TYPE_UNSIGNED:
+        /* Signed ints */
+        case UTIL_FORMAT_TYPE_SIGNED:
+            switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
+                case 8:
                     result = R300_DATA_TYPE_BYTE;
                     break;
-                case 2:
+                case 16:
                     if (components > 2) {
                         result = R300_DATA_TYPE_SHORT_4;
                     } else {
@@ -512,8 +514,8 @@ r300_translate_vertex_data_type(enum pipe_format format) {
                 default:
                     debug_printf("r300: Bad format %s in %s:%d\n",
                         pf_name(format), __FUNCTION__, __LINE__);
-                    debug_printf("r300: pf_size_x(format) == %d\n",
-                        pf_size_x(format));
+                    debug_printf("r300: util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == %d\n",
+                        util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0));
                     assert(0);
             }
             break;
@@ -523,12 +525,11 @@ r300_translate_vertex_data_type(enum pipe_format format) {
             assert(0);
     }
 
-    if (pf_type(format) == PIPE_FORMAT_TYPE_SSCALED) {
+    if (desc->type == UTIL_FORMAT_TYPE_SIGNED) {
         result |= R300_SIGNED;
-    } else if (pf_type(format) == PIPE_FORMAT_TYPE_UNORM) {
+    }
+    if (desc->layout == UTIL_FORMAT_LAYOUT_ARITH) {
         result |= R300_NORMALIZE;
-    } else if (pf_type(format) == PIPE_FORMAT_TYPE_SNORM) {
-        result |= (R300_SIGNED | R300_NORMALIZE);
     }
 
     return result;
@@ -540,7 +541,8 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
 
     assert(format);
 
-    if (pf_layout(format) != PIPE_FORMAT_LAYOUT_RGBAZS) {
+    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
         debug_printf("r300: Bad format %s in %s:%d\n",
             pf_name(format), __FUNCTION__, __LINE__);
         return 0;
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index cc7a8ab8df..77134f664b 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -54,11 +54,6 @@ extern "C" {
 #define PIPE_FORMAT_LAYOUT_DXT      2  /**< XXX temporary? */
 #define PIPE_FORMAT_LAYOUT_MIXED    3
 
-static INLINE uint pf_layout(uint f)  /**< PIPE_FORMAT_LAYOUT_ */
-{
-   return f & 0x3;
-}
-
 /**
  * RGBAZS Format Layout.
  */
@@ -107,19 +102,6 @@ static INLINE uint pf_layout(uint f)  /**< PIPE_FORMAT_LAYOUT_ */
  */
 typedef uint pipe_format_rgbazs_t;
 
-static INLINE uint pf_get(pipe_format_rgbazs_t f, uint shift, uint mask)
-{
-   return (f >> shift) & mask;
-}
-
-#define pf_size_x(f)          pf_get(f, 14, 0x7) /**< Size of X */
-#define pf_size_y(f)          pf_get(f, 17, 0x7) /**< Size of Y */
-#define pf_size_z(f)          pf_get(f, 20, 0x7) /**< Size of Z */
-#define pf_size_w(f)          pf_get(f, 23, 0x7) /**< Size of W */
-#define pf_size_xyzw(f,i)     pf_get(f, 14+((i)*3), 0x7)
-#define pf_exp2(f)            pf_get(f, 26, 0x7) /**< Scale size by 2 ^ exp2 */
-#define pf_type(f)            pf_get(f, 29, 0x7) /**< PIPE_FORMAT_TYPE_ */
-
 /**
  * Helper macro to encode the above structure into a 32-bit value.
  */
diff --git a/src/gallium/state_trackers/wgl/stw_framebuffer.c b/src/gallium/state_trackers/wgl/stw_framebuffer.c
index 5c3444777a..3d029f9174 100644
--- a/src/gallium/state_trackers/wgl/stw_framebuffer.c
+++ b/src/gallium/state_trackers/wgl/stw_framebuffer.c
@@ -268,9 +268,7 @@ stw_framebuffer_allocate(
       enum pipe_format colorFormat, depthFormat, stencilFormat;
 
       colorFormat = pfi->color_format;
-      
-      assert(pf_layout( pfi->depth_stencil_format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
-   
+
       if(util_format_get_component_bits(pfi->depth_stencil_format, UTIL_FORMAT_COLORSPACE_ZS, 0))
          depthFormat = pfi->depth_stencil_format;
       else
diff --git a/src/gallium/state_trackers/wgl/stw_pixelformat.c b/src/gallium/state_trackers/wgl/stw_pixelformat.c
index 5ac833ced6..54cc361412 100644
--- a/src/gallium/state_trackers/wgl/stw_pixelformat.c
+++ b/src/gallium/state_trackers/wgl/stw_pixelformat.c
@@ -133,12 +133,10 @@ stw_pixelformat_add(
    if(stw_dev->pixelformat_extended_count >= STW_MAX_PIXELFORMATS)
       return;
 
-   assert(pf_layout( color->format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
    assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 0) == color->bits.red);
    assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 1) == color->bits.green);
    assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 2) == color->bits.blue);
    assert(util_format_get_component_bits(color->format, UTIL_FORMAT_COLORSPACE_RGB, 3) == color->bits.alpha);
-   assert(pf_layout( depth->format ) == PIPE_FORMAT_LAYOUT_RGBAZS );
    assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 0) == depth->bits.depth);
    assert(util_format_get_component_bits(depth->format, UTIL_FORMAT_COLORSPACE_ZS, 1) == depth->bits.stencil);
    
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index c9f020c40f..329ae03db2 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -48,30 +48,28 @@
 
 
 static GLuint
-format_max_bits(
-   pipe_format_rgbazs_t  info )
+format_max_bits(enum pipe_format format)
 {
-   GLuint size = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0);
+   GLuint size = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0);
 
-   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1));
-   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2));
-   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3));
-   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0));
-   size = MAX2(size, util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1));
+   size = MAX2(size, util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1));
+   size = MAX2(size, util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2));
+   size = MAX2(size, util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3));
+   size = MAX2(size, util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0));
+   size = MAX2(size, util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1));
    return size;
 }
 
 static GLuint
-format_size(
-   pipe_format_rgbazs_t  info )
+format_size(enum pipe_format format)
 {
    return
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0) +
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1) +
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2) +
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3) +
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0) +
-      util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1);
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) +
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1) +
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2) +
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3) +
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0) +
+      util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1);
 }
 
 /*
@@ -80,14 +78,13 @@ format_size(
 GLboolean
 st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
 {
-   if (pf_layout(format) == PIPE_FORMAT_LAYOUT_RGBAZS) {
-      const struct util_format_description *desc = util_format_description(format);
-      pipe_format_rgbazs_t info;
+   const struct util_format_description *desc;
 
-      assert(desc);
-
-      info = format;
+   desc = util_format_description(format);
+   assert(desc);
 
+   if (desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
+       desc->layout == UTIL_FORMAT_LAYOUT_ARRAY) {
 #if 0
       printf("%s\n", pf_name( format ) );
 #endif
@@ -100,22 +97,22 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
          pinfo->datatype = GL_UNSIGNED_INT_24_8;
       }
       else {
-         const GLuint size = format_max_bits( info );
+         const GLuint size = format_max_bits(format);
          if (size == 8) {
-            if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM)
+            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_BYTE;
             else
                pinfo->datatype = GL_BYTE;
          }
          else if (size == 16) {
-            if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM)
+            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_SHORT;
             else
                pinfo->datatype = GL_SHORT;
          }
          else {
             assert( size <= 32 );
-            if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM)
+            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_INT;
             else
                pinfo->datatype = GL_INT;
@@ -123,17 +120,17 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
       }
 
       /* Component bits */
-      pinfo->red_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 0);
-      pinfo->green_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 1);
-      pinfo->blue_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 2);
-      pinfo->alpha_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_RGB, 3);
-      pinfo->depth_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 0);
-      pinfo->stencil_bits = util_format_get_component_bits((enum pipe_format)info, UTIL_FORMAT_COLORSPACE_ZS, 1);
+      pinfo->red_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+      pinfo->green_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 1);
+      pinfo->blue_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 2);
+      pinfo->alpha_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 3);
+      pinfo->depth_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 0);
+      pinfo->stencil_bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_ZS, 1);
       pinfo->luminance_bits = 0;
       pinfo->intensity_bits = 0;
 
       /* Format size */
-      pinfo->size = format_size( info ) / 8;
+      pinfo->size = format_size(format) / 8;
 
       /* Luminance & Intensity bits */
       if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_X &&
@@ -150,7 +147,7 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
 
       pinfo->mesa_format = st_pipe_format_to_mesa_format(format);
    }
-   else if (pf_layout(format) == PIPE_FORMAT_LAYOUT_YCBCR) {
+   else if (desc->layout == UTIL_FORMAT_LAYOUT_YUV) {
       pinfo->mesa_format = MESA_FORMAT_YCBCR;
       pinfo->datatype = GL_UNSIGNED_SHORT;
       pinfo->size = 2; /* two bytes per "texel" */
-- 
cgit v1.2.3


From ee1720b99dfb5964962f2346406a4e3e88374a68 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 8 Dec 2009 19:13:48 +0100
Subject: gallium: fix more potential strict aliasing issues

In particular, the gcc man page warns that
union a_union {
   int i;
   double d;
};

int f() {
   double d = 3.0;
   return ((union a_union *) &d)->i;
}

"might" not be ok (why not?), even though it doesn't seem to generate
any warnings. Hence don't use this; instead do the extra step of packing into
a local union and using assignment to get the values in/out of the union.
This changes parts of 3456f9149b3009fcfce80054759d05883d3c4ee5.
---
 src/gallium/drivers/r300/r300_state.c          |  10 +-
 src/gallium/drivers/svga/svga_pipe_sampler.c   |   5 +-
 src/gallium/state_trackers/vega/vg_translate.c | 190 ++++++++++++++++++-------
 src/mesa/state_tracker/st_atom_pixeltransfer.c |   4 +-
 4 files changed, 148 insertions(+), 61 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 4ddbb357b6..a83075df92 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -151,9 +151,10 @@ static void r300_set_blend_color(struct pipe_context* pipe,
                                  const struct pipe_blend_color* color)
 {
     struct r300_context* r300 = r300_context(pipe);
+    union util_color uc;
 
-    util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM,
-            (union util_color *)&r300->blend_color_state->blend_color);
+    util_pack_color(color->color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
+    r300->blend_color_state->blend_color = uc.ui;
 
     /* XXX if FP16 blending is enabled, we should use the FP16 format */
     r300->blend_color_state->blend_color_red_alpha =
@@ -513,6 +514,7 @@ static void*
     struct r300_context* r300 = r300_context(pipe);
     struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
     int lod_bias;
+    union util_color uc;
 
     sampler->filter0 |=
         (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) |
@@ -534,8 +536,8 @@ static void*
 
     sampler->filter1 |= r300_anisotropy(state->max_anisotropy);
 
-    util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM,
-                    (union util_color *)&sampler->border_color);
+    util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, &uc);
+    sampler->border_color = uc.ui;
 
     /* R500-specific fixups and optimizations */
     if (r300_screen(r300->context.screen)->caps->is_r500) {
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 7f530083d6..78053e755e 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -101,6 +101,7 @@ svga_create_sampler_state(struct pipe_context *pipe,
 {
    struct svga_context *svga = svga_context(pipe);
    struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );
+   union util_color uc;
    
    cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
    cso->magfilter = translate_img_filter( sampler->mag_img_filter );
@@ -121,8 +122,8 @@ svga_create_sampler_state(struct pipe_context *pipe,
       ubyte a = float_to_ubyte(sampler->border_color[3]);
 
       util_pack_color_ub( r, g, b, a,
-                          PIPE_FORMAT_B8G8R8A8_UNORM,
-                          (union util_color *)&cso->bordercolor );
+                          PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+      cso->bordercolor = uc.ui;
    }
 
    /* No SVGA3D support for:
diff --git a/src/gallium/state_trackers/vega/vg_translate.c b/src/gallium/state_trackers/vega/vg_translate.c
index 5051d83831..03575ca3dd 100644
--- a/src/gallium/state_trackers/vega/vg_translate.c
+++ b/src/gallium/state_trackers/vega/vg_translate.c
@@ -474,6 +474,7 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
                                   VGfloat rgba[][4])
 {
    VGint i;
+   union util_color uc;
 
    switch (dataFormat) {
    case VG_sRGBX_8888: {
@@ -486,8 +487,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -502,8 +506,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -519,8 +526,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -536,8 +546,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[2] = ((*src >>  0) & 31)/31.;
          clr[3] = 1.f;
 
-         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         (union util_color *)rgba[i]);
+         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -552,8 +565,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[2] = ((*src >>  1) & 31)/31.;
          clr[3] = ((*src >>  0) & 1)/1.;
 
-         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         (union util_color *)rgba[i]);
+         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -568,8 +584,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          clr[2] = ((*src >>  4) & 15)/15.;
          clr[3] = ((*src >>  0) & 15)/15.;
 
-         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         (union util_color *)rgba[i]);
+         util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -578,8 +597,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       VGubyte *src = (VGubyte *)data;
       src += offset;
       for (i = 0; i < n; ++i) {
-         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         (union util_color *)rgba[i]);
+         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -594,8 +616,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -610,8 +635,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -627,8 +655,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          b = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -638,8 +669,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       VGubyte *src = (VGubyte *)data;
       src += offset;
       for (i = 0; i < n; ++i) {
-         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                         (union util_color *)rgba[i]);
+         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -648,8 +682,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
       VGubyte *src = (VGubyte *)data;
       src += offset;
       for (i = 0; i < n; ++i) {
-         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(0xff, 0xff, 0xff, *src, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
    }
@@ -667,8 +704,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[2] = clr[0];
             clr[3] = 1.f;
 
-            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i+j]);
+            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+            rgba[i+j][0] = uc.f[0];
+            rgba[i+j][1] = uc.f[1];
+            rgba[i+j][2] = uc.f[2];
+            rgba[i+j][3] = uc.f[3];
          }
          ++src;
       }
@@ -688,8 +728,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[2] = 0.f;
             clr[3] = (((*src) & (1<<shift)) >> shift);
 
-            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i+j]);
+            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+            rgba[i+j][0] = uc.f[0];
+            rgba[i+j][1] = uc.f[1];
+            rgba[i+j][2] = uc.f[2];
+            rgba[i+j][3] = uc.f[3];
          }
          ++src;
       }
@@ -715,8 +758,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
             clr[2] = 0.f;
             clr[3] = ((*src) & (bitter)) >> shift;
 
-            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i +j]);
+            util_pack_color(clr, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+            rgba[i+j][0] = uc.f[0];
+            rgba[i+j][1] = uc.f[1];
+            rgba[i+j][2] = uc.f[2];
+            rgba[i+j][3] = uc.f[3];
          }
          ++src;
       }
@@ -735,8 +781,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          b = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -752,8 +801,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          b = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -775,8 +827,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          b = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -792,8 +847,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          b = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -811,8 +869,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -828,8 +889,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -853,8 +917,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -870,8 +937,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          r = (*src >>  8) & 0xff;
          a = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -889,8 +959,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          r = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -906,8 +979,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          r = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -929,8 +1005,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          r = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
@@ -946,8 +1025,11 @@ void _vega_unpack_float_span_rgba(struct vg_context *ctx,
          g = (*src >>  8) & 0xff;
          r = (*src >>  0) & 0xff;
 
-         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT,
-                            (union util_color *)rgba[i]);
+         util_pack_color_ub(r, g, b, a, PIPE_FORMAT_R32G32B32A32_FLOAT, &uc);
+         rgba[i][0] = uc.f[0];
+         rgba[i][1] = uc.f[1];
+         rgba[i][2] = uc.f[2];
+         rgba[i][3] = uc.f[3];
          ++src;
       }
       return;
diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c
index 5e2ae1bb36..6a5854e9ba 100644
--- a/src/mesa/state_tracker/st_atom_pixeltransfer.c
+++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c
@@ -162,12 +162,14 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt)
     */
    for (i = 0; i < texSize; i++) {
       for (j = 0; j < texSize; j++) {
+         union util_color uc;
          int k = (i * texSize + j);
          ubyte r = ctx->PixelMaps.RtoR.Map8[j * rSize / texSize];
          ubyte g = ctx->PixelMaps.GtoG.Map8[i * gSize / texSize];
          ubyte b = ctx->PixelMaps.BtoB.Map8[j * bSize / texSize];
          ubyte a = ctx->PixelMaps.AtoA.Map8[i * aSize / texSize];
-         util_pack_color_ub(r, g, b, a, pt->format, (union util_color *)(dest + k));
+         util_pack_color_ub(r, g, b, a, pt->format, &uc);
+         *(dest + k) = uc.ui;
       }
    }
 
-- 
cgit v1.2.3


From 876a785a182d7987786377ff0a44ee40628254f3 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 8 Dec 2009 19:58:13 +0100
Subject: Format layout cannot be used to distinguish scaled/normalised
 formats.

---
 src/gallium/drivers/nv50/nv50_vbo.c           | 18 ++++--------------
 src/gallium/drivers/r300/r300_state_inlines.h |  2 +-
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f3472f266e..d1c9f3f590 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -75,25 +75,15 @@ nv50_vbo_type_to_hw(enum pipe_format format)
 	case UTIL_FORMAT_TYPE_FLOAT:
 		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
 	case UTIL_FORMAT_TYPE_UNSIGNED:
-		switch (desc->layout) {
-		case UTIL_FORMAT_LAYOUT_ARITH:
+		if (desc->channel[0].normalized) {
 			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UNORM;
-		case UTIL_FORMAT_LAYOUT_ARRAY:
-			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
-		default:
-			return 0;
 		}
-		break;
+		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_USCALED;
 	case UTIL_FORMAT_TYPE_SIGNED:
-		switch (desc->layout) {
-		case UTIL_FORMAT_LAYOUT_ARITH:
+		if (desc->channel[0].normalized) {
 			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SNORM;
-		case UTIL_FORMAT_LAYOUT_ARRAY:
-			return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
-		default:
-			return 0;
 		}
-		break;
+		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_SSCALED;
 	/*
 	case PIPE_FORMAT_TYPE_UINT:
 		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_UINT;
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 5c58655da1..7cd1f87630 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -528,7 +528,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
     if (desc->type == UTIL_FORMAT_TYPE_SIGNED) {
         result |= R300_SIGNED;
     }
-    if (desc->layout == UTIL_FORMAT_LAYOUT_ARITH) {
+    if (desc->channel[0].normalized) {
         result |= R300_NORMALIZE;
     }
 
-- 
cgit v1.2.3


From 849a0644ada6ed7c3576babc3b348bee227118ff Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 8 Dec 2009 17:44:51 +0100
Subject: cell: use boolean instead of bool

---
 src/gallium/drivers/cell/ppu/cell_gen_fp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
index 1895a7940c..1d8a11a4ac 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c
@@ -995,7 +995,7 @@ static boolean
 emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst)
 {
    int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg;
-   bool complement = FALSE;
+   boolean complement = FALSE;
 
    one_reg = get_const_one_reg(gen);
 
-- 
cgit v1.2.3


From 47c780180b888e115b630cd940fe9c29dd53b4c5 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 8 Dec 2009 17:51:19 +0100
Subject: nouveau: use boolean instead of bool

---
 src/gallium/drivers/nv04/nv04_transfer.c | 2 +-
 src/gallium/drivers/nv10/nv10_transfer.c | 2 +-
 src/gallium/drivers/nv20/nv20_transfer.c | 2 +-
 src/gallium/drivers/nv30/nv30_transfer.c | 2 +-
 src/gallium/drivers/nv40/nv40_transfer.c | 2 +-
 src/gallium/drivers/nv50/nv50_context.h  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index e8ff686b4a..d66d6c6346 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -11,7 +11,7 @@
 struct nv04_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 9e44d37367..06bb513417 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -11,7 +11,7 @@
 struct nv10_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index f2e0a34db9..26a73c5143 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -11,7 +11,7 @@
 struct nv20_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index c8c3bd1f17..e29bfbd3ef 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -11,7 +11,7 @@
 struct nv30_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 1ee5cf39e0..ed5be1cf87 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -11,7 +11,7 @@
 struct nv40_transfer {
 	struct pipe_transfer base;
 	struct pipe_surface *surface;
-	bool direct;
+	boolean direct;
 };
 
 static void
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 4b0f062295..79135f2f36 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -65,7 +65,7 @@ struct nv50_rasterizer_stateobj {
 };
 
 struct nv50_sampler_stateobj {
-	bool normalized;
+	boolean normalized;
 	unsigned tsc[8];
 };
 
-- 
cgit v1.2.3


From eb926ddf9eee1095c7fc12013f0b8375bbaeca6f Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Tue, 8 Dec 2009 20:48:47 +0100
Subject: Simplify the redundant meaning of format layout.

We really just need to know whether the format is compressed or not.
For more detailed information format colorspace should suffice.
---
 src/gallium/auxiliary/util/u_format.h         |  7 ++-----
 src/gallium/auxiliary/util/u_format_table.py  | 10 +++++++---
 src/gallium/drivers/llvmpipe/lp_screen.c      |  8 ++------
 src/gallium/drivers/r300/r300_state_inlines.h |  6 ++----
 src/mesa/state_tracker/st_format.c            |  5 ++---
 5 files changed, 15 insertions(+), 21 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 25a06a8f7d..bb7c2add5c 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -34,11 +34,8 @@
 
 
 enum util_format_layout {
-   UTIL_FORMAT_LAYOUT_SCALAR = 0,
-   UTIL_FORMAT_LAYOUT_ARITH = 1,
-   UTIL_FORMAT_LAYOUT_ARRAY = 2,
-   UTIL_FORMAT_LAYOUT_YUV = 3,
-   UTIL_FORMAT_LAYOUT_DXT = 4
+   UTIL_FORMAT_LAYOUT_PLAIN = 0, /*< RGB, depth-stencil */
+   UTIL_FORMAT_LAYOUT_DXT = 1
 };
 
 
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index 8713594376..c772a75e61 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -35,8 +35,12 @@ import sys
 from u_format_parse import *
 
 
-def layout_map(layout):
-    return 'UTIL_FORMAT_LAYOUT_' + str(layout).upper()
+layout_map = {
+    'arith': 'UTIL_FORMAT_LAYOUT_PLAIN',
+    'array': 'UTIL_FORMAT_LAYOUT_PLAIN',
+    'yuv':   'UTIL_FORMAT_LAYOUT_PLAIN',
+    'dxt':   'UTIL_FORMAT_LAYOUT_DXT',
+}
 
 
 def colorspace_map(colorspace):
@@ -104,7 +108,7 @@ def write_format_table(formats):
         print "      %s," % (format.name,)
         print "      \"%s\"," % (format.name,)
         print "      {%u, %u, %u}, /* block */" % (format.block_width, format.block_height, format.block_size())
-        print "      %s," % (layout_map(format.layout),)
+        print "      %s," % (layout_map[format.layout],)
         print "      {"
         for i in range(4):
             type = format.in_types[i]
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index a6ecaa0b2b..3641e1dccb 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -158,9 +158,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
          format_desc->block.height != 1)
          return FALSE;
 
-      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
-         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
-         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
          return FALSE;
 
       if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
@@ -188,9 +186,7 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
          format_desc->block.height != 1)
          return FALSE;
 
-      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
-         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
-         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
          return FALSE;
 
       if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 7cd1f87630..c71305edc2 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -476,8 +476,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
 
     desc = util_format_description(format);
 
-    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
-        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
+    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
         debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
             __FUNCTION__, __LINE__);
         assert(0);
@@ -541,8 +540,7 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
 
     assert(format);
 
-    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
-        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
+    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
         debug_printf("r300: Bad format %s in %s:%d\n",
             pf_name(format), __FUNCTION__, __LINE__);
         return 0;
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 329ae03db2..2056dce7fb 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -83,8 +83,7 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
    desc = util_format_description(format);
    assert(desc);
 
-   if (desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
-       desc->layout == UTIL_FORMAT_LAYOUT_ARRAY) {
+   if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
 #if 0
       printf("%s\n", pf_name( format ) );
 #endif
@@ -147,7 +146,7 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
 
       pinfo->mesa_format = st_pipe_format_to_mesa_format(format);
    }
-   else if (desc->layout == UTIL_FORMAT_LAYOUT_YUV) {
+   else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV) {
       pinfo->mesa_format = MESA_FORMAT_YCBCR;
       pinfo->datatype = GL_UNSIGNED_SHORT;
       pinfo->size = 2; /* two bytes per "texel" */
-- 
cgit v1.2.3


From dfdf83d714c0d32d9182eb3001cf642aa6cb5c87 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Wed, 9 Dec 2009 14:22:30 +0100
Subject: Revert "Simplify the redundant meaning of format layout."

This reverts commit eb926ddf9eee1095c7fc12013f0b8375bbaeca6f.
---
 src/gallium/auxiliary/util/u_format.h         |  7 +++++--
 src/gallium/auxiliary/util/u_format_table.py  | 10 +++-------
 src/gallium/drivers/llvmpipe/lp_screen.c      |  8 ++++++--
 src/gallium/drivers/r300/r300_state_inlines.h |  6 ++++--
 src/mesa/state_tracker/st_format.c            |  5 +++--
 5 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index bb7c2add5c..25a06a8f7d 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -34,8 +34,11 @@
 
 
 enum util_format_layout {
-   UTIL_FORMAT_LAYOUT_PLAIN = 0, /*< RGB, depth-stencil */
-   UTIL_FORMAT_LAYOUT_DXT = 1
+   UTIL_FORMAT_LAYOUT_SCALAR = 0,
+   UTIL_FORMAT_LAYOUT_ARITH = 1,
+   UTIL_FORMAT_LAYOUT_ARRAY = 2,
+   UTIL_FORMAT_LAYOUT_YUV = 3,
+   UTIL_FORMAT_LAYOUT_DXT = 4
 };
 
 
diff --git a/src/gallium/auxiliary/util/u_format_table.py b/src/gallium/auxiliary/util/u_format_table.py
index c772a75e61..8713594376 100755
--- a/src/gallium/auxiliary/util/u_format_table.py
+++ b/src/gallium/auxiliary/util/u_format_table.py
@@ -35,12 +35,8 @@ import sys
 from u_format_parse import *
 
 
-layout_map = {
-    'arith': 'UTIL_FORMAT_LAYOUT_PLAIN',
-    'array': 'UTIL_FORMAT_LAYOUT_PLAIN',
-    'yuv':   'UTIL_FORMAT_LAYOUT_PLAIN',
-    'dxt':   'UTIL_FORMAT_LAYOUT_DXT',
-}
+def layout_map(layout):
+    return 'UTIL_FORMAT_LAYOUT_' + str(layout).upper()
 
 
 def colorspace_map(colorspace):
@@ -108,7 +104,7 @@ def write_format_table(formats):
         print "      %s," % (format.name,)
         print "      \"%s\"," % (format.name,)
         print "      {%u, %u, %u}, /* block */" % (format.block_width, format.block_height, format.block_size())
-        print "      %s," % (layout_map[format.layout],)
+        print "      %s," % (layout_map(format.layout),)
         print "      {"
         for i in range(4):
             type = format.in_types[i]
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 3641e1dccb..a6ecaa0b2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -158,7 +158,9 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
          format_desc->block.height != 1)
          return FALSE;
 
-      if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
          return FALSE;
 
       if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
@@ -186,7 +188,9 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
          format_desc->block.height != 1)
          return FALSE;
 
-      if(format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
          return FALSE;
 
       if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index c71305edc2..7cd1f87630 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -476,7 +476,8 @@ r300_translate_vertex_data_type(enum pipe_format format) {
 
     desc = util_format_description(format);
 
-    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
         debug_printf("r300: Bad format %s in %s:%d\n", pf_name(format),
             __FUNCTION__, __LINE__);
         assert(0);
@@ -540,7 +541,8 @@ r300_translate_vertex_data_swizzle(enum pipe_format format) {
 
     assert(format);
 
-    if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+    if (desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+        desc->layout != UTIL_FORMAT_LAYOUT_ARRAY) {
         debug_printf("r300: Bad format %s in %s:%d\n",
             pf_name(format), __FUNCTION__, __LINE__);
         return 0;
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 2056dce7fb..329ae03db2 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -83,7 +83,8 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
    desc = util_format_description(format);
    assert(desc);
 
-   if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+   if (desc->layout == UTIL_FORMAT_LAYOUT_ARITH ||
+       desc->layout == UTIL_FORMAT_LAYOUT_ARRAY) {
 #if 0
       printf("%s\n", pf_name( format ) );
 #endif
@@ -146,7 +147,7 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
 
       pinfo->mesa_format = st_pipe_format_to_mesa_format(format);
    }
-   else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_YUV) {
+   else if (desc->layout == UTIL_FORMAT_LAYOUT_YUV) {
       pinfo->mesa_format = MESA_FORMAT_YCBCR;
       pinfo->datatype = GL_UNSIGNED_SHORT;
       pinfo->size = 2; /* two bytes per "texel" */
-- 
cgit v1.2.3


From 8cc570a48c2e8e18622027cbd76f16a746b430bc Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 9 Dec 2009 00:55:51 +0100
Subject: r300g: clean up r300_emit_aos

---
 src/gallium/drivers/r300/r300_cs.h   |  9 +++++
 src/gallium/drivers/r300/r300_emit.c | 71 +++++++++++++++++++++++-------------
 src/gallium/drivers/r300/r300_reg.h  |  5 +++
 3 files changed, 59 insertions(+), 26 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 9fcf3ab538..d142fee050 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -115,6 +115,15 @@
     cs_count -= 3; \
 } while (0)
 
+#define OUT_CS_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
+    DBG(cs_context_copy, DBG_CS, "r300: writing relocation for buffer %p, " \
+            "domains (%d, %d, %d)\n", \
+        bo, rd, wd, flags); \
+    assert(bo); \
+    cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
+    cs_count -= 2; \
+} while (0)
+
 #define END_CS do { \
     if (VERY_VERBOSE_CS) { \
         DBG(cs_context_copy, DBG_CS, "r300: END_CS in %s (%s:%d)\n", __FUNCTION__, \
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index dbf316a9b5..7620c73cac 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -623,50 +624,68 @@ void r300_emit_texture(struct r300_context* r300,
     END_CS;
 }
 
-/* XXX I can't read this and that's not good */
-void r300_emit_aos(struct r300_context* r300, unsigned offset)
+static boolean r300_validate_aos(struct r300_context *r300)
 {
     struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
     struct pipe_vertex_element *velem = r300->vertex_element;
-    CS_LOCALS(r300);
     int i;
-    unsigned aos_count = r300->vertex_element_count;
 
+    /* Check if formats and strides are aligned to the size of DWORD. */
+    for (i = 0; i < r300->vertex_element_count; i++) {
+        if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
+            pf_get_blocksize(velem[i].src_format) % 4 != 0) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+void r300_emit_aos(struct r300_context* r300, unsigned offset)
+{
+    struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
+    struct pipe_vertex_element *velem = r300->vertex_element;
+    int i;
+    unsigned size1, size2, aos_count = r300->vertex_element_count;
     unsigned packet_size = (aos_count * 3 + 1) / 2;
+    CS_LOCALS(r300);
+
+    /* XXX Move this checking to a more appropriate place. */
+    if (!r300_validate_aos(r300)) {
+        /* XXX We should fall back to using Draw. */
+        assert(0);
+    }
+
     BEGIN_CS(2 + packet_size + aos_count * 2);
     OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
     OUT_CS(aos_count);
+
     for (i = 0; i < aos_count - 1; i += 2) {
-        int buf_num1 = velem[i].vertex_buffer_index;
-        int buf_num2 = velem[i+1].vertex_buffer_index;
-        assert(vbuf[buf_num1].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0);
-        assert(vbuf[buf_num2].stride % 4 == 0 && pf_get_blocksize(velem[i+1].src_format) % 4 == 0);
-        OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num1].stride << 6) |
-               (pf_get_blocksize(velem[i+1].src_format) << 14) | (vbuf[buf_num2].stride << 22));
-        OUT_CS(vbuf[buf_num1].buffer_offset + velem[i].src_offset +
-               offset * vbuf[buf_num1].stride);
-        OUT_CS(vbuf[buf_num2].buffer_offset + velem[i+1].src_offset +
-               offset * vbuf[buf_num2].stride);
+        vb1 = &vbuf[velem[i].vertex_buffer_index];
+        vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+        size1 = pf_get_blocksize(velem[i].src_format);
+        size2 = pf_get_blocksize(velem[i+1].src_format);
+
+        OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+               R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
+        OUT_CS(vb1->buffer_offset + velem[i].src_offset   + offset * vb1->stride);
+        OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
     }
+
     if (aos_count & 1) {
-        int buf_num = velem[i].vertex_buffer_index;
-        assert(vbuf[buf_num].stride % 4 == 0 && pf_get_blocksize(velem[i].src_format) % 4 == 0);
-        OUT_CS((pf_get_blocksize(velem[i].src_format) >> 2) | (vbuf[buf_num].stride << 6));
-        OUT_CS(vbuf[buf_num].buffer_offset + velem[i].src_offset +
-               offset * vbuf[buf_num].stride);
+        vb1 = &vbuf[velem[i].vertex_buffer_index];
+        size1 = pf_get_blocksize(velem[i].src_format);
+
+        OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+        OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
     }
 
-    /* XXX bare CS reloc */
     for (i = 0; i < aos_count; i++) {
-        cs_winsys->write_cs_reloc(cs_winsys,
-                                  vbuf[velem[i].vertex_buffer_index].buffer,
-                                  RADEON_GEM_DOMAIN_GTT,
-                                  0,
-                                  0);
-        cs_count -= 2;
+        OUT_CS_RELOC_NO_OFFSET(vbuf[velem[i].vertex_buffer_index].buffer,
+                               RADEON_GEM_DOMAIN_GTT, 0, 0);
     }
     END_CS;
 }
+
 #if 0
 void r300_emit_draw_packet(struct r300_context* r300)
 {
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 85b1ea568a..c1ea87d11e 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -3293,6 +3293,11 @@ enum {
  */
 #define R300_PACKET3_3D_LOAD_VBPNTR         0x00002F00
 
+#   define R300_VBPNTR_SIZE0(x)    ((x) >> 2)
+#   define R300_VBPNTR_STRIDE0(x)  (((x) >> 2) << 8)
+#   define R300_VBPNTR_SIZE1(x)    (((x) >> 2) << 16)
+#   define R300_VBPNTR_STRIDE1(x)  (((x) >> 2) << 24)
+
 #define R300_PACKET3_INDX_BUFFER            0x00003300
 #    define R300_INDX_BUFFER_DST_SHIFT          0
 #    define R300_INDX_BUFFER_SKIP_SHIFT         16
-- 
cgit v1.2.3


From 87b822e024797ef2fdb51ec9364f21eeb4d07161 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 8 Dec 2009 04:55:32 +0100
Subject: r300g: make pow(0,0) return 1 instead of NaN in the R500 fragment
 shader

Unfortunately we can't fix this easily in the R300 fragment shader,
and it's probably not worth the effort.
---
 src/gallium/drivers/r300/r300_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 7620c73cac..55bc2b3528 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -277,7 +277,7 @@ void r500_emit_fragment_program_code(struct r300_context* r300,
 
     BEGIN_CS(13 +
              ((code->inst_end + 1) * 6));
-    OUT_CS_REG(R500_US_CONFIG, 0);
+    OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
     OUT_CS_REG(R500_US_PIXSIZE, code->max_temp_idx);
     OUT_CS_REG(R500_US_CODE_RANGE,
                R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
-- 
cgit v1.2.3


From 6de7ac73bf027b9ace6f5f0c8063cbf724d95cee Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 8 Dec 2009 21:53:19 +0100
Subject: r300g: always disable unused colorbuffers

---
 src/gallium/drivers/r300/r300_emit.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 55bc2b3528..f784e1fa8e 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -331,7 +331,13 @@ void r300_emit_fb_state(struct r300_context* r300,
     int i;
     CS_LOCALS(r300);
 
-    BEGIN_CS((10 * fb->nr_cbufs) + (fb->zsbuf ? 10 : 0) + 4);
+    /* Shouldn't fail unless there is a bug in the state tracker. */
+    assert(fb->nr_cbufs <= 4);
+
+    BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) +
+             (fb->zsbuf ? 10 : 0) + 4);
+
+    /* Flush and free renderbuffer caches. */
     OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
         R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
         R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
@@ -339,6 +345,7 @@ void r300_emit_fb_state(struct r300_context* r300,
         R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
         R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
 
+    /* Set up colorbuffers. */
     for (i = 0; i < fb->nr_cbufs; i++) {
         surf = fb->cbufs[i];
         tex = (struct r300_texture*)surf->texture;
@@ -356,6 +363,12 @@ void r300_emit_fb_state(struct r300_context* r300,
             r300_translate_out_fmt(surf->format));
     }
 
+    /* Disable unused colorbuffers. */
+    for (; i < 4; i++) {
+        OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED);
+    }
+
+    /* Set up a zbuffer. */
     if (fb->zsbuf) {
         surf = fb->zsbuf;
         tex = (struct r300_texture*)surf->texture;
-- 
cgit v1.2.3


From c6b450033d7ec2a415b1d761da1d94588358c94b Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 9 Dec 2009 00:45:18 +0100
Subject: r300g: fix routing of vertex streams if TCL is bypassed

Generating mipmaps finally works, among other things. Yay!
---
 src/gallium/drivers/r300/r300_state.c         |  2 --
 src/gallium/drivers/r300/r300_state_derived.c | 17 +++++++++++----
 src/gallium/drivers/r300/r300_vs.c            | 31 +++++++++++----------------
 src/gallium/drivers/r300/r300_vs.h            |  4 +++-
 4 files changed, 29 insertions(+), 25 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 68c5408a64..edf7114bbb 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -419,8 +419,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
     if (state->bypass_vs_clip_and_viewport ||
             !r300_screen(pipe->screen)->caps->has_tcl) {
         rs->vap_control_status |= R300_VAP_TCL_BYPASS;
-    } else {
-        rs->rs.bypass_vs_clip_and_viewport = TRUE;
     }
 
     rs->point_size = pack_float_16_6x(state->point_size) |
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 6af49888b9..29bc701a86 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -134,6 +134,16 @@ static void r300_vertex_psc(struct r300_context* r300)
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i;
+    int identity[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+    int* stream_tab;
+
+    /* If TCL is bypassed, map vertex streams to equivalent VS output
+     * locations. */
+    if (r300->rs_state->enable_vte) {
+        stream_tab = identity;
+    } else {
+        stream_tab = r300->vs->stream_loc_notcl;
+    }
 
     /* Vertex shaders have no semantics on their inputs,
      * so PSC should just route stuff based on the vertex elements,
@@ -147,10 +157,10 @@ static void r300_vertex_psc(struct r300_context* r300)
         format = r300->vertex_element[i].src_format;
 
         type = r300_translate_vertex_data_type(format) |
-            (i << R300_DST_VEC_LOC_SHIFT);
+            (stream_tab[i] << R300_DST_VEC_LOC_SHIFT);
         swizzle = r300_translate_vertex_data_swizzle(format);
 
-        if (i % 2) {
+        if (i & 1) {
             vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
             vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
         } else {
@@ -159,7 +169,6 @@ static void r300_vertex_psc(struct r300_context* r300)
         }
     }
 
-
     assert(i <= 15);
 
     /* Set the last vector in the PSC. */
@@ -178,7 +187,7 @@ static void r300_swtcl_vertex_psc(struct r300_context* r300)
     uint16_t type, swizzle;
     enum pipe_format format;
     unsigned i, attrib_count;
-    int* vs_output_tab = r300->vs->output_stream_loc_swtcl;
+    int* vs_output_tab = r300->vs->stream_loc_notcl;
 
     /* For each Draw attribute, route it to the fragment shader according
      * to the vs_output_tab. */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 31248346bc..fa207c939c 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -143,35 +143,33 @@ static void r300_shader_vap_output_fmt(
     assert(gen_count <= 8);
 }
 
-/* Set VS output stream locations for SWTCL. */
-static void r300_stream_locations_swtcl(
+/* Sets up stream mapping to equivalent VS outputs if TCL is bypassed
+ * or isn't present. */
+static void r300_stream_locations_notcl(
     struct r300_shader_semantics* vs_outputs,
-    int* output_stream_loc)
+    int* stream_loc)
 {
     int i, tabi = 0, gen_count;
 
-    /* XXX Check whether the numbers (0, 1, 2+i, etc.) are correct.
-     * These should go to VAP_PROG_STREAM_CNTL/DST_VEC_LOC. */
-
     /* Position. */
-    output_stream_loc[tabi++] = 0;
+    stream_loc[tabi++] = 0;
 
     /* Point size. */
     if (vs_outputs->psize != ATTR_UNUSED) {
-        output_stream_loc[tabi++] = 1;
+        stream_loc[tabi++] = 1;
     }
 
     /* Colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
         if (vs_outputs->color[i] != ATTR_UNUSED) {
-            output_stream_loc[tabi++] = 2 + i;
+            stream_loc[tabi++] = 2 + i;
         }
     }
 
     /* Back-face colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
         if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
-            output_stream_loc[tabi++] = 4 + i;
+            stream_loc[tabi++] = 4 + i;
         }
     }
 
@@ -180,7 +178,7 @@ static void r300_stream_locations_swtcl(
     for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
         if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
             assert(tabi < 16);
-            output_stream_loc[tabi++] = 6 + gen_count;
+            stream_loc[tabi++] = 6 + gen_count;
             gen_count++;
         }
     }
@@ -188,7 +186,7 @@ static void r300_stream_locations_swtcl(
     /* Fog coordinates. */
     if (vs_outputs->fog != ATTR_UNUSED) {
         assert(tabi < 16);
-        output_stream_loc[tabi++] = 6 + gen_count;
+        stream_loc[tabi++] = 6 + gen_count;
         gen_count++;
     }
 
@@ -196,7 +194,7 @@ static void r300_stream_locations_swtcl(
     assert(gen_count <= 8);
 
     for (; tabi < 16;) {
-        output_stream_loc[tabi++] = -1;
+        stream_loc[tabi++] = -1;
     }
 }
 
@@ -254,10 +252,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
     /* Initialize. */
     r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
     r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
-
-    if (!r300_screen(r300->context.screen)->caps->has_tcl) {
-        r300_stream_locations_swtcl(&vs->outputs, vs->output_stream_loc_swtcl);
-    }
+    r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
 
     /* Setup the compiler */
     rc_init(&compiler.Base);
@@ -283,7 +278,7 @@ void r300_translate_vertex_shader(struct r300_context* r300,
     /* Invoke the compiler */
     r3xx_compile_vertex_program(&compiler);
     if (compiler.Base.Error) {
-        /* XXX Fail gracefully */
+        /* XXX We should fallback using Draw. */
         fprintf(stderr, "r300 VP: Compiler error\n");
         abort();
     }
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 283dd5a9e8..67e9db5366 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -38,9 +38,11 @@ struct r300_vertex_shader {
 
     struct tgsi_shader_info info;
     struct r300_shader_semantics outputs;
-    int output_stream_loc_swtcl[16];
     uint hwfmt[4];
 
+    /* Stream locations for SWTCL or if TCL is bypassed. */
+    int stream_loc_notcl[16];
+
     /* Has this shader been translated yet? */
     boolean translated;
 
-- 
cgit v1.2.3


From 770323e33e62169827454af74e9f90f09997f962 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Dec 2009 12:09:02 +0000
Subject: svga: Fix mixed signed comparisons.

---
 src/gallium/drivers/svga/svga_screen_texture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index e7301aba84..ed83ba48f0 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -528,7 +528,7 @@ svga_texture_view_surface(struct pipe_context *pipe,
 {
    struct svga_screen *ss = svga_screen(tex->base.screen);
    struct svga_winsys_surface *handle;
-   int i, j;
+   uint32_t i, j;
    unsigned z_offset = 0;
 
    SVGA_DBG(DEBUG_PERF, 
-- 
cgit v1.2.3


From 16876b8328059446b6fa0951f7848e5d500244ab Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Dec 2009 12:29:02 +0000
Subject: svga: Keep tight control of texture handle ownership.

The texture owns the surface handle. All derivatives need to keep
a reference to texture.

This fixes several assertion failures when starting up Jedi Knight 2.

Should cause no change for DRM surface sharing -- reference counting is
still done as before there.
---
 src/gallium/drivers/svga/svga_screen_texture.c | 35 ++++++++++++++------------
 src/gallium/drivers/svga/svga_screen_texture.h |  9 ++++++-
 2 files changed, 27 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index ed83ba48f0..1eb03db280 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -657,13 +657,11 @@ svga_get_tex_surface(struct pipe_screen *screen,
       s->real_level = 0;
       s->real_zslice = 0;
    } else {
-      struct svga_winsys_screen *sws = svga_winsys_screen(screen);
-
       SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
                pt, level, face, zslice, s);
 
       memset(&s->key, 0, sizeof s->key);
-      sws->surface_reference(sws, &s->handle, tex->handle);
+      s->handle = tex->handle;
       s->real_face = face;
       s->real_level = level;
       s->real_zslice = zslice;
@@ -677,11 +675,14 @@ static void
 svga_tex_surface_destroy(struct pipe_surface *surf)
 {
    struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *t = svga_texture(surf->texture);
    struct svga_screen *ss = svga_screen(surf->texture->screen);
 
-   SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
-   assert(s->key.cachable == 0);
-   svga_screen_surface_destroy(ss, &s->key, &s->handle);
+   if(s->handle != t->handle) {
+      SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
+      svga_screen_surface_destroy(ss, &s->key, &s->handle);
+   }
+
    pipe_texture_reference(&surf->texture, NULL);
    FREE(surf);
 }
@@ -910,7 +911,6 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
                           unsigned min_lod, unsigned max_lod)
 {
    struct svga_screen *ss = svga_screen(pt->screen);
-   struct svga_winsys_screen *sws = ss->sws;
    struct svga_texture *tex = svga_texture(pt); 
    struct svga_sampler_view *sv = NULL;
    SVGA3dSurfaceFormat format = svga_translate_format(pt->format);
@@ -961,7 +961,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
 
    sv = CALLOC_STRUCT(svga_sampler_view);
    pipe_reference_init(&sv->reference, 1);
-   sv->texture = tex;
+   pipe_texture_reference(&sv->texture, pt);
    sv->min_lod = min_lod;
    sv->max_lod = max_lod;
 
@@ -976,7 +976,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
                pt->depth[0],
                pt->last_level);
       sv->key.cachable = 0;
-      sws->surface_reference(sws, &sv->handle, tex->handle);
+      sv->handle = tex->handle;
       return sv;
    }
 
@@ -999,7 +999,7 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
    if (!sv->handle) {
       assert(0);
       sv->key.cachable = 0;
-      sws->surface_reference(sws, &sv->handle, tex->handle);
+      sv->handle = tex->handle;
       return sv;
    }
 
@@ -1013,14 +1013,14 @@ svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
 void
 svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
 {
-   struct svga_texture *tex = v->texture;
+   struct svga_texture *tex = svga_texture(v->texture);
    unsigned numFaces;
    unsigned age = 0;
    int i, k;
 
    assert(svga);
 
-   if (v->handle == v->texture->handle)
+   if (v->handle == tex->handle)
       return;
 
    age = tex->age;
@@ -1048,11 +1048,14 @@ svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *
 void
 svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
 {
-   struct svga_screen *ss = svga_screen(v->texture->base.screen);
-
-   SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
-   svga_screen_surface_destroy(ss, &v->key, &v->handle);
+   struct svga_texture *tex = svga_texture(v->texture);
 
+   if(v->handle != tex->handle) {
+      struct svga_screen *ss = svga_screen(v->texture->screen);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
+      svga_screen_surface_destroy(ss, &v->key, &v->handle);
+   }
+   pipe_texture_reference(&v->texture, NULL);
    FREE(v);
 }
 
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
index 1cc4063e65..8cfdfea693 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.h
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -61,7 +61,7 @@ struct svga_sampler_view
 {
    struct pipe_reference reference;
 
-   struct svga_texture *texture;
+   struct pipe_texture *texture;
 
    int min_lod;
    int max_lod;
@@ -94,6 +94,13 @@ struct svga_texture
     * operation.
     */
    struct svga_host_surface_cache_key key;
+
+   /**
+    * Handle for the host side surface.
+    *
+    * This handle is owned by this texture. Views should hold on to a reference
+    * to this texture and never destroy this handle directly.
+    */
    struct svga_winsys_surface *handle;
 };
 
-- 
cgit v1.2.3


From 8469baf41bd4775eab2403ecf08ed013343943a5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Dec 2009 13:15:12 +0000
Subject: svga: Always pass SVGA3D_SURFACE_HINT_DYNAMIC.

Since we're reusing buffers we're effectively transforming all
of them into dynamic buffers.

It would be nice to not cache long lived static buffers. But there
is no way to detect the long lived from short lived ones yet. A
good heuristic would be buffer size.
---
 src/gallium/drivers/svga/svga_screen_cache.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
index 8a06383f61..eff36e0bcc 100644
--- a/src/gallium/drivers/svga/svga_screen_cache.c
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -277,6 +277,15 @@ svga_screen_surface_create(struct svga_screen *svgascreen,
          while(size < key->size.width)
             size <<= 1;
          key->size.width = size;
+	 /* Since we're reusing buffers we're effectively transforming all
+	  * of them into dynamic buffers.
+	  *
+	  * It would be nice to not cache long lived static buffers. But there
+	  * is no way to detect the long lived from short lived ones yet. A
+	  * good heuristic would be buffer size.
+	  */
+	 key->flags &= ~SVGA3D_SURFACE_HINT_STATIC;
+	 key->flags |= SVGA3D_SURFACE_HINT_DYNAMIC;
       }
 
       handle = svga_screen_cache_lookup(svgascreen, key);
-- 
cgit v1.2.3


From ffae1f938d61165fce620bfd76ea7ae74dc63289 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 11 Dec 2009 14:14:03 +0000
Subject: svga: Add a missing dependency from the prescale state.

Thanks to Keith for finding this.

Fixes Jedi Knight 2 menus.
---
 src/gallium/drivers/svga/svga_state_constants.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
index 18cce7dde1..a5777d4fbd 100644
--- a/src/gallium/drivers/svga/svga_state_constants.c
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -231,7 +231,8 @@ static int emit_vs_consts( struct svga_context *svga,
 struct svga_tracked_state svga_hw_vs_parameters = 
 {
    "hw vs params",
-   (SVGA_NEW_VS_CONST_BUFFER |
+   (SVGA_NEW_PRESCALE |
+    SVGA_NEW_VS_CONST_BUFFER |
     SVGA_NEW_ZERO_STRIDE |
     SVGA_NEW_VS_RESULT),
    emit_vs_consts
-- 
cgit v1.2.3


From 6a15ec9141b070b088d03d87673d0d2741b7db6b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 20:50:02 +0100
Subject: nv50: support vertex program textures

---
 src/gallium/drivers/nv50/nv50_context.h        | 12 ++--
 src/gallium/drivers/nv50/nv50_screen.c         | 17 +++--
 src/gallium/drivers/nv50/nv50_state.c          | 59 ++++++++++++-----
 src/gallium/drivers/nv50/nv50_state_validate.c | 52 ++++++++++-----
 src/gallium/drivers/nv50/nv50_tex.c            | 90 +++++++++++++++++---------
 5 files changed, 156 insertions(+), 74 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 79135f2f36..5578a5838f 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -126,7 +126,7 @@ struct nv50_state {
 	unsigned viewport_bypass;
 	struct nouveau_stateobj *tsc_upload;
 	struct nouveau_stateobj *tic_upload;
-	unsigned miptree_nr;
+	unsigned miptree_nr[PIPE_SHADER_TYPES];
 	struct nouveau_stateobj *vertprog;
 	struct nouveau_stateobj *fragprog;
 	struct nouveau_stateobj *programs;
@@ -162,10 +162,10 @@ struct nv50_context {
 	unsigned vtxbuf_nr;
 	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
 	unsigned vtxelt_nr;
-	struct nv50_sampler_stateobj *sampler[PIPE_MAX_SAMPLERS];
-	unsigned sampler_nr;
-	struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS];
-	unsigned miptree_nr;
+	struct nv50_sampler_stateobj *sampler[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+	unsigned sampler_nr[PIPE_SHADER_TYPES];
+	struct nv50_miptree *miptree[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+	unsigned miptree_nr[PIPE_SHADER_TYPES];
 
 	uint16_t vbo_fifo;
 };
@@ -218,7 +218,7 @@ extern void nv50_state_flush_notify(struct nouveau_channel *chan);
 extern void nv50_so_init_sifc(struct nv50_context *nv50,
 			      struct nouveau_stateobj *so,
 			      struct nouveau_bo *bo, unsigned reloc,
-			      unsigned size);
+			      unsigned offset, unsigned size);
 
 /* nv50_tex.c */
 extern void nv50_tex_validate(struct nv50_context *);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index e1b2f11239..862be46a9e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -97,6 +97,10 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	switch (param) {
 	case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
 		return 32;
+	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+		return 32;
+	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+		return 64;
 	case PIPE_CAP_NPOT_TEXTURES:
 		return 1;
 	case PIPE_CAP_TWO_SIDED_STENCIL:
@@ -122,8 +126,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
 	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
 		return 1;
-	case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
-		return 0;
 	case PIPE_CAP_TGSI_CONT_SUPPORTED:
 		return 0;
 	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
@@ -315,6 +317,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, 0x1400, 1);
 	so_data  (so, 0xf);
 
+	/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
+	so_method(so, screen->tesla, 0x13b4, 1);
+	so_data  (so, 0x54);
 	so_method(so, screen->tesla, 0x13bc, 1);
 	so_data  (so, 0x54);
 	/* origin is top left (set to 1 for bottom left) */
@@ -387,7 +392,8 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x00000131 | (NV50_CB_PFP << 12));
 
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tic);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+			     &screen->tic);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
@@ -398,9 +404,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x000007ff);
+	so_data  (so, PIPE_SHADER_TYPES * 32 - 1);
 
-	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 64*8*4, &screen->tsc);
+	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, PIPE_SHADER_TYPES*32*32,
+			     &screen->tsc);
 	if (ret) {
 		nv50_screen_destroy(pscreen);
 		return NULL;
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 07318f2394..9c8c0c261e 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -215,41 +215,66 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	return (void *)sso;
 }
 
-static void
-nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler)
+static INLINE void
+nv50_sampler_state_bind(struct pipe_context *pipe, unsigned type,
+			unsigned nr, void **sampler)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
-	int i;
 
-	nv50->sampler_nr = nr;
-	for (i = 0; i < nv50->sampler_nr; i++)
-		nv50->sampler[i] = sampler[i];
+	memcpy(nv50->sampler[type], sampler, nr * sizeof(void *));
 
+	nv50->sampler_nr[type] = nr;
 	nv50->dirty |= NV50_NEW_SAMPLER;
 }
 
+static void
+nv50_vp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+	nv50_sampler_state_bind(pipe, PIPE_SHADER_VERTEX, nr, s);
+}
+
+static void
+nv50_fp_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+	nv50_sampler_state_bind(pipe, PIPE_SHADER_FRAGMENT, nr, s);
+}
+
 static void
 nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
 {
 	FREE(hwcso);
 }
 
-static void
-nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr,
-			 struct pipe_texture **pt)
+static INLINE void
+nv50_set_sampler_texture(struct pipe_context *pipe, unsigned type,
+			 unsigned nr, struct pipe_texture **pt)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
-	int i;
+	unsigned i;
 
 	for (i = 0; i < nr; i++)
-		pipe_texture_reference((void *)&nv50->miptree[i], pt[i]);
-	for (i = nr; i < nv50->miptree_nr; i++)
-		pipe_texture_reference((void *)&nv50->miptree[i], NULL);
+		pipe_texture_reference((void *)&nv50->miptree[type][i], pt[i]);
+	for (i = nr; i < nv50->miptree_nr[type]; i++)
+		pipe_texture_reference((void *)&nv50->miptree[type][i], NULL);
 
-	nv50->miptree_nr = nr;
+	nv50->miptree_nr[type] = nr;
 	nv50->dirty |= NV50_NEW_TEXTURE;
 }
 
+static void
+nv50_set_vp_sampler_textures(struct pipe_context *pipe,
+			     unsigned nr, struct pipe_texture **pt)
+{
+	nv50_set_sampler_texture(pipe, PIPE_SHADER_VERTEX, nr, pt);
+}
+
+static void
+nv50_set_fp_sampler_textures(struct pipe_context *pipe,
+			     unsigned nr, struct pipe_texture **pt)
+{
+	nv50_set_sampler_texture(pipe, PIPE_SHADER_FRAGMENT, nr, pt);
+}
+
 static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
 			     const struct pipe_rasterizer_state *cso)
@@ -648,9 +673,11 @@ nv50_init_state_functions(struct nv50_context *nv50)
 	nv50->pipe.delete_blend_state = nv50_blend_state_delete;
 
 	nv50->pipe.create_sampler_state = nv50_sampler_state_create;
-	nv50->pipe.bind_fragment_sampler_states = nv50_sampler_state_bind;
 	nv50->pipe.delete_sampler_state = nv50_sampler_state_delete;
-	nv50->pipe.set_fragment_sampler_textures = nv50_set_sampler_texture;
+	nv50->pipe.bind_fragment_sampler_states = nv50_fp_sampler_state_bind;
+	nv50->pipe.bind_vertex_sampler_states   = nv50_vp_sampler_state_bind;
+	nv50->pipe.set_fragment_sampler_textures = nv50_set_fp_sampler_textures;
+	nv50->pipe.set_vertex_sampler_textures   = nv50_set_vp_sampler_textures;
 
 	nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create;
 	nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c871acaab8..871e8097b6 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -155,6 +155,30 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 	so_ref(NULL, &so);
 }
 
+static void
+nv50_validate_samplers(struct nv50_context *nv50, struct nouveau_stateobj *so,
+		       unsigned p)
+{
+	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
+	unsigned i, j, dw = nv50->sampler_nr[p] * 8;
+
+	if (!dw)
+		return;
+	nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
+			  p * (32 * 8 * 4), dw * 4);
+
+	so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), dw);
+
+	for (i = 0; i < nv50->sampler_nr[p]; ++i) {
+		if (nv50->sampler[p][i])
+			so_datap(so, nv50->sampler[p][i]->tsc, 8);
+		else {
+			for (j = 0; j < 8; ++j) /* you get punished */
+				so_data(so, 0); /* ... for leaving holes */
+		}
+	}
+}
+
 static void
 nv50_state_emit(struct nv50_context *nv50)
 {
@@ -246,7 +270,6 @@ boolean
 nv50_state_validate(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_stateobj *so;
 	unsigned i;
 
@@ -369,22 +392,16 @@ scissor_uptodate:
 viewport_uptodate:
 
 	if (nv50->dirty & NV50_NEW_SAMPLER) {
-		unsigned i;
+		unsigned nr = 0;
 
-		so = so_new(nv50->sampler_nr * 9 + 23 + 4, 2);
+		for (i = 0; i < PIPE_SHADER_TYPES; ++i)
+			nr += nv50->sampler_nr[i];
 
-		nv50_so_init_sifc(nv50, so, nv50->screen->tsc, NOUVEAU_BO_VRAM,
-				  nv50->sampler_nr * 8 * 4);
+		so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
 
-		for (i = 0; i < nv50->sampler_nr; i++) {
-			if (!nv50->sampler[i])
-				continue;
-			so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-			so_datap (so, nv50->sampler[i]->tsc, 8);
-		}
+		nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
+		nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
 
-		so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-		so_data  (so, 0);
 		so_method(so, tesla, 0x1334, 1); /* flush TSC */
 		so_data  (so, 0);
 
@@ -407,10 +424,13 @@ viewport_uptodate:
 
 void nv50_so_init_sifc(struct nv50_context *nv50,
 		       struct nouveau_stateobj *so,
-		       struct nouveau_bo *bo, unsigned reloc, unsigned size)
+		       struct nouveau_bo *bo, unsigned reloc,
+		       unsigned offset, unsigned size)
 {
 	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 
+	reloc |= NOUVEAU_BO_WR;
+
 	so_method(so, eng2d, NV50_2D_DST_FORMAT, 2);
 	so_data  (so, NV50_2D_DST_FORMAT_R8_UNORM);
 	so_data  (so, 1);
@@ -418,8 +438,8 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
 	so_data  (so, 262144);
 	so_data  (so, 65536);
 	so_data  (so, 1);
-	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_HIGH, 0, 0);
-	so_reloc (so, bo, 0, reloc | NOUVEAU_BO_WR | NOUVEAU_BO_LOW, 0, 0);
+	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
+	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
 	so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
 	so_data  (so, 0);
 	so_data  (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 417d367942..60b0ca7159 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -85,7 +85,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 
 static int
 nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
-		   struct nv50_miptree *mt, int unit)
+		   struct nv50_miptree *mt, int unit, unsigned p)
 {
 	unsigned i;
 	uint32_t mode;
@@ -96,7 +96,7 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	if (i == NV50_TEX_FORMAT_LIST_SIZE)
                 return 1;
 
-	if (nv50->sampler[unit]->normalized)
+	if (nv50->sampler[p][unit]->normalized)
 		mode = 0x50001000 | (1 << 31);
 	else {
 		mode = 0x50001000 | (7 << 14);
@@ -140,48 +140,78 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 	return 0;
 }
 
-void
-nv50_tex_validate(struct nv50_context *nv50)
+#ifndef NV50TCL_BIND_TIC
+#define NV50TCL_BIND_TIC(n) (0x1448 + 8 * n)
+#endif
+
+static boolean
+nv50_validate_textures(struct nv50_context *nv50, struct nouveau_stateobj *so,
+		       unsigned p)
 {
+	static const unsigned p_remap[PIPE_SHADER_TYPES] = { 0, 2 };
+
 	struct nouveau_grobj *eng2d = nv50->screen->eng2d;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_stateobj *so;
-	unsigned i, unit, push;
-
-	push = MAX2(nv50->miptree_nr, nv50->state.miptree_nr) * 2 + 23 + 6;
-	so = so_new(nv50->miptree_nr * 9 + push, nv50->miptree_nr * 2 + 2);
+	unsigned unit, j, p_hw = p_remap[p];
 
 	nv50_so_init_sifc(nv50, so, nv50->screen->tic, NOUVEAU_BO_VRAM,
-			  nv50->miptree_nr * 8 * 4);
+			  p * (32 * 8 * 4), nv50->miptree_nr[p] * 8 * 4);
 
-	for (i = 0, unit = 0; unit < nv50->miptree_nr; ++unit) {
-		struct nv50_miptree *mt = nv50->miptree[unit];
-
-		if (!mt)
-			continue;
+	for (unit = 0; unit < nv50->miptree_nr[p]; ++unit) {
+		struct nv50_miptree *mt = nv50->miptree[p][unit];
 
 		so_method(so, eng2d, NV50_2D_SIFC_DATA | (2 << 29), 8);
-		if (nv50_tex_construct(nv50, so, mt, unit)) {
-			NOUVEAU_ERR("failed tex validate\n");
-			so_ref(NULL, &so);
-			return;
+		if (mt) {
+			if (nv50_tex_construct(nv50, so, mt, unit, p))
+				return FALSE;
+			/* Set TEX insn $t src binding $unit in program type p
+			 * to TIC, TSC entry (32 * p + unit), mark valid (1).
+			 */
+			so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+			so_data  (so, ((32 * p + unit) << 9) | (unit << 1) | 1);
+		} else {
+			for (j = 0; j < 8; ++j)
+				so_data(so, 0);
+			so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+			so_data  (so, (unit << 1) | 0);
 		}
+	}
+
+	for (; unit < nv50->state.miptree_nr[p]; unit++) {
+		/* Make other bindings invalid. */
+		so_method(so, tesla, NV50TCL_BIND_TIC(p_hw), 1);
+		so_data  (so, (unit << 1) | 0);
+	}
+
+	nv50->state.miptree_nr[p] = nv50->miptree_nr[p];
+	return TRUE;
+}
 
-		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-		so_data  (so, (i++ << NV50TCL_SET_SAMPLER_TEX_TIC_SHIFT) |
-			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) |
-			  NV50TCL_SET_SAMPLER_TEX_VALID);
+void
+nv50_tex_validate(struct nv50_context *nv50)
+{
+	struct nouveau_stateobj *so;
+	struct nouveau_grobj *tesla = nv50->screen->tesla;
+	unsigned p, push, nrlc;
+
+	for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+		push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
+		nrlc += nv50->miptree_nr[p];
 	}
+	push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+	nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
+
+	so = so_new(push, nrlc);
+
+	if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
+	    nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
+		so_ref(NULL, &so);
 
-	for (; unit < nv50->state.miptree_nr; unit++) {
-		so_method(so, tesla, NV50TCL_SET_SAMPLER_TEX, 1);
-		so_data  (so,
-			  (unit << NV50TCL_SET_SAMPLER_TEX_SAMPLER_SHIFT) | 0);
+		NOUVEAU_ERR("failed tex validate\n");
+		return;
 	}
 
 	/* not sure if the following really do what I think: */
-	so_method(so, tesla, 0x1440, 1); /* sync SIFC */
-	so_data  (so, 0);
 	so_method(so, tesla, 0x1330, 1); /* flush TIC */
 	so_data  (so, 0);
 	so_method(so, tesla, 0x1338, 1); /* flush texture caches */
@@ -189,6 +219,4 @@ nv50_tex_validate(struct nv50_context *nv50)
 
 	so_ref(so, &nv50->state.tic_upload);
 	so_ref(NULL, &so);
-	nv50->state.miptree_nr = nv50->miptree_nr;
 }
-
-- 
cgit v1.2.3


From f7a97344924461d64bfa5bd1b6a2c1151b70cc7c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 2 Dec 2009 19:59:07 +0100
Subject: nv50: use copies of tgsi src nv50_regs

So we can use the 'mod' member without concern
if a source is used multiple times in 1 insn.
---
 src/gallium/drivers/nv50/nv50_program.c | 48 +++++++++++++++------------------
 1 file changed, 22 insertions(+), 26 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index f0fe7e6168..6116056857 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -131,6 +131,9 @@ struct nv50_pc {
 	struct nv50_reg *r_brdc;
 	struct nv50_reg *r_dst[4];
 
+	struct nv50_reg reg_instances[16];
+	unsigned reg_instance_nr;
+
 	unsigned interp_mode[32];
 	/* perspective interpolation registers */
 	struct nv50_reg *iv_p;
@@ -150,6 +153,19 @@ struct nv50_pc {
 	boolean allow32;
 };
 
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+	struct nv50_reg *dup = NULL;
+	if (reg) {
+		assert(pc->reg_instance_nr < 16);
+		dup = &pc->reg_instances[pc->reg_instance_nr++];
+		*dup = *reg;
+		reg->mod = 0;
+	}
+	return dup;
+}
+
 static INLINE void
 ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
 {
@@ -898,7 +914,6 @@ static INLINE void
 emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
 {
-	assert(src0 != src1);
 	src1->mod ^= NV50_MOD_NEG;
 	emit_add(pc, dst, src0, src1);
 	src1->mod ^= NV50_MOD_NEG;
@@ -967,7 +982,6 @@ static INLINE void
 emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
 {
-	assert(src2 != src0 && src2 != src1);
 	src2->mod ^= NV50_MOD_NEG;
 	emit_mad(pc, dst, src0, src1, src2);
 	src2->mod ^= NV50_MOD_NEG;
@@ -1515,8 +1529,6 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
-	int s;
-
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
@@ -1526,29 +1538,14 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_ADD:
 	case TGSI_OPCODE_SUB:
 	case TGSI_OPCODE_MAD:
-		break;
+		return TRUE;
 	case TGSI_OPCODE_POW:
 		if (i == 1)
-			break;
+			return TRUE;
 		return FALSE;
 	default:
 		return FALSE;
 	}
-
-	/* Watch out for possible multiple uses of an nv50_reg, we
-	 * can't use nv50_reg::neg in these cases.
-	 */
-	for (s = 0; s < insn->Instruction.NumSrcRegs; ++s) {
-		if (s == i)
-			continue;
-		if ((insn->Src[s].Register.Index ==
-		     insn->Src[i].Register.Index) &&
-		    (insn->Src[s].Register.File ==
-		     insn->Src[i].Register.File))
-			return FALSE;
-	}
-
-	return TRUE;
 }
 
 /* Return a read mask for source registers deduced from opcode & write mask. */
@@ -1882,7 +1879,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
-				src[i][c] = tgsi_src(pc, c, fs, neg_supp);
+				src[i][c] = reg_instance(pc,
+					tgsi_src(pc, c, fs, neg_supp));
 	}
 
 	brdc = temp = pc->r_brdc;
@@ -2249,16 +2247,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!src[i][c])
 				continue;
-			src[i][c]->mod = 0;
-			if (src[i][c]->index == -1 && src[i][c]->type == P_IMMD)
-				FREE(src[i][c]);
-			else
 			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
 				FREE(src[i][c]); /* indirect constant */
 		}
 	}
 
 	kill_temp_temp(pc);
+	pc->reg_instance_nr = 0;
+
 	return TRUE;
 }
 
-- 
cgit v1.2.3


From 9f3644c42350fec2cda17e66548c517d9d00e47f Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Fri, 4 Dec 2009 23:16:32 +0100
Subject: nv50: plug memory leak in miptree creation/destruction

Keeping this dynamically allocated for texture arrays.
Since we don't use it to store zslice offsets anymore
it's either 1 or 6 integers (cube) ...
---
 src/gallium/drivers/nv50/nv50_miptree.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 40ee665999..795db5872d 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -130,6 +130,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 				  mt->level[0].tile_mode, tile_flags,
 				  &mt->base.bo);
 	if (ret) {
+		for (l = 0; l < pt->last_level; ++l)
+			FREE(mt->level[l].image_offset);
 		FREE(mt);
 		return NULL;
 	}
@@ -169,6 +171,10 @@ static void
 nv50_miptree_destroy(struct pipe_texture *pt)
 {
 	struct nv50_miptree *mt = nv50_miptree(pt);
+	unsigned l;
+
+	for (l = 0; l < pt->last_level; ++l)
+		FREE(mt->level[l].image_offset);
 
 	nouveau_bo_ref(NULL, &mt->base.bo);
 	FREE(mt);
-- 
cgit v1.2.3


From 6a689783b9f61fc12e35f7e613697a3f4b07766b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 7 Dec 2009 20:40:39 +0100
Subject: nv50: add src_mask case for IF opcode

---
 src/gallium/drivers/nv50/nv50_program.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 6116056857..8c82652913 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1573,6 +1573,8 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_RSQ:
 	case TGSI_OPCODE_SCS:
 		return 0x1;
+	case TGSI_OPCODE_IF:
+		return 0x1;
 	case TGSI_OPCODE_LIT:
 		return 0xb;
 	case TGSI_OPCODE_TEX:
-- 
cgit v1.2.3


From cc0ffaba7d1df234b3c62769ade9dee712117d2f Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 20:54:18 +0100
Subject: nv50: fix depth comparison func TSC bits

Unfortunately it seems that if depth comparison is
active and we read a 2D texture, i.e. provide only
2 inputs, the second is used for comparison ...
---
 src/gallium/drivers/nv50/nv50_state.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 9c8c0c261e..88aef52d08 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -196,8 +196,9 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	}
 
 	if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-		tsc[0] |= (1 << 8);
-		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7);
+		/* XXX: must be deactivated for non-shadow textures */
+		tsc[0] |= (1 << 9);
+		tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7) << 10;
 	}
 
 	limit = CLAMP(cso->lod_bias, -16.0, 15.0);
-- 
cgit v1.2.3


From b0036f391a1862c15c4e33d221314926dba3213b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 9 Dec 2009 23:45:52 +0100
Subject: nv50: add S8Z24 depth texture format too

---
 src/gallium/drivers/nv50/nv50_screen.c  | 1 +
 src/gallium/drivers/nv50/nv50_tex.c     | 1 +
 src/gallium/drivers/nv50/nv50_texture.h | 1 +
 3 files changed, 3 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 862be46a9e..9e05745349 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -76,6 +76,7 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_DXT3_RGBA:
 		case PIPE_FORMAT_DXT5_RGBA:
 		case PIPE_FORMAT_Z24S8_UNORM:
+		case PIPE_FORMAT_S8Z24_UNORM:
 		case PIPE_FORMAT_Z32_FLOAT:
 		case PIPE_FORMAT_R16G16B16A16_SNORM:
 		case PIPE_FORMAT_R16G16B16A16_UNORM:
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 60b0ca7159..120aa6f362 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -68,6 +68,7 @@ static const struct nv50_texture_format nv50_tex_format_list[] =
 	_(DXT5_RGBA, UNORM, C0, C1, C2, C3, DXT5),
 
 	_MIXED(Z24S8_UNORM, UINT, UNORM, UINT, UINT, C1, C1, C1, ONE, 24_8),
+	_MIXED(S8Z24_UNORM, UNORM, UINT, UINT, UINT, C0, C0, C0, ONE, 8_24),
 
 	_(R16G16B16A16_SNORM, UNORM, C0, C1, C2, C3, 16_16_16_16),
 	_(R16G16B16A16_UNORM, SNORM, C0, C1, C2, C3, 16_16_16_16),
diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h
index d531e61132..b870302019 100644
--- a/src/gallium/drivers/nv50/nv50_texture.h
+++ b/src/gallium/drivers/nv50/nv50_texture.h
@@ -82,6 +82,7 @@
 #define NV50TIC_0_0_FMT_RGTC1                                     0x00000027
 #define NV50TIC_0_0_FMT_RGTC2                                     0x00000028
 #define NV50TIC_0_0_FMT_24_8                                      0x00000029
+#define NV50TIC_0_0_FMT_8_24                                      0x0000002a
 #define NV50TIC_0_0_FMT_32_DEPTH                                  0x0000002f
 #define NV50TIC_0_0_FMT_32_8                                      0x00000030
 
-- 
cgit v1.2.3


From d80778218d512f51e1b52e2fe652021ecefd724a Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 10 Dec 2009 00:36:03 +0100
Subject: nv50: support TXB and TXL

... and don't set the 'live' flag for TEX anymore, we'd
have to know if results affect the inputs for another TEX,
and I'm not going to do that kind of analysis now.
---
 src/gallium/drivers/nv50/nv50_program.c | 162 +++++++++++++++++++++-----------
 src/gallium/drivers/nv50/nv50_screen.c  |   3 +-
 2 files changed, 108 insertions(+), 57 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 8c82652913..ddb049f391 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1273,7 +1273,7 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 
 static void
 load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
-		     struct nv50_reg **src, boolean proj)
+		     struct nv50_reg **src, unsigned arg, boolean proj)
 {
 	int mod[3] = { src[0]->mod, src[1]->mod, src[2]->mod };
 
@@ -1290,6 +1290,10 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 
 	if (proj && 0 /* looks more correct without this */)
 		emit_mul(pc, t[2], t[2], src[3]);
+	else
+	if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
+		emit_mov(pc, t[3], src[3]);
+
 	emit_flop(pc, 0, t[2], t[2]);
 
 	emit_mul(pc, t[0], src[0], t[2]);
@@ -1298,85 +1302,115 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 }
 
 static void
-emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
-	 struct nv50_reg **src, unsigned unit, unsigned type, boolean proj)
+load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
+		     struct nv50_reg **src, unsigned dim, unsigned arg)
 {
-	struct nv50_reg *t[4];
-	struct nv50_program_exec *e;
+	unsigned c, mode;
+
+	if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
+		mode = pc->interp_mode[src[0]->index] | INTERP_PERSPECTIVE;
+
+		t[3]->rhw = src[3]->rhw;
+		emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
+		emit_flop(pc, 0, t[3], t[3]);
 
-	unsigned c, mode, dim;
+		for (c = 0; c < dim; ++c) {
+			t[c]->rhw = src[c]->rhw;
+			emit_interp(pc, t[c], t[3], mode);
+		}
+		if (arg != dim) { /* depth reference value */
+			t[dim]->rhw = src[2]->rhw;
+			emit_interp(pc, t[dim], t[3], mode);
+		}
+	} else {
+		/* XXX: for some reason the blob sometimes uses MAD
+		 * (mad f32 $rX $rY $rZ neg $r63)
+		 */
+		emit_flop(pc, 0, t[3], src[3]);
+		for (c = 0; c < dim; ++c)
+			emit_mul(pc, t[c], src[c], t[3]);
+		if (arg != dim) /* depth reference value */
+			emit_mul(pc, t[dim], src[2], t[3]);
+	}
+}
 
+static INLINE void
+get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
+{
 	switch (type) {
 	case TGSI_TEXTURE_1D:
-		dim = 1;
+		*arg = *dim = 1;
+		break;
+	case TGSI_TEXTURE_SHADOW1D:
+		*dim = 1;
+		*arg = 2;
 		break;
 	case TGSI_TEXTURE_UNKNOWN:
 	case TGSI_TEXTURE_2D:
-	case TGSI_TEXTURE_SHADOW1D: /* XXX: x, z */
 	case TGSI_TEXTURE_RECT:
-		dim = 2;
+		*arg = *dim = 2;
+		break;
+	case TGSI_TEXTURE_SHADOW2D:
+	case TGSI_TEXTURE_SHADOWRECT:
+		*dim = 2;
+		*arg = 3;
 		break;
 	case TGSI_TEXTURE_3D:
 	case TGSI_TEXTURE_CUBE:
-	case TGSI_TEXTURE_SHADOW2D:
-	case TGSI_TEXTURE_SHADOWRECT: /* XXX */
-		dim = 3;
+		*dim = *arg = 3;
 		break;
 	default:
 		assert(0);
 		break;
 	}
+}
+
+static void
+emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
+	 struct nv50_reg **src, unsigned unit, unsigned type,
+	 boolean proj, int bias_lod)
+{
+	struct nv50_reg *t[4];
+	struct nv50_program_exec *e;
+	unsigned c, dim, arg;
 
-	/* some cards need t[0]'s hw index to be a multiple of 4 */
+	/* t[i] must be within a single 128 bit super-reg */
 	alloc_temp4(pc, t, 0);
 
+	e = exec(pc);
+	e->inst[0] = 0xf0000000;
+	set_long(pc, e);
+	set_dst(pc, t[0], e);
+
+	/* TIC and TSC binding indices (TSC is ignored as TSC_LINKED = TRUE): */
+	e->inst[0] |= (unit << 9) /* | (unit << 17) */;
+
+	/* live flag (don't set if TEX results affect input to another TEX): */
+	/* e->inst[0] |= 0x00000004; */
+
+	get_tex_dim(type, &dim, &arg);
+
 	if (type == TGSI_TEXTURE_CUBE) {
-		load_cube_tex_coords(pc, t, src, proj);
+		e->inst[0] |= 0x08000000;
+		load_cube_tex_coords(pc, t, src, arg, proj);
 	} else
-	if (proj) {
-		if (src[0]->type == P_TEMP && src[0]->rhw != -1) {
-			mode = pc->interp_mode[src[0]->index];
-
-			t[3]->rhw = src[3]->rhw;
-			emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
-			emit_flop(pc, 0, t[3], t[3]);
-
-			for (c = 0; c < dim; c++) {
-				t[c]->rhw = src[c]->rhw;
-				emit_interp(pc, t[c], t[3],
-					    (mode | INTERP_PERSPECTIVE));
-			}
-		} else {
-			emit_flop(pc, 0, t[3], src[3]);
-			for (c = 0; c < dim; c++)
-				emit_mul(pc, t[c], src[c], t[3]);
-
-			/* XXX: for some reason the blob sometimes uses MAD:
-			 * emit_mad(pc, t[c], src[0][c], t[3], t[3])
-			 * pc->p->exec_tail->inst[1] |= 0x080fc000;
-			 */
-		}
-	} else {
+	if (proj)
+		load_proj_tex_coords(pc, t, src, dim, arg);
+	else {
 		for (c = 0; c < dim; c++)
 			emit_mov(pc, t[c], src[c]);
+		if (arg != dim) /* depth reference value (always src.z here) */
+			emit_mov(pc, t[dim], src[2]);
 	}
 
-	e = exec(pc);
-	set_long(pc, e);
-	e->inst[0] |= 0xf0000000;
-	e->inst[1] |= 0x00000004;
-	set_dst(pc, t[0], e);
-	e->inst[0] |= (unit << 9);
-
-	if (dim == 2)
-		e->inst[0] |= 0x00400000;
-	else
-	if (dim == 3) {
-		e->inst[0] |= 0x00800000;
-		if (type == TGSI_TEXTURE_CUBE)
-			e->inst[0] |= 0x08000000;
+	if (bias_lod) {
+		assert(arg < 4);
+		emit_mov(pc, t[arg++], src[3]);
+		e->inst[1] |= (bias_lod < 0) ? 0x20000000 : 0x40000000;
 	}
 
+	e->inst[0] |= (arg - 1) << 22;
+
 	e->inst[0] |= (mask & 0x3) << 25;
 	e->inst[1] |= (mask & 0xc) << 12;
 
@@ -1578,6 +1612,8 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_LIT:
 		return 0xb;
 	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXB:
+	case TGSI_OPCODE_TXL:
 	case TGSI_OPCODE_TXP:
 	{
 		const struct tgsi_instruction_texture *tex;
@@ -1586,13 +1622,17 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 		tex = &insn->Texture;
 
 		mask = 0x7;
-		if (insn->Instruction.Opcode == TGSI_OPCODE_TXP)
-			mask |= 0x8;
+		if (insn->Instruction.Opcode != TGSI_OPCODE_TEX &&
+		    insn->Instruction.Opcode != TGSI_OPCODE_TXD)
+			mask |= 0x8; /* bias, lod or proj */
 
 		switch (tex->Texture) {
 		case TGSI_TEXTURE_1D:
 			mask &= 0x9;
 			break;
+		case TGSI_TEXTURE_SHADOW1D:
+			mask &= 0x5;
+			break;
 		case TGSI_TEXTURE_2D:
 			mask &= 0xb;
 			break;
@@ -1784,6 +1824,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
 	case TGSI_OPCODE_LIT:
 	case TGSI_OPCODE_SCS:
 	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXB:
+	case TGSI_OPCODE_TXL:
 	case TGSI_OPCODE_TXP:
 		/* these take care of dangerous swizzles themselves */
 		return 0x0;
@@ -2187,11 +2229,19 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		break;
 	case TGSI_OPCODE_TEX:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->Texture.Texture, FALSE);
+			 inst->Texture.Texture, FALSE, 0);
+		break;
+	case TGSI_OPCODE_TXB:
+		emit_tex(pc, dst, mask, src[0], unit,
+			 inst->Texture.Texture, FALSE, -1);
+		break;
+	case TGSI_OPCODE_TXL:
+		emit_tex(pc, dst, mask, src[0], unit,
+			 inst->Texture.Texture, FALSE, 1);
 		break;
 	case TGSI_OPCODE_TXP:
 		emit_tex(pc, dst, mask, src[0], unit,
-			 inst->Texture.Texture, TRUE);
+			 inst->Texture.Texture, TRUE, 0);
 		break;
 	case TGSI_OPCODE_TRUNC:
 		for (c = 0; c < 4; c++) {
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 9e05745349..d443ca3ad0 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -419,7 +419,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		  NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM |
 		  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
-	so_data  (so, 0x00000000);
+	so_data  (so, 0x00000000); /* ignored if TSC_LINKED (0x1234) = 1 */
 
 
 	/* Vertex array limits - max them out */
@@ -433,6 +433,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_data  (so, fui(0.0));
 	so_data  (so, fui(1.0));
 
+	/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
 	so_method(so, screen->tesla, 0x1234, 1);
 	so_data  (so, 1);
 
-- 
cgit v1.2.3


From ec7844537ecdb0b598447e37bf0b7120acd029f3 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sat, 12 Dec 2009 22:31:53 +0100
Subject: nouveau: nv30: Disable swizzled surface usage if any dimension is 1
 (Warsow creates a 1x1 front buffer)

---
 src/gallium/drivers/nv30/nv30_miptree.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 920fe64c32..fd7c65f6b3 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -72,6 +72,9 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
+	if ((pt->width0 == 1) || (pt->height0 == 1)) {
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+	} else
 	if (pt->width0 & (pt->width0 - 1) ||
 	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
-- 
cgit v1.2.3


From b91503f45740e6e2a5db92609aed887b6c7bd460 Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sat, 12 Dec 2009 23:15:08 +0100
Subject: Revert "nouveau: nv30: Disable swizzled surface usage if any
 dimension is 1 (Warsow creates a 1x1 front buffer)"

This reverts commit ec7844537ecdb0b598447e37bf0b7120acd029f3.
---
 src/gallium/drivers/nv30/nv30_miptree.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index fd7c65f6b3..920fe64c32 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -72,9 +72,6 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
-	if ((pt->width0 == 1) || (pt->height0 == 1)) {
-		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
-	} else
 	if (pt->width0 & (pt->width0 - 1) ||
 	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
-- 
cgit v1.2.3


From f8d4ac56f7596613730fa4961d9b7b2e8d042055 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 12 Dec 2009 18:34:15 -0800
Subject: softpipe: Initialize source in blend_quad.

---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index b1e18805c7..43ed4c2494 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -258,7 +258,8 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad)
 
    /* loop over colorbuffer outputs */
    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) {
-      float source[4][QUAD_SIZE], dest[4][QUAD_SIZE];
+      float source[4][QUAD_SIZE] = { { 0 } };
+      float dest[4][QUAD_SIZE];
       struct softpipe_cached_tile *tile
          = sp_get_cached_tile(softpipe,
                               softpipe->cbuf_cache[cbuf],
-- 
cgit v1.2.3


From 44d8c9add2f095fc365ede751253d9fb7fc5c6e1 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 13:44:49 +0100
Subject: nv50: add craziness for non-constant TXB and TXL

If lod or bias can be non-constant across a quad of fragments,
we need to execute TEX separately for each value.
Don't ask why.
---
 src/gallium/drivers/nv50/nv50_program.c | 248 ++++++++++++++++++++++++++------
 1 file changed, 204 insertions(+), 44 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index ddb049f391..2e4279ff83 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -660,7 +660,7 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 	if (src->type == P_IMMD || src->type == P_CONST) {
 		set_long(pc, e);
 		set_data(pc, src, 0x7f, 9, e);
-		e->inst[1] |= 0x20000000; /* src0 const? */
+		e->inst[1] |= 0x20000000; /* mov from c[] */
 	} else {
 		if (src->type == P_ATTR) {
 			set_long(pc, e);
@@ -675,9 +675,9 @@ emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 
 	if (is_long(e) && !is_immd(e)) {
 		e->inst[1] |= 0x04000000; /* 32-bit */
-		e->inst[1] |= 0x0000c000; /* "subsubop" 0x3 */
+		e->inst[1] |= 0x0000c000; /* 32-bit c[] load / lane mask 0:1 */
 		if (!(e->inst[1] & 0x20000000))
-			e->inst[1] |= 0x00030000; /* "subsubop" 0xf */
+			e->inst[1] |= 0x00030000; /* lane mask 2:3 */
 	} else
 		e->inst[0] |= 0x00008000;
 
@@ -692,6 +692,17 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
 	FREE(imm);
 }
 
+static void
+emit_nop(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0xf0000000;
+	set_long(pc, e);
+	e->inst[1] = 0xe0000000;
+	emit(pc, e);
+}
+
 static boolean
 check_swap_src_0_1(struct nv50_pc *pc,
 		   struct nv50_reg **s0, struct nv50_reg **s1)
@@ -810,6 +821,33 @@ set_src_2(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e)
 	e->inst[1] |= ((src->hw & 127) << 14);
 }
 
+static void
+emit_mov_from_pred(struct nv50_pc *pc, struct nv50_reg *dst, int pred)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	assert(dst->type == P_TEMP);
+	e->inst[1] = 0x20000000 | (pred << 12);
+	set_long(pc, e);
+	set_dst(pc, dst, e);
+
+	emit(pc, e);
+}
+
+static void
+emit_mov_to_pred(struct nv50_pc *pc, int pred, struct nv50_reg *src)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x000001fc;
+	e->inst[1] = 0xa0000008;
+	set_long(pc, e);
+	set_pred_wr(pc, 1, pred, e);
+	set_src_0_restricted(pc, src, e);
+
+	emit(pc, e);
+}
+
 static void
 emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1)
@@ -1271,6 +1309,65 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 	emit(pc, e);
 }
 
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
+	    struct nv50_program_exec **join)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	if (join) {
+		set_long(pc, e);
+		e->inst[0] |= 0xa0000002;
+		emit(pc, e);
+		*join = e;
+		e = exec(pc);
+	}
+
+	set_long(pc, e);
+	e->inst[0] |= 0x10000002;
+	if (pred >= 0)
+		set_pred(pc, cc, pred, e);
+	emit(pc, e);
+	return pc->p->exec_tail;
+}
+
+#define QOP_ADD 0
+#define QOP_SUBR 1
+#define QOP_SUB 2
+#define QOP_MOV_SRC1 3
+
+/* For a quad of threads / top left, top right, bottom left, bottom right
+ * pixels, do a different operation, and take src0 from a specific thread.
+ */
+static void
+emit_quadop(struct nv50_pc *pc, struct nv50_reg *dst, int wp, int lane_src0,
+	    struct nv50_reg *src0, struct nv50_reg *src1, ubyte qop)
+{
+       struct nv50_program_exec *e = exec(pc);
+
+       e->inst[0] = 0xc0000000;
+       e->inst[1] = 0x80000000;
+       set_long(pc, e);
+       e->inst[0] |= lane_src0 << 16;
+       set_src_0(pc, src0, e);
+       set_src_2(pc, src1, e);
+
+       if (wp >= 0)
+	       set_pred_wr(pc, 1, wp, e);
+
+       if (dst)
+	       set_dst(pc, dst, e);
+       else {
+	       e->inst[0] |= 0x000001fc;
+	       e->inst[1] |= 0x00000008;
+       }
+
+       e->inst[0] |= (qop & 3) << 20;
+       e->inst[1] |= (qop >> 2) << 22;
+
+       emit(pc, e);
+}
+
 static void
 load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 		     struct nv50_reg **src, unsigned arg, boolean proj)
@@ -1365,6 +1462,94 @@ get_tex_dim(unsigned type, unsigned *dim, unsigned *arg)
 	}
 }
 
+/* We shouldn't execute TEXLOD if any of the pixels in a quad have
+ * different LOD values, so branch off groups of equal LOD.
+ */
+static void
+emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
+		     struct nv50_reg *src, struct nv50_program_exec *tex)
+{
+	struct nv50_program_exec *join_at;
+	unsigned i, target = pc->p->exec_size + 7 * 2;
+
+	/* Subtract lod of each pixel from lod of top left pixel, jump
+	 * texlod insn if result is 0, then repeat for 2 other pixels.
+	 */
+	emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
+	emit_branch(pc, 0, 2, &join_at)->param.index = target;
+
+	for (i = 1; i < 4; ++i) {
+		emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
+		emit_branch(pc, 0, 2, NULL)->param.index = target;
+	}
+
+	emit_mov(pc, tlod, src); /* target */
+	emit(pc, tex); /* texlod */
+
+	join_at->param.index = target + 2 * 2;
+	emit_nop(pc);
+	pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+}
+
+static void
+emit_texbias_sequence(struct nv50_pc *pc, struct nv50_reg *t[4], unsigned arg,
+		      struct nv50_program_exec *tex)
+{
+	struct nv50_program_exec *e;
+	struct nv50_reg imm_1248, *t123[4][4], *r_bits = alloc_temp(pc, NULL);
+	int r_pred = 0;
+	unsigned n, c, i, cc[4] = { 0x0a, 0x13, 0x11, 0x10 };
+
+	pc->allow32 = FALSE;
+	ctor_reg(&imm_1248, P_IMMD, -1, ctor_immd_4u32(pc, 1, 2, 4, 8) * 4);
+
+	/* Subtract bias value of thread i from bias values of each thread,
+	 * store result in r_pred, and set bit i in r_bits if result was 0.
+	 */
+	assert(arg < 4);
+	for (i = 0; i < 4; ++i, ++imm_1248.hw) {
+		emit_quadop(pc, NULL, r_pred, i, t[arg], t[arg], 0x55);
+		emit_mov(pc, r_bits, &imm_1248);
+		set_pred(pc, 2, r_pred, pc->p->exec_tail);
+	}
+	emit_mov_to_pred(pc, r_pred, r_bits);
+
+	/* The lanes of a quad are now grouped by the bit in r_pred they have
+	 * set. Put the input values for TEX into a new register set for each
+	 * group and execute TEX only for a specific group.
+	 * We cannot use the same register set for each group because we need
+	 * the derivatives, which are implicitly calculated, to be correct.
+	 */
+	for (i = 1; i < 4; ++i) {
+		alloc_temp4(pc, t123[i], 0);
+
+		for (c = 0; c <= arg; ++c)
+			emit_mov(pc, t123[i][c], t[c]);
+
+		*(e = exec(pc)) = *(tex);
+		e->inst[0] &= ~0x01fc;
+		set_dst(pc, t123[i][0], e);
+		set_pred(pc, cc[i], r_pred, e);
+		emit(pc, e);
+	}
+	/* finally TEX on the original regs (where we kept the input) */
+	set_pred(pc, cc[0], r_pred, tex);
+	emit(pc, tex);
+
+	/* put the 3 * n other results into regs for lane 0 */
+	n = popcnt4(((e->inst[0] >> 25) & 0x3) | ((e->inst[1] >> 12) & 0xc));
+	for (i = 1; i < 4; ++i) {
+		for (c = 0; c < n; ++c) {
+			emit_mov(pc, t[c], t123[i][c]);
+			set_pred(pc, cc[i], r_pred, pc->p->exec_tail);
+		}
+		free_temp4(pc, t123[i]);
+	}
+
+	emit_nop(pc);
+	free_temp(pc, r_bits);
+}
+
 static void
 emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	 struct nv50_reg **src, unsigned unit, unsigned type,
@@ -1403,18 +1588,25 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 			emit_mov(pc, t[dim], src[2]);
 	}
 
-	if (bias_lod) {
-		assert(arg < 4);
-		emit_mov(pc, t[arg++], src[3]);
-		e->inst[1] |= (bias_lod < 0) ? 0x20000000 : 0x40000000;
-	}
-
-	e->inst[0] |= (arg - 1) << 22;
-
 	e->inst[0] |= (mask & 0x3) << 25;
 	e->inst[1] |= (mask & 0xc) << 12;
 
-	emit(pc, e);
+	if (!bias_lod) {
+		e->inst[0] |= (arg - 1) << 22;
+		emit(pc, e);
+	} else
+	if (bias_lod < 0) {
+		e->inst[0] |= arg << 22;
+		e->inst[1] |= 0x20000000; /* texbias */
+		emit_mov(pc, t[arg], src[3]);
+		emit_texbias_sequence(pc, t, arg, e);
+	} else {
+		e->inst[0] |= arg << 22;
+		e->inst[1] |= 0x40000000; /* texlod */
+		emit_mov(pc, t[arg], src[3]);
+		emit_texlod_sequence(pc, t[arg], src[3], e);
+	}
+
 #if 1
 	c = 0;
 	if (mask & 1) emit_mov(pc, dst[0], t[c++]);
@@ -1436,38 +1628,6 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 #endif
 }
 
-static void
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
-	    struct nv50_program_exec **join)
-{
-	struct nv50_program_exec *e = exec(pc);
-
-	if (join) {
-		set_long(pc, e);
-		e->inst[0] |= 0xa0000002;
-		emit(pc, e);
-		*join = e;
-		e = exec(pc);
-	}
-
-	set_long(pc, e);
-	e->inst[0] |= 0x10000002;
-	if (pred >= 0)
-		set_pred(pc, cc, pred, e);
-	emit(pc, e);
-}
-
-static void
-emit_nop(struct nv50_pc *pc)
-{
-	struct nv50_program_exec *e = exec(pc);
-
-	e->inst[0] = 0xf0000000;
-	set_long(pc, e);
-	e->inst[1] = 0xe0000000;
-	emit(pc, e);
-}
-
 static void
 emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-- 
cgit v1.2.3


From 7b5a6fa0c87a821835161494987994a781401303 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 14:14:41 +0100
Subject: nv50: use m2mf z pos instead of calculating offset manually

---
 src/gallium/drivers/nv50/nv50_transfer.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4705f96f57..6a98d806d0 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -16,6 +16,7 @@ struct nv50_transfer {
 	int level_depth;
 	int level_x;
 	int level_y;
+	int level_z;
 	unsigned nblocksx;
 	unsigned nblocksy;
 };
@@ -24,10 +25,10 @@ static void
 nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			struct nouveau_bo *src_bo, unsigned src_offset,
 			int src_pitch, unsigned src_tile_mode,
-			int sx, int sy, int sw, int sh, int sd,
+			int sx, int sy, int sz, int sw, int sh, int sd,
 			struct nouveau_bo *dst_bo, unsigned dst_offset,
 			int dst_pitch, unsigned dst_tile_mode,
-			int dx, int dy, int dw, int dh, int dd,
+			int dx, int dy, int dz, int dw, int dh, int dd,
 			int cpp, int width, int height,
 			unsigned src_reloc, unsigned dst_reloc)
 {
@@ -56,7 +57,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, sw * cpp);
 		OUT_RING  (chan, sh);
 		OUT_RING  (chan, sd);
-		OUT_RING  (chan, 0);
+		OUT_RING  (chan, sz); /* copying only 1 zslice per call */
 	}
 
 	if (!dst_bo->tile_flags) {
@@ -75,7 +76,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RING  (chan, dw * cpp);
 		OUT_RING  (chan, dh);
 		OUT_RING  (chan, dd);
-		OUT_RING  (chan, 0);
+		OUT_RING  (chan, dz); /* copying only 1 zslice per call */
 	}
 
 	while (height) {
@@ -166,6 +167,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
+	tx->level_z = zslice;
 	tx->level_x = pf_get_nblocksx(pt->format, x);
 	tx->level_y = pf_get_nblocksy(pt->format, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
@@ -175,23 +177,18 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
-	if (pt->target == PIPE_TEXTURE_3D)
-		tx->level_offset += get_zslice_offset(lvl->tile_mode, zslice,
-						      lvl->pitch,
-						      tx->nblocksy);
-
 	if (usage & PIPE_TRANSFER_READ) {
 		nx = pf_get_nblocksx(pt->format, tx->base.width);
 		ny = pf_get_nblocksy(pt->format, tx->base.height);
 
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
-					x, y,
+					x, y, zslice,
 					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
-					0, 0,
+					0, 0, 0,
 					tx->nblocksx, tx->nblocksy, 1,
 					pf_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
@@ -216,11 +213,11 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 
 		nv50_transfer_rect_m2mf(pscreen, tx->bo, 0,
 					tx->base.stride, tx->bo->tile_mode,
-					0, 0,
+					0, 0, 0,
 					tx->nblocksx, tx->nblocksy, 1,
 					mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
-					tx->level_x, tx->level_y,
+					tx->level_x, tx->level_y, tx->level_z,
 					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
 					pf_get_blocksize(pt->format), nx, ny,
-- 
cgit v1.2.3


From 079b670111fe41cabf700d089f489d4b116af5eb Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 13 Dec 2009 14:36:54 +0100
Subject: nv50: add proper zslice offset in miptree_surface

---
 src/gallium/drivers/nv50/nv50_miptree.c  | 27 +++++++++++++++++++++------
 src/gallium/drivers/nv50/nv50_transfer.c | 14 --------------
 2 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 795db5872d..9e083b662d 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -55,6 +55,20 @@ get_tile_mode(unsigned ny, unsigned d)
 	return tile_mode | 0x10;
 }
 
+static INLINE unsigned
+get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned nb_h)
+{
+	unsigned tile_h = get_tile_height(tile_mode);
+	unsigned tile_d = get_tile_depth(tile_mode);
+
+	/* pitch_2d == to next slice within this volume-tile */
+	/* pitch_3d == size (in bytes) of a volume-tile */
+	unsigned pitch_2d = tile_h * 64;
+	unsigned pitch_3d = tile_d * align(nb_h, tile_h) * pitch;
+
+	return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
+}
+
 static struct pipe_texture *
 nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 {
@@ -188,15 +202,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	struct nv50_miptree *mt = nv50_miptree(pt);
 	struct nv50_miptree_level *lvl = &mt->level[level];
 	struct pipe_surface *ps;
-	int img;
+	unsigned img = 0;
 
 	if (pt->target == PIPE_TEXTURE_CUBE)
 		img = face;
-	else
-	if (pt->target == PIPE_TEXTURE_3D)
-		img = zslice;
-	else
-		img = 0;
 
 	ps = CALLOC_STRUCT(pipe_surface);
 	if (!ps)
@@ -212,6 +221,12 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	ps->zslice = zslice;
 	ps->offset = lvl->image_offset[img];
 
+	if (pt->target == PIPE_TEXTURE_3D) {
+		unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+		ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
+						lvl->pitch, nb_h);
+	}
+
 	return ps;
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 6a98d806d0..104d29a003 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -119,20 +119,6 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 	}
 }
 
-static INLINE unsigned
-get_zslice_offset(unsigned tile_mode, unsigned z, unsigned pitch, unsigned ny)
-{
-	unsigned tile_h = get_tile_height(tile_mode);
-	unsigned tile_d = get_tile_depth(tile_mode);
-
-	/* pitch_2d == to next slice within this volume-tile */
-	/* pitch_3d == to next slice in next 2D array of blocks */
-	unsigned pitch_2d = tile_h * 64;
-	unsigned pitch_3d = tile_d * align(ny, tile_h) * pitch;
-
-	return (z % tile_d) * pitch_2d + (z / tile_d) * pitch_3d;
-}
-
 static struct pipe_transfer *
 nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		  unsigned face, unsigned level, unsigned zslice,
-- 
cgit v1.2.3


From d4d880199ead954e79cad141f7a29f7dd17fe7fc Mon Sep 17 00:00:00 2001
From: Patrice Mandin <patmandin@gmail.com>
Date: Sun, 13 Dec 2009 20:09:33 +0100
Subject: nouveau: nv50: Add missing ctor_immd_4u32 function

---
 src/gallium/drivers/nv50/nv50_program.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2e4279ff83..feb3d42286 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -358,7 +358,7 @@ static void
 kill_temp_temp(struct nv50_pc *pc)
 {
 	int i;
-	
+
 	for (i = 0; i < pc->temp_temp_nr; i++)
 		free_temp(pc, pc->temp_temp[i]);
 	pc->temp_temp_nr = 0;
@@ -373,7 +373,20 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
 	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
 	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
 	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-	
+
+	return pc->immd_nr++;
+}
+
+static int
+ctor_immd_4u32(struct nv50_pc *pc, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
+{
+	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(uint32_t)),
+			       (pc->immd_nr + 1) * 4 * sizeof(uint32_t));
+	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+
 	return pc->immd_nr++;
 }
 
-- 
cgit v1.2.3


From 1778ddaf74aba72df167769bf42150810aac91a3 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 14 Dec 2009 11:10:16 +0100
Subject: nv50: store immediates as uint32

Sometimes we want non-float immediates, hacking them into
floats isn't nice.

Sorry, this should have already been committed before.
---
 src/gallium/drivers/nv50/nv50_program.c | 62 ++++++++++++++++-----------------
 src/gallium/drivers/nv50/nv50_program.h |  2 +-
 2 files changed, 31 insertions(+), 33 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index feb3d42286..fe8ccd0349 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -119,7 +119,7 @@ struct nv50_pc {
 	struct nv50_reg *param;
 	int param_nr;
 	struct nv50_reg *immd;
-	float *immd_buf;
+	uint32_t *immd_buf;
 	int immd_nr;
 	struct nv50_reg **addr;
 	int addr_nr;
@@ -365,10 +365,13 @@ kill_temp_temp(struct nv50_pc *pc)
 }
 
 static int
-ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+ctor_immd_4u32(struct nv50_pc *pc,
+	       uint32_t x, uint32_t y, uint32_t z, uint32_t w)
 {
-	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
-			       (pc->immd_nr + 1) * 4 * sizeof(float));
+	unsigned size = pc->immd_nr * 4 * sizeof(uint32_t);
+
+	pc->immd_buf = REALLOC(pc->immd_buf, size, size + 4 * sizeof(uint32_t));
+
 	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
 	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
 	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
@@ -377,17 +380,10 @@ ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
 	return pc->immd_nr++;
 }
 
-static int
-ctor_immd_4u32(struct nv50_pc *pc, uint32_t x, uint32_t y, uint32_t z, uint32_t w)
+static INLINE int
+ctor_immd_4f32(struct nv50_pc *pc, float x, float y, float z, float w)
 {
-	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(uint32_t)),
-			       (pc->immd_nr + 1) * 4 * sizeof(uint32_t));
-	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
-	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
-	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
-	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
-
-	return pc->immd_nr++;
+	return ctor_immd_4u32(pc, fui(x), fui(y), fui(z), fui(w));
 }
 
 static struct nv50_reg *
@@ -397,11 +393,11 @@ alloc_immd(struct nv50_pc *pc, float f)
 	unsigned hw;
 
 	for (hw = 0; hw < pc->immd_nr * 4; hw++)
-		if (pc->immd_buf[hw] == f)
+		if (pc->immd_buf[hw] == fui(f))
 			break;
 
 	if (hw == pc->immd_nr * 4)
-		hw = ctor_immd(pc, f, -f, 0.5 * f, 0) * 4;
+		hw = ctor_immd_4f32(pc, f, -f, 0.5 * f, 0) * 4;
 
 	ctor_reg(r, P_IMMD, -1, hw);
 	return r;
@@ -493,22 +489,24 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
-	unsigned val;
-	float f = pc->immd_buf[imm->hw];
+	union {
+		float f;
+		uint32_t ui;
+	} u;
+	u.ui = pc->immd_buf[imm->hw];
 
-	if (imm->mod & NV50_MOD_ABS)
-		f = fabsf(f);
-	val = fui((imm->mod & NV50_MOD_NEG) ? -f : f);
+	u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
+	u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
 
 	set_long(pc, e);
-	/*XXX: can't be predicated - bits overlap.. catch cases where both
-	 *     are required and avoid them. */
+	/* XXX: can't be predicated - bits overlap; cases where both
+	 * are required should be avoided by using pc->allow32 */
 	set_pred(pc, 0, 0, e);
 	set_pred_wr(pc, 0, 0, e);
 
 	e->inst[1] |= 0x00000002 | 0x00000001;
-	e->inst[0] |= (val & 0x3f) << 16;
-	e->inst[1] |= (val >> 6) << 2;
+	e->inst[0] |= (u.ui & 0x3f) << 16;
+	e->inst[1] |= (u.ui >> 6) << 2;
 }
 
 static INLINE void
@@ -2762,10 +2760,10 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 			const struct tgsi_full_immediate *imm =
 				&tp.FullToken.FullImmediate;
 
-			ctor_immd(pc, imm->u[0].Float,
-				      imm->u[1].Float,
-				      imm->u[2].Float,
-				      imm->u[3].Float);
+			ctor_immd_4f32(pc, imm->u[0].Float,
+				       imm->u[1].Float,
+				       imm->u[2].Float,
+				       imm->u[3].Float);
 		}
 			break;
 		case TGSI_TOKEN_TYPE_DECLARATION:
@@ -3245,7 +3243,7 @@ nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p)
 }
 
 static void
-nv50_program_upload_data(struct nv50_context *nv50, float *map,
+nv50_program_upload_data(struct nv50_context *nv50, uint32_t *map,
 			unsigned start, unsigned count, unsigned cbuf)
 {
 	struct nouveau_channel *chan = nv50->screen->base.channel;
@@ -3293,8 +3291,8 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p)
 
 	if (p->param_nr) {
 		unsigned cb;
-		float *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
-					     PIPE_BUFFER_USAGE_CPU_READ);
+		uint32_t *map = pipe_buffer_map(pscreen, nv50->constbuf[p->type],
+						PIPE_BUFFER_USAGE_CPU_READ);
 
 		if (p->type == PIPE_SHADER_VERTEX)
 			cb = NV50_CB_PVP;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 255c7c737e..4a90c372ce 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -37,7 +37,7 @@ struct nv50_program {
 
 	struct nouveau_bo *bo;
 
-	float *immd;
+	uint32_t *immd;
 	unsigned immd_nr;
 	unsigned param_nr;
 
-- 
cgit v1.2.3


From a0127b6ced257919180ba3a1bf534b68d9c750be Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 14 Dec 2009 18:36:33 +0100
Subject: gallium: more work for edgeflags changes

fixes, cleanups, etc.
not working yet
---
 src/gallium/auxiliary/draw/draw_private.h                 |  1 +
 src/gallium/auxiliary/draw/draw_pt.c                      | 11 -----------
 src/gallium/auxiliary/draw/draw_pt.h                      |  3 ++-
 src/gallium/auxiliary/draw/draw_pt_fetch.c                | 15 +--------------
 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c |  5 ++---
 src/gallium/auxiliary/draw/draw_pt_post_vs.c              |  5 ++++-
 src/gallium/auxiliary/draw/draw_vs.c                      |  4 ++++
 src/gallium/auxiliary/draw/draw_vs.h                      |  1 +
 src/gallium/drivers/softpipe/sp_context.c                 |  2 --
 src/gallium/drivers/softpipe/sp_draw_arrays.c             |  8 --------
 src/gallium/drivers/softpipe/sp_state.h                   |  4 ----
 src/mesa/state_tracker/st_atom_shader.c                   |  4 ++--
 src/mesa/state_tracker/st_mesa_to_tgsi.c                  | 10 ++++------
 src/mesa/state_tracker/st_mesa_to_tgsi.h                  |  4 +++-
 src/mesa/state_tracker/st_program.c                       | 15 ++++++++++-----
 15 files changed, 34 insertions(+), 58 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 0750e6e379..3850cede1e 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -182,6 +182,7 @@ struct draw_context
       struct draw_vertex_shader *vertex_shader;
       uint num_vs_outputs;  /**< convenience, from vertex_shader */
       uint position_output;
+      uint edgeflag_output;
 
       /** TGSI program interpreter runtime state */
       struct tgsi_exec_machine *machine;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 139ae1fe55..2801dbafe4 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -314,14 +314,3 @@ draw_arrays(struct draw_context *draw, unsigned prim,
    /* drawing done here: */
    draw_pt_arrays(draw, prim, start, count);
 }
-
-boolean draw_pt_get_edgeflag( struct draw_context *draw,
-                              unsigned idx )
-{
-   if (draw->pt.user.edgeflag) {
-      float *ef = draw->pt.verted_buffer[idx]
-      return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0;
-   }
-   else
-      return 1;
-}
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index b5c8c82f4a..20edf7a227 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -212,7 +212,8 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
 void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
 			      boolean bypass_clipping,
 			      boolean bypass_viewport,
-			      boolean opengl );
+			      boolean opengl,
+			      boolean need_edgeflags );
 
 struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c
index cb609f8c41..305bfef435 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c
@@ -42,11 +42,11 @@ struct pt_fetch {
    struct translate *translate;
 
    unsigned vertex_size;
-   boolean need_edgeflags;
 
    struct translate_cache *cache;
 };
 
+
 /* Perform the fetch from API vertex elements & vertex buffers, to a
  * contiguous set of float[4] attributes as required for the
  * vertex_shader->run_linear() method.
@@ -160,11 +160,6 @@ void draw_pt_fetch_run( struct pt_fetch *fetch,
 			count,
 			verts );
 
-   /* Extract edgeflag values from vertex data into the header.
-    */
-   if (fetch->need_edgeflags) {
-      extract_edge_flags( fetch, count );
-   }
 }
 
 
@@ -189,14 +184,6 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
                    start,
                    count,
                    verts );
-
-   /* Extract edgeflag values from vertex data into the header.  XXX:
-    * this should be done after the vertex shader is run.
-    * Bypass-vs-and-clip interaction with pipeline???
-    */
-   if (fetch->need_edgeflags) {
-      extract_edge_flags( fetch, count );
-   }
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index d41436858a..932113783d 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -85,10 +85,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
    draw_pt_post_vs_prepare( fpme->post_vs,
 			    (boolean)draw->bypass_clipping,
 			    (boolean)(draw->identity_viewport ||
-                                      draw->rasterizer->bypass_vs_clip_and_viewport),
+			    draw->rasterizer->bypass_vs_clip_and_viewport),
 			    (boolean)draw->rasterizer->gl_rasterization_rules,
-                            need_edgeflags );
-			    
+			    (draw->vs.edgeflag_output ? true : false) );    
 
    if (!(opt & PT_PIPELINE)) {
       draw_pt_emit_prepare( fpme->emit, 
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 0745b168de..08d77649a3 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -155,6 +155,7 @@ post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
                                       unsigned count,
                                       unsigned stride )
 {
+   unsigned j;
    if (!post_vs_cliptest_viewport_gl( pvs, vertices, count, stride))
       return FALSE;
 
@@ -170,6 +171,7 @@ post_vs_cliptest_viewport_gl_edgeflag(struct pt_post_vs *pvs,
          out->edgeflag = (edgeflag[0] != 1.0f);
       }
    }
+   return TRUE;
 }
 
 
@@ -229,7 +231,8 @@ boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
 void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
 			      boolean bypass_clipping,
 			      boolean bypass_viewport,
-			      boolean opengl )
+			      boolean opengl,
+			      boolean need_edgeflags )
 {
    if (!need_edgeflags) {
       if (bypass_clipping) {
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index 790e89ed82..3553689532 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -101,6 +101,9 @@ draw_create_vertex_shader(struct draw_context *draw,
          if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
              vs->info.output_semantic_index[i] == 0)
             vs->position_output = i;
+         else if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_EDGEFLAG &&
+             vs->info.output_semantic_index[i] == 0)
+            vs->edgeflag_output = i;
       }
    }
 
@@ -120,6 +123,7 @@ draw_bind_vertex_shader(struct draw_context *draw,
       draw->vs.vertex_shader = dvs;
       draw->vs.num_vs_outputs = dvs->info.num_outputs;
       draw->vs.position_output = dvs->position_output;
+      draw->vs.edgeflag_output = dvs->edgeflag_output;
       dvs->prepare( dvs, draw );
    }
    else {
diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h
index 89ae158751..e3b807ebd0 100644
--- a/src/gallium/auxiliary/draw/draw_vs.h
+++ b/src/gallium/auxiliary/draw/draw_vs.h
@@ -107,6 +107,7 @@ struct draw_vertex_shader {
 
    struct tgsi_shader_info info;
    unsigned position_output;
+   unsigned edgeflag_output;
 
    /* Extracted from shader:
     */
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index f8bf3e9974..2a33587b5a 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -238,8 +238,6 @@ softpipe_create( struct pipe_screen *screen )
    softpipe->pipe.draw_arrays = softpipe_draw_arrays;
    softpipe->pipe.draw_elements = softpipe_draw_elements;
    softpipe->pipe.draw_range_elements = softpipe_draw_range_elements;
-   softpipe->pipe.set_edgeflags = softpipe_set_edgeflags;
-
 
    softpipe->pipe.clear = softpipe_clear;
    softpipe->pipe.flush = softpipe_flush;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index d4045816d0..518ef8806e 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -184,11 +184,3 @@ softpipe_draw_elements(struct pipe_context *pipe,
                                         0, 0xffffffff,
                                         mode, start, count );
 }
-
-
-void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
-   struct softpipe_context *sp = softpipe_context(pipe);
-   draw_set_edgeflags(sp->draw, edgeflags);
-}
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index d488fb8710..26d5c3fbb2 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -189,10 +189,6 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
                              unsigned max_index,
                              unsigned mode, unsigned start, unsigned count);
 
-void
-softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
 void
 softpipe_map_transfers(struct softpipe_context *sp);
 
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index e209634c90..629bf8953e 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -113,8 +113,8 @@ find_translated_vp(struct st_context *st,
     * the input to the output.  We'll need to use similar logic to set
     * up the extra vertex_element input for edgeflags.
     */
-   key.passthrough_edgeflags = (ctx->Polygon.FrontMode != GL_FILL ||
-                                ctx->Polygon.BackMode != GL_FILL);
+   key.passthrough_edgeflags = (st->ctx->Polygon.FrontMode != GL_FILL ||
+                                st->ctx->Polygon.BackMode != GL_FILL);
       
 
    /* Do we need to throw away old translations after a change in the
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 9fd670cac2..72bd17bf41 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -738,11 +738,11 @@ emit_face_var( struct st_translate *t,
  *
  * \return  array of translated tokens, caller's responsibility to free
  */
-const struct tgsi_token *
+enum pipe_error
 st_translate_mesa_program(
    GLcontext *ctx,
-   struct ureg_program *ureg;
    uint procType,
+   struct ureg_program *ureg,
    const struct gl_program *program,
    GLuint numInputs,
    const GLuint inputMapping[],
@@ -755,7 +755,6 @@ st_translate_mesa_program(
    const ubyte outputSemanticIndex[] )
 {
    struct st_translate translate, *t;
-   const struct tgsi_token *tokens = NULL;
    unsigned i;
 
    t = &translate;
@@ -904,16 +903,15 @@ out:
 
    if (t->error) {
       debug_printf("%s: translate error flag set\n", __FUNCTION__);
-      FREE((void *)tokens);
-      tokens = NULL;
    }
 
+/* ???
    if (!tokens) {
       debug_printf("%s: failed to translate Mesa program:\n", __FUNCTION__);
       _mesa_print_program(program);
       debug_assert(0);
    }
-
+*/
    return PIPE_ERROR_OUT_OF_MEMORY;
 }
 
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
index dc0362fe79..9dae373ede 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.h
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -30,6 +30,7 @@
 #define ST_MESA_TO_TGSI_H
 
 #include "main/mtypes.h"
+#include "tgsi/tgsi_ureg.h"
 
 
 #if defined __cplusplus
@@ -39,10 +40,11 @@ extern "C" {
 struct tgsi_token;
 struct gl_program;
 
-const struct tgsi_token *
+enum pipe_error
 st_translate_mesa_program(
    GLcontext *ctx,
    uint procType,
+   struct ureg_program *ureg,
    const struct gl_program *program,
    GLuint numInputs,
    const GLuint inputMapping[],
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 876d92539e..24f2387429 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -193,6 +193,7 @@ st_translate_vertex_program(struct st_context *st,
    struct st_vp_varient *vpv = CALLOC_STRUCT(st_vp_varient);
    struct pipe_context *pipe = st->pipe;
    struct ureg_program *ureg;
+   enum pipe_error error;
 
    ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
    if (ureg == NULL)
@@ -215,18 +216,18 @@ st_translate_vertex_program(struct st_context *st,
                                 stvp->output_semantic_name,
                                 stvp->output_semantic_index );
 
-   if (ret)
+   if (error)
       goto fail;
 
    /* Edgeflags will be the last input:
     */
-   if (key.passthrough_edgeflags) {
+   if (key->passthrough_edgeflags) {
       ureg_MOV( ureg,
                 ureg_DECL_output( ureg, TGSI_SEMANTIC_EDGEFLAG, 0 ),
                 ureg_DECL_next_vs_input(ureg));
    }
 
-   tokens = ureg_get_tokens( ureg, NULL );
+   vpv->state.tokens = ureg_get_tokens( ureg, NULL );
    ureg_destroy( ureg );
 
    vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->state);
@@ -266,6 +267,7 @@ st_translate_fragment_program(struct st_context *st,
    GLuint defaultInputMapping[FRAG_ATTRIB_MAX];
    GLuint interpMode[16];  /* XXX size? */
    GLuint attr;
+   enum pipe_error error;
    const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
    struct ureg_program *ureg;
    GLuint vslot = 0;
@@ -404,12 +406,13 @@ st_translate_fragment_program(struct st_context *st,
 
    ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
    if (ureg == NULL)
-      return NULL;
+      return;
 
 
-   stfp->state.tokens = 
+   error = 
       st_translate_mesa_program(st->ctx,
                                 TGSI_PROCESSOR_FRAGMENT,
+                                ureg,
                                 &stfp->Base.Base,
                                 /* inputs */
                                 fs_num_inputs,
@@ -423,6 +426,8 @@ st_translate_fragment_program(struct st_context *st,
                                 fs_output_semantic_name,
                                 fs_output_semantic_index );
 
+   stfp->state.tokens = ureg_get_tokens( ureg, NULL );
+   ureg_destroy( ureg );
    stfp->driver_shader = pipe->create_fs_state(pipe, &stfp->state);
 
    if ((ST_DEBUG & DEBUG_TGSI) && (ST_DEBUG & DEBUG_MESA)) {
-- 
cgit v1.2.3


From 2677f199a547f6e44d964b8c34dd7f60d9523ab2 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 14 Dec 2009 18:39:13 +0100
Subject: nv50: be more cautious about using reg_instance

Trying to free part of nv50_pc->reg_instances[] for an
nv50_reg representing an indirect constant resulted in
a segmentation fault.
---
 src/gallium/drivers/nv50/nv50_program.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index fe8ccd0349..e496cf4cad 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -156,14 +156,15 @@ struct nv50_pc {
 static INLINE struct nv50_reg *
 reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
 {
-	struct nv50_reg *dup = NULL;
+	struct nv50_reg *ri;
+
+	assert(pc->reg_instance_nr < 16);
+	ri = &pc->reg_instances[pc->reg_instance_nr++];
 	if (reg) {
-		assert(pc->reg_instance_nr < 16);
-		dup = &pc->reg_instances[pc->reg_instance_nr++];
-		*dup = *reg;
+		*ri = *reg;
 		reg->mod = 0;
 	}
-	return dup;
+	return ri;
 }
 
 static INLINE void
@@ -1886,7 +1887,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			/* Indicate indirection by setting r->acc < 0 and
 			 * use the index field to select the address reg.
 			 */
-			r = MALLOC_STRUCT(nv50_reg);
+			r = reg_instance(pc, NULL);
 			swz = tgsi_util_get_src_register_swizzle(
 						 &src->Indirect, 0);
 			ctor_reg(r, P_CONST,
@@ -1940,6 +1941,8 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		break;
 	}
 
+	if (r && r->acc >= 0 && r != temp)
+		return reg_instance(pc, r);
 	return r;
 }
 
@@ -2094,8 +2097,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
-				src[i][c] = reg_instance(pc,
-					tgsi_src(pc, c, fs, neg_supp));
+				src[i][c] = tgsi_src(pc, c, fs, neg_supp);
 	}
 
 	brdc = temp = pc->r_brdc;
@@ -2466,15 +2468,6 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 	}
 
-	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
-		for (c = 0; c < 4; c++) {
-			if (!src[i][c])
-				continue;
-			if (src[i][c]->acc < 0 && src[i][c]->type == P_CONST)
-				FREE(src[i][c]); /* indirect constant */
-		}
-	}
-
 	kill_temp_temp(pc);
 	pc->reg_instance_nr = 0;
 
-- 
cgit v1.2.3


From 41b52aa3362665e08bdc2f75cc9bfdc4debc6eb0 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Fri, 4 Dec 2009 22:58:22 +0100
Subject: nouveau: avoid running out of relocs

- Added flush notify functions for NV30 and NV40.
- Flushing mid-frame will call flush notify, which will resubmit all
relocs.
- We don't try to recover from reloc failure yet.
---
 src/gallium/drivers/nouveau/nouveau_stateobj.h | 49 ++++++++++++++++++++------
 src/gallium/drivers/nv04/nv04_surface_2d.c     |  9 +++--
 src/gallium/drivers/nv30/nv30_context.c        |  3 ++
 src/gallium/drivers/nv30/nv30_context.h        |  1 +
 src/gallium/drivers/nv30/nv30_state_emit.c     | 10 +++++-
 src/gallium/drivers/nv40/nv40_context.c        |  3 ++
 src/gallium/drivers/nv40/nv40_context.h        |  1 +
 src/gallium/drivers/nv40/nv40_state_emit.c     | 10 +++++-
 src/gallium/drivers/nv50/nv50_query.c          |  2 +-
 src/gallium/drivers/nv50/nv50_surface.c        |  2 ++
 src/gallium/drivers/nv50/nv50_transfer.c       |  4 +--
 11 files changed, 76 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 62990f9b6a..9aee9e4956 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -112,20 +112,30 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
 	struct nouveau_pushbuf *pb = chan->pushbuf;
 	unsigned nr, i;
+	int ret = 0;
 
 	nr = so->cur - so->push;
-	if (pb->remaining < nr)
-		nouveau_pushbuf_flush(chan, nr);
+	/* This will flush if we need space.
+	 * We don't actually need the marker.
+	 */
+	if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
+		debug_printf("so_emit failed marker emit with error %d\n", ret);
+		return;
+	}
 	pb->remaining -= nr;
 
 	memcpy(pb->cur, so->push, nr * 4);
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
 					   r->bo, r->data, 0, r->flags,
-					   r->vor, r->tor);
+					   r->vor, r->tor))) {
+			debug_printf("so_emit failed reloc with error %d\n", ret);
+			goto out;
+		}
 	}
+out:
 	pb->cur += nr;
 }
 
@@ -134,26 +144,45 @@ so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
 	struct nouveau_pushbuf *pb = chan->pushbuf;
 	unsigned i;
+	int ret = 0;
 
 	if (!so)
 		return;
 
 	i = so->cur_reloc << 1;
-	if (pb->remaining < i)
-		nouveau_pushbuf_flush(chan, i);
+	/* This will flush if we need space.
+	 * We don't actually need the marker.
+	 */
+	if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
+		debug_printf("so_emit_reloc_markers failed marker emit with" \
+			"error %d\n", ret);
+		return;
+	}
 	pb->remaining -= i;
 
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->packet, 0,
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+					   r->packet, 0,
 					   (r->flags & (NOUVEAU_BO_VRAM |
 							NOUVEAU_BO_GART |
 							NOUVEAU_BO_RDWR)) |
-					   NOUVEAU_BO_DUMMY, 0, 0);
-		nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo, r->data, 0,
+					   NOUVEAU_BO_DUMMY, 0, 0))) {
+			debug_printf("so_emit_reloc_markers failed reloc" \
+						"with error %d\n", ret);
+			pb->remaining += ((so->cur_reloc - i) << 1);
+			return;
+		}
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
+					   r->data, 0,
 					   r->flags | NOUVEAU_BO_DUMMY,
-					   r->vor, r->tor);
+					   r->vor, r->tor))) {
+			debug_printf("so_emit_reloc_markers failed reloc" \
+						"with error %d\n", ret);
+			pb->remaining += ((so->cur_reloc - i) << 1) - 1;
+			return;
+		}
 	}
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 932893eef5..3020806c5d 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -133,6 +133,9 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	assert(sub_w == w || util_is_pot(sub_w));
 	assert(sub_h == h || util_is_pot(sub_h));
 
+	MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+			 ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
 	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
 	OUT_RELOCo(chan, dst_bo,
 	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -202,7 +205,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 	unsigned src_offset = src->offset + sy * src_pitch +
 	                      sx * pf_get_blocksize(src->texture->format);
 
-	WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9);
+	MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
 	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
 	OUT_RELOCo(chan, src_bo,
 		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
@@ -250,7 +253,7 @@ nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
 	if (format < 0)
 		return 1;
 
-	WAIT_RING (chan, 12);
+	MARK_RING (chan, 12, 4);
 	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
 	OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
@@ -315,7 +318,7 @@ nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst,
 	gdirect_format = nv04_rect_format(dst->format);
 	assert(gdirect_format >= 0);
 
-	WAIT_RING (chan, 16);
+	MARK_RING (chan, 16, 4);
 	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index d8300fd69f..46a821a48b 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -58,6 +58,9 @@ nv30_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv30->pipe.is_texture_referenced = nouveau_is_texture_referenced;
 	nv30->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+	screen->base.channel->user_private = nv30;
+	screen->base.channel->flush_notify = nv30_state_flush_notify;
+
 	nv30_init_query_functions(nv30);
 	nv30_init_surface_functions(nv30);
 	nv30_init_state_functions(nv30);
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 8d49366dfc..6f44b1c7fe 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -184,6 +184,7 @@ extern void nv30_fragtex_bind(struct nv30_context *);
 /* nv30_state.c and friends */
 extern boolean nv30_state_validate(struct nv30_context *nv30);
 extern void nv30_state_emit(struct nv30_context *nv30);
+extern void nv30_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv30_state_entry nv30_state_rasterizer;
 extern struct nv30_state_entry nv30_state_scissor;
 extern struct nv30_state_entry nv30_state_stipple;
diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c
index 621b8846c8..ac52d946f0 100644
--- a/src/gallium/drivers/nv30/nv30_state_emit.c
+++ b/src/gallium/drivers/nv30/nv30_state_emit.c
@@ -41,7 +41,7 @@ nv30_state_emit(struct nv30_context *nv30)
 	struct nouveau_channel *chan = nv30->screen->base.channel;
 	struct nv30_state *state = &nv30->state;
 	struct nv30_screen *screen = nv30->screen;
-	unsigned i, samplers;
+	unsigned i;
 	uint64_t states;
 
 	if (nv30->pctx_id != screen->cur_pctx) {
@@ -63,6 +63,14 @@ nv30_state_emit(struct nv30_context *nv30)
 	}
 
 	state->dirty = 0;
+}
+
+void
+nv30_state_flush_notify(struct nouveau_channel *chan)
+{
+	struct nv30_context *nv30 = chan->user_private;
+	struct nv30_state *state = &nv30->state;
+	unsigned i, samplers;
 
 	so_emit_reloc_markers(chan, state->hw[NV30_STATE_FB]);
 	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index 7f008274a4..eb9cce4c78 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -58,6 +58,9 @@ nv40_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv40->pipe.is_texture_referenced = nouveau_is_texture_referenced;
 	nv40->pipe.is_buffer_referenced = nouveau_is_buffer_referenced;
 
+	screen->base.channel->user_private = nv40;
+	screen->base.channel->flush_notify = nv40_state_flush_notify;
+
 	nv40_init_query_functions(nv40);
 	nv40_init_surface_functions(nv40);
 	nv40_init_state_functions(nv40);
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index a3d594167a..cf33b64a86 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -204,6 +204,7 @@ extern void nv40_fragtex_bind(struct nv40_context *);
 extern boolean nv40_state_validate(struct nv40_context *nv40);
 extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40);
 extern void nv40_state_emit(struct nv40_context *nv40);
+extern void nv40_state_flush_notify(struct nouveau_channel *chan);
 extern struct nv40_state_entry nv40_state_rasterizer;
 extern struct nv40_state_entry nv40_state_scissor;
 extern struct nv40_state_entry nv40_state_stipple;
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 198692965d..ba0fbcb26a 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -57,7 +57,7 @@ nv40_state_emit(struct nv40_context *nv40)
 	struct nouveau_channel *chan = nv40->screen->base.channel;
 	struct nv40_state *state = &nv40->state;
 	struct nv40_screen *screen = nv40->screen;
-	unsigned i, samplers;
+	unsigned i;
 	uint64_t states;
 
 	if (nv40->pctx_id != screen->cur_pctx) {
@@ -87,6 +87,14 @@ nv40_state_emit(struct nv40_context *nv40)
 	}
 
 	state->dirty = 0;
+}
+
+void
+nv40_state_flush_notify(struct nouveau_channel *chan)
+{
+	struct nv40_context *nv40 = chan->user_private;
+	struct nv40_state *state = &nv40->state;
+	unsigned i, samplers;
 
 	so_emit_reloc_markers(chan, state->hw[NV40_STATE_FB]);
 	for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) {
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 5305c93d59..268c9823f7 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -93,7 +93,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_query *q = nv50_query(pq);
 
-	WAIT_RING (chan, 5);
+	MARK_RING (chan, 5, 2); /* flush on lack of space or relocs */
 	BEGIN_RING(chan, tesla, NV50TCL_QUERY_ADDRESS_HIGH, 4);
 	OUT_RELOCh(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	OUT_RELOCl(chan, q->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 6bf6f773b0..79655fc08d 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -62,6 +62,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
  		return 1;
 
  	if (!bo->tile_flags) {
+		MARK_RING (chan, 9, 2); /* flush on lack of space or relocs */
  		BEGIN_RING(chan, eng2d, mthd, 2);
  		OUT_RING  (chan, format);
  		OUT_RING  (chan, 1);
@@ -72,6 +73,7 @@ nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst)
  		OUT_RELOCh(chan, bo, ps->offset, flags);
  		OUT_RELOCl(chan, bo, ps->offset, flags);
  	} else {
+		MARK_RING (chan, 11, 2); /* flush on lack of space or relocs */
  		BEGIN_RING(chan, eng2d, mthd, 5);
  		OUT_RING  (chan, format);
  		OUT_RING  (chan, 0);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 104d29a003..6240a0c757 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -82,7 +82,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 	while (height) {
 		int line_count = height > 2047 ? 2047 : height;
 
-		WAIT_RING (chan, 15);
+		MARK_RING (chan, 15, 4); /* flush on lack of space or relocs */
 		BEGIN_RING(chan, m2mf,
 			NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN_HIGH, 2);
 		OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
@@ -265,7 +265,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
 
 	reloc |= NOUVEAU_BO_WR;
 
-	WAIT_RING (chan, 32);
+	MARK_RING (chan, 32, 2); /* flush on lack of space or relocs */
 
 	if (bo->tile_flags) {
 		BEGIN_RING(chan, eng2d, NV50_2D_DST_FORMAT, 5);
-- 
cgit v1.2.3


From 846e38f1c33c3b2e46227886da57beda27b82f0c Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 15 Dec 2009 12:13:43 +0000
Subject: llvmpipe: Fix bad SI -> FP conversion into lp_build_log2_approx.

It should be a bitcast, as the integer value is actually an encoded FP
already.
---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 9c59677a74..4fd459e593 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -1285,7 +1285,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
       /* mant = (float) mantissa(x) */
       mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
       mant = LLVMBuildOr(bld->builder, mant, one, "");
-      mant = LLVMBuildSIToFP(bld->builder, mant, vec_type, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, vec_type, "");
 
       logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
                                     Elements(lp_build_log2_polynomial));
-- 
cgit v1.2.3


From 72c98780697b40da5c34da0aec21d06e46a431d1 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 15 Dec 2009 13:58:53 +0000
Subject: llvmpipe: Fix typo in lp_build_log constant.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 4fd459e593..f0af324404 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -1083,7 +1083,7 @@ lp_build_log(struct lp_build_context *bld,
              LLVMValueRef x)
 {
    /* log(2) */
-   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 1.4426950408889634);
+   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
 
    return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
 }
-- 
cgit v1.2.3


From 85c27c3ef7753ee8bae119dd982df09161b44d5f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 15 Dec 2009 14:15:52 +0000
Subject: llvmpipe: Fix lp_build_polynomial comment.

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index f0af324404..08c86a3b92 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -1095,7 +1095,7 @@ lp_build_log(struct lp_build_context *bld,
 
 /**
  * Generate polynomial.
- * Ex:  x^2 * coeffs[0] + x * coeffs[1] + coeffs[2].
+ * Ex:  coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
  */
 static LLVMValueRef
 lp_build_polynomial(struct lp_build_context *bld,
-- 
cgit v1.2.3


From 3a15c48ecedb985e2cecaaa9061ff579092069f1 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 15 Dec 2009 14:46:43 +0000
Subject: llvmpipe: Fix yet another copynpaste typo in lp_build_log2_approx.

Now fslight looks perfect.
---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 08c86a3b92..847c2a34b1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -1291,7 +1291,7 @@ lp_build_log2_approx(struct lp_build_context *bld,
                                     Elements(lp_build_log2_polynomial));
 
       /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
-      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildMul(bld->builder, mant, bld->one, ""), "");
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
 
       res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
    }
-- 
cgit v1.2.3


From 38a97148bf5df3c32087a5fdd799912d0275267d Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 13 Dec 2009 01:32:39 +0100
Subject: r300g: add acceleration of the clear, surface_copy, and surface_fill
 functions

---
 src/gallium/drivers/r300/Makefile       |   2 +-
 src/gallium/drivers/r300/SConscript     |   2 +-
 src/gallium/drivers/r300/r300_blit.c    | 130 ++++++++++++++++++++++++++++++++
 src/gallium/drivers/r300/r300_blit.h    |  48 ++++++++++++
 src/gallium/drivers/r300/r300_clear.c   |  38 ----------
 src/gallium/drivers/r300/r300_clear.h   |  34 ---------
 src/gallium/drivers/r300/r300_context.c |   9 ++-
 src/gallium/drivers/r300/r300_context.h |   4 +
 8 files changed, 192 insertions(+), 75 deletions(-)
 create mode 100644 src/gallium/drivers/r300/r300_blit.c
 create mode 100644 src/gallium/drivers/r300/r300_blit.h
 delete mode 100644 src/gallium/drivers/r300/r300_clear.c
 delete mode 100644 src/gallium/drivers/r300/r300_clear.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 9c9fc6f64b..8cfd4147c2 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -4,8 +4,8 @@ include $(TOP)/configs/current
 LIBNAME = r300
 
 C_SOURCES = \
+	r300_blit.c \
 	r300_chipset.c \
-	r300_clear.c \
 	r300_context.c \
 	r300_debug.c \
 	r300_emit.c \
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 97989040d2..0d2de17be9 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -9,8 +9,8 @@ env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/sr
 r300 = env.ConvenienceLibrary(
     target = 'r300',
     source = [
+        'r300_blit.c',
         'r300_chipset.c',
-        'r300_clear.c',
         'r300_context.c',
         'r300_debug.c',
         'r300_emit.c',
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
new file mode 100644
index 0000000000..ffe066d536
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_blit.h"
+#include "r300_context.h"
+
+#include "util/u_rect.h"
+
+static void r300_blitter_save_states(struct r300_context* r300)
+{
+    util_blitter_save_blend(r300->blitter, r300->blend_state);
+    util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state);
+    util_blitter_save_rasterizer(r300->blitter, r300->rs_state);
+    util_blitter_save_fragment_shader(r300->blitter, r300->fs);
+    util_blitter_save_vertex_shader(r300->blitter, r300->vs);
+}
+
+/* Clear currently bound buffers. */
+void r300_clear(struct pipe_context* pipe,
+                unsigned buffers,
+                const float* rgba,
+                double depth,
+                unsigned stencil)
+{
+    /* XXX Implement fastfill.
+     *
+     * If fastfill is enabled, a few facts should be considered:
+     *
+     * 1) Zbuffer must be micro-tiled and whole microtiles must be
+     *    written.
+     *
+     * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
+     *    equal to 0.
+     *
+     * 3) RB3D_COLOR_CLEAR_VALUE is used to clear a colorbuffer and
+     *    RB3D_COLOR_CHANNEL_MASK must be equal to 0.
+     *
+     * 4) ZB_CB_CLEAR can be used to make the ZB units help in clearing
+     *    the colorbuffer. The color clear value is supplied through both
+     *    RB3D_COLOR_CLEAR_VALUE and ZB_DEPTHCLEARVALUE, and the colorbuffer
+     *    must be set in ZB_DEPTHOFFSET and ZB_DEPTHPITCH in addition to
+     *    RB3D_COLOROFFSET and RB3D_COLORPITCH. It's obvious that the zbuffer
+     *    will not be cleared and multiple render targets cannot be cleared
+     *    this way either.
+     *
+     * 5) For 16-bit integer buffering, compression causes a hang with one or
+     *    two samples and should not be used.
+     *
+     * 6) Fastfill must not be used if reading of compressed Z data is disabled
+     *    and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
+     *    i.e. it cannot be used to compress the zbuffer.
+     *    (what the hell does that mean and how does it fit in clearing
+     *    the buffers?)
+     *
+     * - Marek
+     */
+
+    struct r300_context* r300 = r300_context(pipe);
+
+    r300_blitter_save_states(r300);
+
+    util_blitter_clear(r300->blitter,
+                       r300->framebuffer_state.width,
+                       r300->framebuffer_state.height,
+                       r300->framebuffer_state.nr_cbufs,
+                       buffers, rgba, depth, stencil);
+}
+
+/* Copy a block of pixels from one surface to another. */
+void r300_surface_copy(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface* src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height)
+{
+    struct r300_context* r300 = r300_context(pipe);
+
+    /* Yeah we have to save all those states to ensure this blitter operation
+     * is really transparent. The states will be restored by the blitter once
+     * copying is done. */
+    r300_blitter_save_states(r300);
+    util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+    util_blitter_save_fragment_sampler_states(
+        r300->blitter, r300->sampler_count, (void**)r300->sampler_states);
+
+    util_blitter_save_fragment_sampler_textures(
+        r300->blitter, r300->texture_count,
+        (struct pipe_texture**)r300->textures);
+
+    /* Do a copy */
+    util_blitter_copy(r300->blitter,
+                      dst, dstx, dsty, src, srcx, srcy, width, height, TRUE);
+}
+
+/* Fill a region of a surface with a constant value. */
+void r300_surface_fill(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value)
+{
+    struct r300_context* r300 = r300_context(pipe);
+
+    r300_blitter_save_states(r300);
+    util_blitter_save_framebuffer(r300->blitter, &r300->framebuffer_state);
+
+    util_blitter_fill(r300->blitter,
+                      dst, dstx, dsty, width, height, value);
+}
diff --git a/src/gallium/drivers/r300/r300_blit.h b/src/gallium/drivers/r300/r300_blit.h
new file mode 100644
index 0000000000..029e4f98e7
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_blit.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
+
+struct pipe_context;
+struct pipe_surface;
+
+void r300_clear(struct pipe_context* pipe,
+                unsigned buffers,
+                const float* rgba,
+                double depth,
+                unsigned stencil);
+
+void r300_surface_copy(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       struct pipe_surface* src,
+                       unsigned srcx, unsigned srcy,
+                       unsigned width, unsigned height);
+
+void r300_surface_fill(struct pipe_context* pipe,
+                       struct pipe_surface* dst,
+                       unsigned dstx, unsigned dsty,
+                       unsigned width, unsigned height,
+                       unsigned value);
+
+#endif /* R300_BLIT_H */
diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c
deleted file mode 100644
index 02d6d504fc..0000000000
--- a/src/gallium/drivers/r300/r300_clear.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#include "r300_clear.h"
-#include "r300_context.h"
-
-#include "util/u_clear.h"
-
-/* Clears currently bound buffers. */
-void r300_clear(struct pipe_context* pipe,
-                unsigned buffers,
-                const float* rgba,
-                double depth,
-                unsigned stencil)
-{
-    /* XXX we can and should do one clear if both color and zs are set */
-    util_clear(pipe, &r300_context(pipe)->framebuffer_state,
-            buffers, rgba, depth, stencil);
-}
diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h
deleted file mode 100644
index b8fcdf273c..0000000000
--- a/src/gallium/drivers/r300/r300_clear.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef R300_CLEAR_H
-#define R300_CLEAR_H
-
-struct pipe_context;
-
-void r300_clear(struct pipe_context* pipe,
-                unsigned buffers,
-                const float* rgba,
-                double depth,
-                unsigned stencil);
-
-#endif /* R300_CLEAR_H */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 5b337f03ac..d5c2d63d39 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -28,7 +28,7 @@
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 
-#include "r300_clear.h"
+#include "r300_blit.h"
 #include "r300_context.h"
 #include "r300_flush.h"
 #include "r300_query.h"
@@ -52,6 +52,8 @@ static void r300_destroy_context(struct pipe_context* context)
     struct r300_context* r300 = r300_context(context);
     struct r300_query* query, * temp;
 
+    util_blitter_destroy(r300->blitter);
+
     util_hash_table_foreach(r300->shader_hash_table, r300_clear_hash_table,
         NULL);
     util_hash_table_destroy(r300->shader_hash_table);
@@ -124,6 +126,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->context.destroy = r300_destroy_context;
 
     r300->context.clear = r300_clear;
+    r300->context.surface_copy = r300_surface_copy;
+    r300->context.surface_fill = r300_surface_fill;
 
     if (r300screen->caps->has_tcl) {
         r300->context.draw_arrays = r300_draw_arrays;
@@ -175,5 +179,8 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->winsys->set_flush_cb(r300->winsys, r300_flush_cb, r300);
     r300->dirty_state = R300_NEW_KITCHEN_SINK;
     r300->dirty_hw++;
+
+    r300->blitter = util_blitter_create(&r300->context);
+
     return &r300->context;
 }
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 0be190392a..6bd2766730 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -25,6 +25,8 @@
 
 #include "draw/draw_vertex.h"
 
+#include "util/u_blitter.h"
+
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
 
@@ -248,6 +250,8 @@ struct r300_context {
     struct radeon_winsys* winsys;
     /* Draw module. Used mostly for SW TCL. */
     struct draw_context* draw;
+    /* Accelerated blit support. */
+    struct blitter_context* blitter;
 
     /* Vertex buffer for rendering. */
     struct pipe_buffer* vbo;
-- 
cgit v1.2.3


From c5e0b0bc37315cd29a84e71854dca951149b8bc8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 13 Dec 2009 02:19:18 +0100
Subject: r300g: add Z24X8 to the list of unsupported sampler formats on
 R3xx-R4xx

---
 src/gallium/drivers/r300/r300_screen.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index c0d9797020..feb571a23d 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -220,12 +220,18 @@ static boolean check_tex_format(enum pipe_format format, uint32_t usage,
 
         /* Z buffer or texture */
         case PIPE_FORMAT_Z16_UNORM:
+            retval = usage &
+                (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
+                 PIPE_TEXTURE_USAGE_SAMPLER);
+            break;
+
+        /* 24bit Z buffer can only be used as a texture on R500. */
         case PIPE_FORMAT_Z24X8_UNORM:
         /* Z buffer with stencil or texture */
         case PIPE_FORMAT_Z24S8_UNORM:
             retval = usage &
                 (PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
-                 PIPE_TEXTURE_USAGE_SAMPLER);
+                 (is_r500 ? PIPE_TEXTURE_USAGE_SAMPLER : 0));
             break;
 
         /* Definitely unsupported formats. */
-- 
cgit v1.2.3


From de0befc4b2e3061f865a5b39295d64a8f003e9e8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 14 Dec 2009 05:22:36 +0100
Subject: r300g: remove unnecessary flush in set_sampler_textures

---
 src/gallium/drivers/r300/r300_state.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index edf7114bbb..3cfa2e63f9 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -626,8 +626,6 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
         return;
     }
     
-    r300->context.flush(&r300->context, 0, NULL);
-
     for (i = 0; i < count; i++) {
         if (r300->textures[i] != (struct r300_texture*)texture[i]) {
             pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
-- 
cgit v1.2.3


From cf85bf9cd0c168caed6210a896df285c3d86db03 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 14 Dec 2009 05:47:54 +0100
Subject: r300g: set the number of colorbuffers in RB3D_CCTL

---
 src/gallium/drivers/r300/r300_emit.c            | 5 ++++-
 src/gallium/drivers/r300/r300_reg.h             | 1 +
 src/gallium/drivers/r300/r300_state_invariant.c | 3 +--
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index f784e1fa8e..9644efb717 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -335,7 +335,7 @@ void r300_emit_fb_state(struct r300_context* r300,
     assert(fb->nr_cbufs <= 4);
 
     BEGIN_CS((10 * fb->nr_cbufs) + (2 * (4 - fb->nr_cbufs)) +
-             (fb->zsbuf ? 10 : 0) + 4);
+             (fb->zsbuf ? 10 : 0) + 6);
 
     /* Flush and free renderbuffer caches. */
     OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
@@ -345,6 +345,9 @@ void r300_emit_fb_state(struct r300_context* r300,
         R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
         R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
 
+    /* Set the number of colorbuffers. */
+    OUT_CS_REG(R300_RB3D_CCTL, R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs));
+
     /* Set up colorbuffers. */
     for (i = 0; i < fb->nr_cbufs; i++) {
         surf = fb->cbufs[i];
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index c1ea87d11e..d8d08fbe26 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2145,6 +2145,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /* Unpipelined. */
 #define R300_RB3D_CCTL                      0x4e00
+#	define R300_RB3D_CCTL_NUM_MULTIWRITES(x)       (MAX2(((x)-1), 0) << 5)
 #	define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER                (0 << 5)
 #	define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS               (1 << 5)
 #	define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS               (2 << 5)
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 46d1cb39b5..3320d43b27 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+    BEGIN_CS(54 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
     /* Flush PVS. */
     OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
 
@@ -123,7 +123,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
     OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
     OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
-    OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
     OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
     if (caps->is_r500) {
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
-- 
cgit v1.2.3


From a6d701d1c6ed8e0a649d62104aeded8fb25c66d8 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 14 Dec 2009 05:29:12 +0100
Subject: r300g: if no colorbuffers are set, disable blending and set the color
 mask to 0

This seems to be the only way to disable the first colorbuffer.
---
 src/gallium/drivers/r300/r300_emit.c  | 13 ++++++++++---
 src/gallium/drivers/r300/r300_state.c |  1 +
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 9644efb717..55e4f94afe 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -41,9 +41,16 @@ void r300_emit_blend_state(struct r300_context* r300,
     CS_LOCALS(r300);
     BEGIN_CS(8);
     OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 3);
-    OUT_CS(blend->blend_control);
-    OUT_CS(blend->alpha_blend_control);
-    OUT_CS(blend->color_channel_mask);
+    if (r300->framebuffer_state.nr_cbufs) {
+        OUT_CS(blend->blend_control);
+        OUT_CS(blend->alpha_blend_control);
+        OUT_CS(blend->color_channel_mask);
+    } else {
+        OUT_CS(0);
+        OUT_CS(0);
+        OUT_CS(0);
+        /* XXX also disable fastfill here once it's supported */
+    }
     OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
     OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
     END_CS;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 3cfa2e63f9..91cf972ede 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -339,6 +339,7 @@ static void
         r300->dirty_state |= R300_NEW_SCISSOR;
     }
     r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
+    r300->dirty_state |= R300_NEW_BLEND;
 }
 
 /* Create fragment shader state. */
-- 
cgit v1.2.3


From acce4824ec284b2a9bfdc847d7d79b8064912db9 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 14 Dec 2009 06:39:08 +0100
Subject: r300g: clamp vertex indices to [min,max] everywhere

---
 src/gallium/drivers/r300/r300_render.c          | 8 +++++---
 src/gallium/drivers/r300/r300_state_invariant.c | 5 +----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 35b335df6a..4b210f72db 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -82,8 +82,9 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(4);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count);
+    BEGIN_CS(6);
+    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
     OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
            r300_translate_primitive(mode));
@@ -108,7 +109,8 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     assert((start * indexSize)  % 4 == 0);
     assert(offset_dwords == 0);
 
-    BEGIN_CS(10);
+    BEGIN_CS(12);
+    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
     if (indexSize == 4) {
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 3320d43b27..d80e20a493 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
 
-    BEGIN_CS(24 + (caps->has_tcl ? 2: 0));
+    BEGIN_CS(20 + (caps->has_tcl ? 2: 0));
 
     /*** Graphics Backend (GB) ***/
     /* Various GB enables */
@@ -70,9 +70,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_US_W_FMT, 0x0);
 
     /*** VAP ***/
-    /* Max and min vertex index clamp. */
-    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
-    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff);
     /* Sign/normalize control */
     OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
     /* TCL-only stuff */
-- 
cgit v1.2.3


From ded4ecde60e36bbf7204ebb3b43c6ec065ff1f06 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 14 Dec 2009 06:55:54 +0100
Subject: r300g: clean up the invariant state

---
 src/gallium/drivers/r300/r300_state_invariant.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index d80e20a493..bcd4c030f9 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -81,15 +81,11 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(54 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
-    /* Flush PVS. */
-    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+    BEGIN_CS(44 + (caps->has_tcl ? 7 : 0) + (caps->is_r500 ? 4 : 0));
 
-    OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
-        R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
-        R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
-        R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
     if (caps->has_tcl) {
+        /*Flushing PVS is required before the VAP_GB registers can be changed*/
+        OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
         OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
         OUT_CS_32F(1.0);
         OUT_CS_32F(1.0);
@@ -125,13 +121,10 @@ void r300_emit_invariant_state(struct r300_context* r300)
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
         OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
     }
-    OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
-    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
     OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
     OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
-    OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
 
     /* XXX */
     OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
-- 
cgit v1.2.3


From 2ddee2cfbcef59128b2a251d5391ddc2b4aea4bb Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Tue, 15 Dec 2009 05:35:03 +0100
Subject: r300g: fix emission of which textures are enabled

It fixes most of the "Bad CS" issues in piglit/texCombine and piglit/fbo.
Some other issues of this kind will get fixed in the kernel soon (depth-only
rendering, S3TC, and RGTC).
---
 src/gallium/drivers/r300/r300_emit.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 55e4f94afe..55c8aa07bd 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -883,10 +883,21 @@ void r300_emit_viewport_state(struct r300_context* r300,
 
 void r300_emit_texture_count(struct r300_context* r300)
 {
+    uint32_t tx_enable = 0;
+    int i;
     CS_LOCALS(r300);
 
+    /* Notice that texture_count and sampler_count are just sizes
+     * of the respective arrays. We still have to check for the individual
+     * elements. */
+    for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
+        if (r300->textures[i]) {
+            tx_enable |= 1 << i;
+        }
+    }
+
     BEGIN_CS(2);
-    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+    OUT_CS_REG(R300_TX_ENABLE, tx_enable);
     END_CS;
 
 }
-- 
cgit v1.2.3


From 43d6c81ae2b3cb263f803bb9881c0823c1ed7dda Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 14 Dec 2009 15:24:31 -0700
Subject: llvmpipe: fix broken lp_build_abs()

---
 src/gallium/drivers/llvmpipe/lp_bld_arit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
index 847c2a34b1..eea6b5d6a5 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -629,7 +629,8 @@ lp_build_abs(struct lp_build_context *bld,
    if(type.floating) {
       /* Mask out the sign bit */
       LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+      unsigned long absMask = ~(1 << (type.width - 1));
+      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
       a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
       a = LLVMBuildAnd(bld->builder, a, mask, "");
       a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-- 
cgit v1.2.3


From f1f49bd465b899d1c85aa07650ca5b62a50303b0 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Mon, 14 Dec 2009 15:27:35 -0700
Subject: llvmpipe: fix broken TGSI_OPCODE_FRC codegen

---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 3eb0e0c57c..a67c70ff25 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -763,7 +763,7 @@ emit_instruction(
       FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
          src0 = emit_fetch( bld, inst, 0, chan_index );
          tmp0 = lp_build_floor(&bld->base, src0);
-         tmp0 = lp_build_sub(&bld->base, tmp0, src0);
+         tmp0 = lp_build_sub(&bld->base, src0, tmp0);
          dst0[chan_index] = tmp0;
       }
       break;
-- 
cgit v1.2.3


From 2584c5bd253e53ba052356360a33b5ec976e9716 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 16 Dec 2009 15:06:02 +0000
Subject: llvmpipe: add LP_DEBUG env var

Cherry-picked from dec35d04aeb398eef159aaf8cde5e0d04622b811.
---
 src/gallium/drivers/llvmpipe/lp_debug.h    |  71 +++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_screen.c   |  22 ++++++
 src/gallium/drivers/llvmpipe/lp_state_fs.c | 117 ++++++++++++++---------------
 3 files changed, 151 insertions(+), 59 deletions(-)
 create mode 100644 src/gallium/drivers/llvmpipe/lp_debug.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_debug.h b/src/gallium/drivers/llvmpipe/lp_debug.h
new file mode 100644
index 0000000000..74b2757494
--- /dev/null
+++ b/src/gallium/drivers/llvmpipe/lp_debug.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ * 
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+
+#ifndef LP_DEBUG_H
+#define LP_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+extern void
+st_print_current(void);
+
+
+#define DEBUG_PIPE      0x1
+#define DEBUG_TGSI      0x2
+#define DEBUG_TEX       0x4
+#define DEBUG_ASM       0x8
+#define DEBUG_SETUP     0x10
+#define DEBUG_RAST      0x20
+#define DEBUG_QUERY     0x40
+#define DEBUG_SCREEN    0x80
+#define DEBUG_JIT       0x100
+
+#ifdef DEBUG
+extern int LP_DEBUG;
+#else
+#define LP_DEBUG 0
+#endif
+
+void st_debug_init( void );
+
+static INLINE void
+LP_DBG( unsigned flag, const char *fmt, ... )
+{
+    if (LP_DEBUG & flag)
+    {
+        va_list args;
+
+        va_start( args, fmt );
+        debug_vprintf( fmt, args );
+        va_end( args );
+    }
+}
+
+
+#endif /* LP_DEBUG_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 19fe2850fd..9b47415f00 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -36,6 +36,24 @@
 #include "lp_winsys.h"
 #include "lp_jit.h"
 #include "lp_screen.h"
+#include "lp_debug.h"
+
+#ifdef DEBUG
+int LP_DEBUG = 0;
+
+static const struct debug_named_value lp_debug_flags[] = {
+   { "pipe",   DEBUG_PIPE },
+   { "tgsi",   DEBUG_TGSI },
+   { "tex",    DEBUG_TEX },
+   { "asm",    DEBUG_ASM },
+   { "setup",  DEBUG_SETUP },
+   { "rast",   DEBUG_RAST },
+   { "query",  DEBUG_QUERY },
+   { "screen", DEBUG_SCREEN },
+   { "jit",    DEBUG_JIT },
+   {NULL, 0}
+};
+#endif
 
 
 static const char *
@@ -259,6 +277,10 @@ llvmpipe_create_screen(struct llvmpipe_winsys *winsys)
 {
    struct llvmpipe_screen *screen = CALLOC_STRUCT(llvmpipe_screen);
 
+#ifdef DEBUG
+   LP_DEBUG = debug_get_flags_option("LP_DEBUG", lp_debug_flags, 0 );
+#endif
+
    if (!screen)
       return NULL;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index ee0f69b2af..22683ff8b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -87,6 +87,7 @@
 #include "lp_state.h"
 #include "lp_quad.h"
 #include "lp_tex_sample.h"
+#include "lp_debug.h"
 
 
 static const unsigned char quad_offset_x[4] = {0, 1, 0, 1};
@@ -408,59 +409,58 @@ generate_fragment(struct llvmpipe_context *lp,
    unsigned i;
    unsigned chan;
 
-#ifdef DEBUG
-   tgsi_dump(shader->base.tokens, 0);
-   if(key->depth.enabled) {
-      debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
-      debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
-      debug_printf("depth.writemask = %u\n", key->depth.writemask);
-   }
-   if(key->alpha.enabled) {
-      debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
-      debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
-   }
-   if(key->blend.logicop_enable) {
-      debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
-   }
-   else if(key->blend.blend_enable) {
-      debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
-      debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
-      debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
-      debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
-      debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
-      debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
-   }
-   debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
-   for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
-      if(key->sampler[i].format) {
-         debug_printf("sampler[%u] = \n", i);
-         debug_printf("  .format = %s\n",
-                      pf_name(key->sampler[i].format));
-         debug_printf("  .target = %s\n",
-                      debug_dump_tex_target(key->sampler[i].target, TRUE));
-         debug_printf("  .pot = %u %u %u\n",
-                      key->sampler[i].pot_width,
-                      key->sampler[i].pot_height,
-                      key->sampler[i].pot_depth);
-         debug_printf("  .wrap = %s %s %s\n",
-                      debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
-                      debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
-                      debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
-         debug_printf("  .min_img_filter = %s\n",
-                      debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
-         debug_printf("  .min_mip_filter = %s\n",
-                      debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
-         debug_printf("  .mag_img_filter = %s\n",
-                      debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-         if(key->sampler[i].compare_mode)
-            debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
-         debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
-         debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+   if (LP_DEBUG & DEBUG_JIT) {
+      tgsi_dump(shader->base.tokens, 0);
+      if(key->depth.enabled) {
+         debug_printf("depth.format = %s\n", pf_name(key->zsbuf_format));
+         debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+         debug_printf("depth.writemask = %u\n", key->depth.writemask);
+      }
+      if(key->alpha.enabled) {
+         debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+         debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
+      }
+      if(key->blend.logicop_enable) {
+         debug_printf("blend.logicop_func = %u\n", key->blend.logicop_func);
+      }
+      else if(key->blend.blend_enable) {
+         debug_printf("blend.rgb_func = %s\n",   debug_dump_blend_func  (key->blend.rgb_func, TRUE));
+         debug_printf("rgb_src_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_src_factor, TRUE));
+         debug_printf("rgb_dst_factor = %s\n",   debug_dump_blend_factor(key->blend.rgb_dst_factor, TRUE));
+         debug_printf("alpha_func = %s\n",       debug_dump_blend_func  (key->blend.alpha_func, TRUE));
+         debug_printf("alpha_src_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_src_factor, TRUE));
+         debug_printf("alpha_dst_factor = %s\n", debug_dump_blend_factor(key->blend.alpha_dst_factor, TRUE));
+      }
+      debug_printf("blend.colormask = 0x%x\n", key->blend.colormask);
+      for(i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
+         if(key->sampler[i].format) {
+            debug_printf("sampler[%u] = \n", i);
+            debug_printf("  .format = %s\n",
+                         pf_name(key->sampler[i].format));
+            debug_printf("  .target = %s\n",
+                         debug_dump_tex_target(key->sampler[i].target, TRUE));
+            debug_printf("  .pot = %u %u %u\n",
+                         key->sampler[i].pot_width,
+                         key->sampler[i].pot_height,
+                         key->sampler[i].pot_depth);
+            debug_printf("  .wrap = %s %s %s\n",
+                         debug_dump_tex_wrap(key->sampler[i].wrap_s, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_t, TRUE),
+                         debug_dump_tex_wrap(key->sampler[i].wrap_r, TRUE));
+            debug_printf("  .min_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].min_img_filter, TRUE));
+            debug_printf("  .min_mip_filter = %s\n",
+                         debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
+            debug_printf("  .mag_img_filter = %s\n",
+                         debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
+            if(key->sampler[i].compare_mode)
+               debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+            debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
+            debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
+         }
       }
    }
 
-#endif
-
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
    if(!variant)
       return NULL;
@@ -599,8 +599,8 @@ generate_fragment(struct llvmpipe_context *lp,
    }
 
    lp_build_conv_mask(builder, fs_type, blend_type,
-                               fs_mask, num_fs,
-                               &blend_mask, 1);
+                      fs_mask, num_fs,
+                      &blend_mask, 1);
 
    /*
     * Blending.
@@ -631,16 +631,15 @@ generate_fragment(struct llvmpipe_context *lp,
 
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
-#ifdef DEBUG
-   LLVMDumpValue(variant->function);
-   debug_printf("\n");
-#endif
+   if (LP_DEBUG & DEBUG_JIT) {
+      LLVMDumpValue(variant->function);
+      debug_printf("\n");
+   }
 
    variant->jit_function = (lp_jit_frag_func)LLVMGetPointerToGlobal(screen->engine, variant->function);
 
-#ifdef DEBUG
-   lp_disassemble(variant->jit_function);
-#endif
+   if (LP_DEBUG & DEBUG_ASM)
+      lp_disassemble(variant->jit_function);
 
    variant->next = shader->variants;
    shader->variants = variant;
-- 
cgit v1.2.3


From 3af2ddbe943f0fe0d6b0ba9e627cbb82d0cc79f2 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 17 Dec 2009 21:25:47 +0100
Subject: s/desc->type/desc->channel[0].type/

---
 src/gallium/drivers/nv50/nv50_vbo.c           | 4 ++--
 src/gallium/drivers/r300/r300_state_inlines.h | 4 ++--
 src/mesa/state_tracker/st_format.c            | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index d1c9f3f590..f7fa0659e8 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -71,7 +71,7 @@ nv50_vbo_type_to_hw(enum pipe_format format)
 	desc = util_format_description(format);
 	assert(desc);
 
-	switch (desc->type) {
+	switch (desc->channel[0].type) {
 	case UTIL_FORMAT_TYPE_FLOAT:
 		return NV50TCL_VERTEX_ARRAY_ATTRIB_TYPE_FLOAT;
 	case UTIL_FORMAT_TYPE_UNSIGNED:
@@ -339,7 +339,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 	desc = util_format_description(pf);
 	assert(desc);
 
-	if ((desc->type != UTIL_FORMAT_TYPE_FLOAT) ||
+	if ((desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT) ||
 	    util_format_get_component_bits(pf, UTIL_FORMAT_COLORSPACE_RGB, 0) != 32)
 		return FALSE;
 
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 7cd1f87630..dbe42edd91 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -483,7 +483,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
         assert(0);
     }
 
-    switch (desc->type) {
+    switch (desc->channel[0].type) {
         /* Half-floats, floats, doubles */
         case UTIL_FORMAT_TYPE_FLOAT:
             switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0)) {
@@ -525,7 +525,7 @@ r300_translate_vertex_data_type(enum pipe_format format) {
             assert(0);
     }
 
-    if (desc->type == UTIL_FORMAT_TYPE_SIGNED) {
+    if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
         result |= R300_SIGNED;
     }
     if (desc->channel[0].normalized) {
diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c
index 329ae03db2..94da0f61e0 100644
--- a/src/mesa/state_tracker/st_format.c
+++ b/src/mesa/state_tracker/st_format.c
@@ -99,20 +99,20 @@ st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo)
       else {
          const GLuint size = format_max_bits(format);
          if (size == 8) {
-            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
+            if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_BYTE;
             else
                pinfo->datatype = GL_BYTE;
          }
          else if (size == 16) {
-            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
+            if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_SHORT;
             else
                pinfo->datatype = GL_SHORT;
          }
          else {
             assert( size <= 32 );
-            if (desc->type == UTIL_FORMAT_TYPE_UNSIGNED)
+            if (desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED)
                pinfo->datatype = GL_UNSIGNED_INT;
             else
                pinfo->datatype = GL_INT;
-- 
cgit v1.2.3


From b1ed72ebe2599ec178f51d86fd42f26486b9a19b Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 17 Dec 2009 23:41:57 +0100
Subject: Move the remaining format pf_get_* functions to u_format.h.

Previously they depended on format blocks, but after removing those
they started depending on format encoding.
---
 progs/rbug/bin_to_bmp.c                            |  13 +-
 src/gallium/auxiliary/util/u_blitter.c             |   4 +-
 src/gallium/auxiliary/util/u_debug.c               |   7 +-
 src/gallium/auxiliary/util/u_format.h              |  83 ++++++++++++-
 src/gallium/auxiliary/util/u_gen_mipmap.c          |  12 +-
 src/gallium/auxiliary/util/u_rect.c                |  21 ++--
 src/gallium/auxiliary/util/u_tile.c                |   9 +-
 src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c   |   4 +-
 src/gallium/drivers/cell/ppu/cell_texture.c        |  20 +--
 src/gallium/drivers/i915/i915_surface.c            |  19 +--
 src/gallium/drivers/i915/i915_texture.c            |  75 ++++++------
 src/gallium/drivers/llvmpipe/lp_setup.c            |   3 +-
 src/gallium/drivers/llvmpipe/lp_texture.c          |  18 +--
 src/gallium/drivers/nv04/nv04_surface_2d.c         |  13 +-
 src/gallium/drivers/nv04/nv04_transfer.c           |   3 +-
 src/gallium/drivers/nv10/nv10_miptree.c            |   5 +-
 src/gallium/drivers/nv10/nv10_transfer.c           |   3 +-
 src/gallium/drivers/nv20/nv20_miptree.c            |   5 +-
 src/gallium/drivers/nv20/nv20_transfer.c           |   3 +-
 src/gallium/drivers/nv30/nv30_miptree.c            |   5 +-
 src/gallium/drivers/nv30/nv30_transfer.c           |   3 +-
 src/gallium/drivers/nv40/nv40_miptree.c            |   5 +-
 src/gallium/drivers/nv40/nv40_transfer.c           |   3 +-
 src/gallium/drivers/nv50/nv50_miptree.c            |   9 +-
 src/gallium/drivers/nv50/nv50_transfer.c           |  23 ++--
 src/gallium/drivers/r300/r300_emit.c               |   8 +-
 src/gallium/drivers/r300/r300_screen.c             |   5 +-
 src/gallium/drivers/r300/r300_texture.c            |   8 +-
 src/gallium/drivers/softpipe/sp_texture.c          |  14 +--
 src/gallium/drivers/softpipe/sp_tile_cache.c       |   2 +-
 src/gallium/drivers/svga/svga_screen_texture.c     |  14 +--
 src/gallium/drivers/svga/svga_state_vs.c           |   2 +-
 src/gallium/drivers/trace/tr_rbug.c                |  15 +--
 src/gallium/drivers/trace/tr_screen.c              |   3 +-
 src/gallium/include/pipe/p_format.h                | 134 ---------------------
 src/gallium/state_trackers/python/st_sample.c      |  15 +--
 .../state_trackers/python/st_softpipe_winsys.c     |   4 +-
 .../winsys/drm/nouveau/drm/nouveau_drm_api.c       |   2 +-
 src/gallium/winsys/drm/radeon/core/radeon_buffer.c |   5 +-
 src/gallium/winsys/egl_xlib/sw_winsys.c            |   4 +-
 src/gallium/winsys/g3dvl/xlib/xsp_winsys.c         |   4 +-
 src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c       |   4 +-
 src/gallium/winsys/gdi/gdi_softpipe_winsys.c       |   8 +-
 src/gallium/winsys/xlib/xlib_cell.c                |   4 +-
 src/gallium/winsys/xlib/xlib_llvmpipe.c            |  10 +-
 src/gallium/winsys/xlib/xlib_softpipe.c            |  10 +-
 src/mesa/state_tracker/st_cb_drawpixels.c          |   4 +-
 src/mesa/state_tracker/st_cb_fbo.c                 |   4 +-
 src/mesa/state_tracker/st_cb_texture.c             |  26 ++--
 src/mesa/state_tracker/st_gen_mipmap.c             |   5 +-
 50 files changed, 322 insertions(+), 360 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/progs/rbug/bin_to_bmp.c b/progs/rbug/bin_to_bmp.c
index cdae3486ce..03ff622fee 100644
--- a/progs/rbug/bin_to_bmp.c
+++ b/progs/rbug/bin_to_bmp.c
@@ -25,6 +25,7 @@
 #include "pipe/p_compiler.h"
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_debug.h"
 #include "util/u_network.h"
@@ -54,10 +55,7 @@ static void dump(unsigned width, unsigned height,
                  unsigned src_stride, enum pipe_format src_format,
                  uint8_t *data, unsigned src_size)
 {
-   struct pipe_format_block src_block;
-
    enum pipe_format dst_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-   struct pipe_format_block dst_block;
    unsigned dst_stride;
    unsigned dst_size;
    float *rgba;
@@ -65,14 +63,11 @@ static void dump(unsigned width, unsigned height,
    char filename[512];
 
    {
-      pf_get_block(src_format, &src_block);
-      assert(src_stride >= pf_get_stride(&src_block, width));
-      assert(src_size >= pf_get_2d_size(&src_block, src_stride, width));
+      assert(src_stride >= util_format_get_stride(src_format, width));
    }
    {
-      pf_get_block(dst_format, &dst_block);
-      dst_stride = pf_get_stride(&dst_block, width);
-      dst_size = pf_get_2d_size(&dst_block, dst_stride, width);
+      dst_stride = util_format_get_stride(dst_format, width);
+      dst_size = util_format_get_2d_size(dst_format, dst_stride, width);
       rgba = MALLOC(dst_size);
    }
 
diff --git a/src/gallium/auxiliary/util/u_blitter.c b/src/gallium/auxiliary/util/u_blitter.c
index 895af2c8d0..0242b79615 100644
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -579,8 +579,8 @@ void util_blitter_copy(struct blitter_context *blitter,
    if (!dst->texture || !src->texture)
       return;
 
-   is_depth = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_Z) != 0;
-   is_stencil = pf_get_component_bits(src->format, PIPE_FORMAT_COMP_S) != 0;
+   is_depth = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 0) != 0;
+   is_stencil = util_format_get_component_bits(src->format, UTIL_FORMAT_COLORSPACE_ZS, 1) != 0;
    dst_tex_usage = is_depth || is_stencil ? PIPE_TEXTURE_USAGE_DEPTH_STENCIL :
                                             PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 40633574b0..27e0b0d159 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -64,6 +64,7 @@
 #include "pipe/p_format.h" 
 #include "pipe/p_state.h" 
 #include "pipe/p_inlines.h" 
+#include "util/u_format.h"
 #include "util/u_memory.h" 
 #include "util/u_string.h" 
 #include "util/u_stream.h" 
@@ -670,9 +671,9 @@ void debug_dump_surface(const char *prefix,
    
    debug_dump_image(prefix, 
                     texture->format,
-                    pf_get_blocksize(texture->format), 
-                    pf_get_nblocksx(texture->format, transfer->width),
-                    pf_get_nblocksy(texture->format, transfer->height),
+                    util_format_get_blocksize(texture->format), 
+                    util_format_get_nblocksx(texture->format, transfer->width),
+                    util_format_get_nblocksy(texture->format, transfer->height),
                     transfer->stride,
                     data);
    
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 72da2a44c7..97e4d959bc 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -200,7 +200,7 @@ util_format_is_depth_and_stencil(enum pipe_format format)
  * Return total bits needed for the pixel format.
  */
 static INLINE uint
-util_format_get_bits(enum pipe_format format)
+util_format_get_blocksizebits(enum pipe_format format)
 {
    const struct util_format_description *desc = util_format_description(format);
 
@@ -216,15 +216,92 @@ util_format_get_bits(enum pipe_format format)
  * Return bytes per pixel for the given format.
  */
 static INLINE uint
-util_format_get_size(enum pipe_format format)
+util_format_get_blocksize(enum pipe_format format)
 {
-   uint bits = util_format_get_bits(format);
+   uint bits = util_format_get_blocksizebits(format);
 
    assert(bits % 8 == 0);
 
    return bits / 8;
 }
 
+static INLINE uint
+util_format_get_blockwidth(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   assert(format);
+   if (!format) {
+      return 1;
+   }
+
+   switch (desc->layout) {
+   case UTIL_FORMAT_LAYOUT_YUV:
+      return 2;
+   case UTIL_FORMAT_LAYOUT_DXT:
+      return 4;
+   default:
+      return 1;
+   }
+}
+
+static INLINE uint
+util_format_get_blockheight(enum pipe_format format)
+{
+   const struct util_format_description *desc = util_format_description(format);
+
+   assert(format);
+   if (!format) {
+      return 1;
+   }
+
+   switch (desc->layout) {
+   case UTIL_FORMAT_LAYOUT_DXT:
+      return 4;
+   default:
+      return 1;
+   }
+}
+
+static INLINE unsigned
+util_format_get_nblocksx(enum pipe_format format,
+                         unsigned x)
+{
+   unsigned blockwidth = util_format_get_blockwidth(format);
+   return (x + blockwidth - 1) / blockwidth;
+}
+
+static INLINE unsigned
+util_format_get_nblocksy(enum pipe_format format,
+                         unsigned y)
+{
+   unsigned blockheight = util_format_get_blockheight(format);
+   return (y + blockheight - 1) / blockheight;
+}
+
+static INLINE unsigned
+util_format_get_nblocks(enum pipe_format format,
+                        unsigned width,
+                        unsigned height)
+{
+   return util_format_get_nblocksx(format, width) * util_format_get_nblocksy(format, height);
+}
+
+static INLINE size_t
+util_format_get_stride(enum pipe_format format,
+                       unsigned width)
+{
+   return util_format_get_nblocksx(format, width) * util_format_get_blocksize(format);
+}
+
+static INLINE size_t
+util_format_get_2d_size(enum pipe_format format,
+                        size_t stride,
+                        unsigned height)
+{
+   return util_format_get_nblocksy(format, height) * stride;
+}
+
 static INLINE uint
 util_format_get_component_bits(enum pipe_format format,
                                enum util_format_colorspace colorspace,
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 2931dfac47..0dad6ccbc0 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -998,7 +998,7 @@ reduce_2d(enum pipe_format pformat,
 {
    enum dtype datatype;
    uint comps;
-   const int bpt = util_format_get_size(pformat);
+   const int bpt = util_format_get_blocksize(pformat);
    const ubyte *srcA, *srcB;
    ubyte *dst;
    int row;
@@ -1037,7 +1037,7 @@ reduce_3d(enum pipe_format pformat,
           int dstWidth, int dstHeight, int dstDepth,
           int dstRowStride, ubyte *dstPtr)
 {
-   const int bpt = util_format_get_size(pformat);
+   const int bpt = util_format_get_blocksize(pformat);
    const int border = 0;
    int img, row;
    int bytesPerSrcImage, bytesPerDstImage;
@@ -1161,8 +1161,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
    const uint zslice = 0;
    uint dstLevel;
    
-   assert(pf_get_blockwidth(pt->format) == 1);
-   assert(pf_get_blockheight(pt->format) == 1);
+   assert(util_format_get_blockwidth(pt->format) == 1);
+   assert(util_format_get_blockheight(pt->format) == 1);
 
    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
       const uint srcLevel = dstLevel - 1;
@@ -1206,8 +1206,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
    struct pipe_screen *screen = pipe->screen;
    uint dstLevel, zslice = 0;
 
-   assert(pf_get_blockwidth(pt->format) == 1);
-   assert(pf_get_blockheight(pt->format) == 1);
+   assert(util_format_get_blockwidth(pt->format) == 1);
+   assert(util_format_get_blockheight(pt->format) == 1);
 
    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
       const uint srcLevel = dstLevel - 1;
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 72725b59d2..298fbacecb 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -34,6 +34,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
+#include "util/u_format.h"
 #include "util/u_rect.h"
 
 
@@ -57,9 +58,9 @@ util_copy_rect(ubyte * dst,
 {
    unsigned i;
    int src_stride_pos = src_stride < 0 ? -src_stride : src_stride;
-   int blocksize = pf_get_blocksize(format);
-   int blockwidth = pf_get_blockwidth(format);
-   int blockheight = pf_get_blockheight(format);
+   int blocksize = util_format_get_blocksize(format);
+   int blockwidth = util_format_get_blockwidth(format);
+   int blockheight = util_format_get_blockheight(format);
 
    assert(blocksize > 0);
    assert(blockwidth > 0);
@@ -105,9 +106,9 @@ util_fill_rect(ubyte * dst,
 {
    unsigned i, j;
    unsigned width_size;
-   int blocksize = pf_get_blocksize(format);
-   int blockwidth = pf_get_blockwidth(format);
-   int blockheight = pf_get_blockheight(format);
+   int blocksize = util_format_get_blocksize(format);
+   int blockwidth = util_format_get_blockwidth(format);
+   int blockheight = util_format_get_blockheight(format);
 
    assert(blocksize > 0);
    assert(blockwidth > 0);
@@ -203,9 +204,9 @@ util_surface_copy(struct pipe_context *pipe,
                                         PIPE_TRANSFER_WRITE,
                                         dst_x, dst_y, w, h);
 
-   assert(pf_get_blocksize(dst_format) == pf_get_blocksize(src_format));
-   assert(pf_get_blockwidth(dst_format) == pf_get_blockwidth(src_format));
-   assert(pf_get_blockheight(dst_format) == pf_get_blockheight(src_format));
+   assert(util_format_get_blocksize(dst_format) == util_format_get_blocksize(src_format));
+   assert(util_format_get_blockwidth(dst_format) == util_format_get_blockwidth(src_format));
+   assert(util_format_get_blockheight(dst_format) == util_format_get_blockheight(src_format));
 
    src_map = pipe->screen->transfer_map(screen, src_trans);
    dst_map = pipe->screen->transfer_map(screen, dst_trans);
@@ -270,7 +271,7 @@ util_surface_fill(struct pipe_context *pipe,
    if (dst_map) {
       assert(dst_trans->stride > 0);
 
-      switch (pf_get_blocksize(dst_trans->texture->format)) {
+      switch (util_format_get_blocksize(dst_trans->texture->format)) {
       case 1:
       case 2:
       case 4:
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 88c9a1f097..5b8dd1abb9 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -34,6 +34,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_rect.h"
@@ -52,7 +53,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
    const void *src;
 
    if (dst_stride == 0)
-      dst_stride = pf_get_stride(pt->texture->format, w);
+      dst_stride = util_format_get_stride(pt->texture->format, w);
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -81,7 +82,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
    enum pipe_format format = pt->texture->format;
 
    if (src_stride == 0)
-      src_stride = pf_get_stride(format, w);
+      src_stride = util_format_get_stride(format, w);
 
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
@@ -1275,7 +1276,7 @@ pipe_get_tile_rgba(struct pipe_transfer *pt,
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
 
-   packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format));
+   packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
 
    if (!packed)
       return;
@@ -1303,7 +1304,7 @@ pipe_put_tile_rgba(struct pipe_transfer *pt,
    if (pipe_clip_tile(x, y, &w, &h, pt))
       return;
 
-   packed = MALLOC(pf_get_nblocks(format, w, h) * pf_get_blocksize(format));
+   packed = MALLOC(util_format_get_nblocks(format, w, h) * util_format_get_blocksize(format));
 
    if (!packed)
       return;
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index 8f0185b42a..ab196c21f8 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -1444,7 +1444,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    assert(r);
    assert(blocks);
 
-   tex_pitch = r->tex_transfer[0]->stride / pf_get_blocksize(r->tex_transfer[0]->texture->format);
+   tex_pitch = r->tex_transfer[0]->stride / util_format_get_blocksize(r->tex_transfer[0]->texture->format);
    texels = r->texels[0] + mbpy * tex_pitch + mbpx;
 
    for (y = 0; y < 2; ++y) {
@@ -1483,7 +1483,7 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    mbpy /= 2;
 
    for (tb = 0; tb < 2; ++tb) {
-      tex_pitch = r->tex_transfer[tb + 1]->stride / pf_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
+      tex_pitch = r->tex_transfer[tb + 1]->stride / util_format_get_blocksize(r->tex_transfer[tb + 1]->texture->format);
       texels = r->texels[tb + 1] + mbpy * tex_pitch + mbpx;
 
       if ((cbp >> (1 - tb)) & 1) {
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index bc2e625f54..998944f77a 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -67,11 +67,11 @@ cell_texture_layout(struct cell_texture *ct)
       w_tile = align(width, TILE_SIZE);
       h_tile = align(height, TILE_SIZE);
 
-      ct->stride[level] = pf_get_stride(pt->format, w_tile);
+      ct->stride[level] = util_format_get_stride(pt->format, w_tile);
 
       ct->level_offset[level] = ct->buffer_size;
 
-      size = ct->stride[level] * pf_get_nblocksy(pt->format, h_tile);
+      size = ct->stride[level] * util_format_get_nblocksy(pt->format, h_tile);
       if (pt->target == PIPE_TEXTURE_CUBE)
          size *= 6;
       else
@@ -283,11 +283,11 @@ cell_get_tex_surface(struct pipe_screen *screen,
 
       if (pt->target == PIPE_TEXTURE_CUBE) {
          unsigned h_tile = align(ps->height, TILE_SIZE);
-         ps->offset += face * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level];
+         ps->offset += face * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
          unsigned h_tile = align(ps->height, TILE_SIZE);
-         ps->offset += zslice * pf_get_nblocksy(ps->format, h_tile) * ct->stride[level];
+         ps->offset += zslice * util_format_get_nblocksy(ps->format, h_tile) * ct->stride[level];
       }
       else {
          assert(face == 0);
@@ -342,11 +342,11 @@ cell_get_tex_transfer(struct pipe_screen *screen,
 
       if (texture->target == PIPE_TEXTURE_CUBE) {
          unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
-         ctrans->offset += face * pf_get_nblocksy(texture->format, h_tile) * pt->stride;
+         ctrans->offset += face * util_format_get_nblocksy(texture->format, h_tile) * pt->stride;
       }
       else if (texture->target == PIPE_TEXTURE_3D) {
          unsigned h_tile = align(u_minify(texture->height0, level), TILE_SIZE);
-         ctrans->offset += zslice * pf_get_nblocksy(texture->format, h_tile) * pt->stride;
+         ctrans->offset += zslice * util_format_get_nblocksy(texture->format, h_tile) * pt->stride;
       }
       else {
          assert(face == 0);
@@ -399,8 +399,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
     * Create a buffer of ordinary memory for the linear texture.
     * This is the memory that the user will read/write.
     */
-   size = pf_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
-          pf_get_nblocksy(pt->format, align(texHeight, TILE_SIZE));
+   size = util_format_get_stride(pt->format, align(texWidth, TILE_SIZE)) *
+          util_format_get_nblocksy(pt->format, align(texHeight, TILE_SIZE));
 
    ctrans->map = align_malloc(size, 16);
    if (!ctrans->map)
@@ -408,7 +408,7 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
 
    if (transfer->usage & PIPE_TRANSFER_READ) {
       /* need to untwiddle the texture to make a linear version */
-      const uint bpp = util_format_get_size(ct->base.format);
+      const uint bpp = util_format_get_blocksize(ct->base.format);
       if (bpp == 4) {
          const uint *src = (uint *) (ct->mapped + ctrans->offset);
          uint *dst = ctrans->map;
@@ -451,7 +451,7 @@ cell_transfer_unmap(struct pipe_screen *screen,
       /* The user wrote new texture data into the mapped buffer.
        * We need to convert the new linear data into the twiddled/tiled format.
        */
-      const uint bpp = util_format_get_size(ct->base.format);
+      const uint bpp = util_format_get_blocksize(ct->base.format);
       if (bpp == 4) {
          const uint *src = ctrans->map;
          uint *dst = (uint *) (ct->mapped + ctrans->offset);
diff --git a/src/gallium/drivers/i915/i915_surface.c b/src/gallium/drivers/i915/i915_surface.c
index 24e1024aaa..c693eb30e8 100644
--- a/src/gallium/drivers/i915/i915_surface.c
+++ b/src/gallium/drivers/i915/i915_surface.c
@@ -32,6 +32,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/internal/p_winsys_screen.h"
+#include "util/u_format.h"
 #include "util/u_tile.h"
 #include "util/u_rect.h"
 
@@ -52,15 +53,15 @@ i915_surface_copy(struct pipe_context *pipe,
    struct pipe_texture *spt = &src_tex->base;
 
    assert( dst != src );
-   assert( pf_get_blocksize(dpt->format) == pf_get_blocksize(spt->format) );
-   assert( pf_get_blockwidth(dpt->format) == pf_get_blockwidth(spt->format) );
-   assert( pf_get_blockheight(dpt->format) == pf_get_blockheight(spt->format) );
-   assert( pf_get_blockwidth(dpt->format) == 1 );
-   assert( pf_get_blockheight(dpt->format) == 1 );
+   assert( util_format_get_blocksize(dpt->format) == util_format_get_blocksize(spt->format) );
+   assert( util_format_get_blockwidth(dpt->format) == util_format_get_blockwidth(spt->format) );
+   assert( util_format_get_blockheight(dpt->format) == util_format_get_blockheight(spt->format) );
+   assert( util_format_get_blockwidth(dpt->format) == 1 );
+   assert( util_format_get_blockheight(dpt->format) == 1 );
 
    i915_copy_blit( i915_context(pipe),
                    FALSE,
-                   pf_get_blocksize(dpt->format),
+                   util_format_get_blocksize(dpt->format),
                    (unsigned short) src_tex->stride, src_tex->buffer, src->offset,
                    (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset,
                    (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height );
@@ -76,11 +77,11 @@ i915_surface_fill(struct pipe_context *pipe,
    struct i915_texture *tex = (struct i915_texture *)dst->texture;
    struct pipe_texture *pt = &tex->base;
 
-   assert(pf_get_blockwidth(pt->format) == 1);
-   assert(pf_get_blockheight(pt->format) == 1);
+   assert(util_format_get_blockwidth(pt->format) == 1);
+   assert(util_format_get_blockheight(pt->format) == 1);
 
    i915_fill_blit( i915_context(pipe),
-                   pf_get_blocksize(pt->format),
+                   util_format_get_blocksize(pt->format),
                    (unsigned short) tex->stride,
                    tex->buffer, dst->offset,
                    (short) dstx, (short) dsty,
diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
index b28b413771..50a9e19094 100644
--- a/src/gallium/drivers/i915/i915_texture.c
+++ b/src/gallium/drivers/i915/i915_texture.c
@@ -35,6 +35,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
 #include "pipe/internal/p_winsys_screen.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -129,7 +130,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex,
 
    assert(img < tex->nr_images[level]);
 
-   tex->image_offset[level][img] = y * tex->stride + x * pf_get_blocksize(tex->base.format);
+   tex->image_offset[level][img] = y * tex->stride + x * util_format_get_blocksize(tex->base.format);
 
    /*
    printf("%s level %d img %d pos %d,%d image_offset %x\n",
@@ -151,7 +152,7 @@ i915_scanout_layout(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
 
-   if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4)
+   if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
@@ -161,18 +162,18 @@ i915_scanout_layout(struct i915_texture *tex)
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
    if (pt->width0 >= 240) {
-      tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
-      tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
+      tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+      tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
       tex->hw_tiled = INTEL_TILE_X;
    } else if (pt->width0 == 64 && pt->height0 == 64) {
-      tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
-      tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
+      tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+      tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
    } else {
       return FALSE;
    }
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      pt->width0, pt->height0, pf_get_blocksize(pt->format),
+      pt->width0, pt->height0, util_format_get_blocksize(pt->format),
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -186,7 +187,7 @@ i915_display_target_layout(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
 
-   if (pt->last_level > 0 || pf_get_blocksize(pt->format) != 4)
+   if (pt->last_level > 0 || util_format_get_blocksize(pt->format) != 4)
       return FALSE;
 
    /* fallback to normal textures for small textures */
@@ -199,12 +200,12 @@ i915_display_target_layout(struct i915_texture *tex)
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
-   tex->stride = power_of_two(pf_get_stride(pt->format, pt->width0));
-   tex->total_nblocksy = align(pf_get_nblocksy(pt->format, pt->height0), 8);
+   tex->stride = power_of_two(util_format_get_stride(pt->format, pt->width0));
+   tex->total_nblocksy = align(util_format_get_nblocksy(pt->format, pt->height0), 8);
    tex->hw_tiled = INTEL_TILE_X;
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      pt->width0, pt->height0, pf_get_blocksize(pt->format),
+      pt->width0, pt->height0, util_format_get_blocksize(pt->format),
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -217,7 +218,7 @@ i915_miptree_layout_2d(struct i915_texture *tex)
    unsigned level;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
-   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0);
+   unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0);
 
    /* used for scanouts that need special layouts */
    if (pt->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
@@ -229,7 +230,7 @@ i915_miptree_layout_2d(struct i915_texture *tex)
       if (i915_display_target_layout(tex))
          return;
 
-   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
+   tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
    tex->total_nblocksy = 0;
 
    for (level = 0; level <= pt->last_level; level++) {
@@ -242,7 +243,7 @@ i915_miptree_layout_2d(struct i915_texture *tex)
 
       width = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksy = pf_get_nblocksy(pt->format, height);
+      nblocksy = util_format_get_nblocksy(pt->format, height);
    }
 }
 
@@ -255,12 +256,12 @@ i915_miptree_layout_3d(struct i915_texture *tex)
    unsigned width = pt->width0;
    unsigned height = pt->height0;
    unsigned depth = pt->depth0;
-   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0);
+   unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0);
    unsigned stack_nblocksy = 0;
 
    /* Calculate the size of a single slice. 
     */
-   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
+   tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
 
    /* XXX: hardware expects/requires 9 levels at minimum.
     */
@@ -271,7 +272,7 @@ i915_miptree_layout_3d(struct i915_texture *tex)
 
       width = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksy = pf_get_nblocksy(pt->format, height);
+      nblocksy = util_format_get_nblocksy(pt->format, height);
    }
 
    /* Fixup depth image_offsets: 
@@ -296,14 +297,14 @@ i915_miptree_layout_cube(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
    unsigned width = pt->width0, height = pt->height0;
-   const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0);
+   const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0);
    unsigned level;
    unsigned face;
 
    assert(width == height); /* cubemap images are square */
 
    /* double pitch for cube layouts */
-   tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4);
+   tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4);
    tex->total_nblocksy = nblocks * 4;
 
    for (level = 0; level <= pt->last_level; level++) {
@@ -366,8 +367,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
    unsigned y = 0;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
-   unsigned nblocksx = pf_get_nblocksx(pt->format, pt->width0);
-   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->height0);
+   unsigned nblocksx = util_format_get_nblocksx(pt->format, pt->width0);
+   unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->height0);
 
    /* used for scanouts that need special layouts */
    if (tex->base.tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
@@ -379,7 +380,7 @@ i945_miptree_layout_2d(struct i915_texture *tex)
       if (i915_display_target_layout(tex))
          return;
 
-   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
+   tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
 
    /* May need to adjust pitch to accomodate the placement of
     * the 2nd mipmap level.  This occurs when the alignment
@@ -388,11 +389,11 @@ i945_miptree_layout_2d(struct i915_texture *tex)
     */
    if (pt->last_level > 0) {
       unsigned mip1_nblocksx 
-         = align(pf_get_nblocksx(pt->format, u_minify(width, 1)), align_x)
-         + pf_get_nblocksx(pt->format, u_minify(width, 2));
+         = align(util_format_get_nblocksx(pt->format, u_minify(width, 1)), align_x)
+         + util_format_get_nblocksx(pt->format, u_minify(width, 2));
 
       if (mip1_nblocksx > nblocksx)
-         tex->stride = mip1_nblocksx * pf_get_blocksize(pt->format);
+         tex->stride = mip1_nblocksx * util_format_get_blocksize(pt->format);
    }
 
    /* Pitch must be a whole number of dwords
@@ -422,8 +423,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
 
       width  = u_minify(width, 1);
       height = u_minify(height, 1);
-      nblocksx = pf_get_nblocksx(pt->format, width);
-      nblocksy = pf_get_nblocksy(pt->format, height);
+      nblocksx = util_format_get_nblocksx(pt->format, width);
+      nblocksy = util_format_get_nblocksy(pt->format, height);
    }
 }
 
@@ -434,16 +435,16 @@ i945_miptree_layout_3d(struct i915_texture *tex)
    unsigned width = pt->width0;
    unsigned height = pt->height0;
    unsigned depth = pt->depth0;
-   unsigned nblocksy = pf_get_nblocksy(pt->format, pt->width0);
+   unsigned nblocksy = util_format_get_nblocksy(pt->format, pt->width0);
    unsigned pack_x_pitch, pack_x_nr;
    unsigned pack_y_pitch;
    unsigned level;
 
-   tex->stride = align(pf_get_stride(pt->format, pt->width0), 4);
+   tex->stride = align(util_format_get_stride(pt->format, pt->width0), 4);
    tex->total_nblocksy = 0;
 
    pack_y_pitch = MAX2(nblocksy, 2);
-   pack_x_pitch = tex->stride / pf_get_blocksize(pt->format);
+   pack_x_pitch = tex->stride / util_format_get_blocksize(pt->format);
    pack_x_nr = 1;
 
    for (level = 0; level <= pt->last_level; level++) {
@@ -468,7 +469,7 @@ i945_miptree_layout_3d(struct i915_texture *tex)
       if (pack_x_pitch > 4) {
          pack_x_pitch >>= 1;
          pack_x_nr <<= 1;
-         assert(pack_x_pitch * pack_x_nr * pf_get_blocksize(pt->format) <= tex->stride);
+         assert(pack_x_pitch * pack_x_nr * util_format_get_blocksize(pt->format) <= tex->stride);
       }
 
       if (pack_y_pitch > 2) {
@@ -478,7 +479,7 @@ i945_miptree_layout_3d(struct i915_texture *tex)
       width = u_minify(width, 1);
       height = u_minify(height, 1);
       depth = u_minify(depth, 1);
-      nblocksy = pf_get_nblocksy(pt->format, height);
+      nblocksy = util_format_get_nblocksy(pt->format, height);
    }
 }
 
@@ -488,7 +489,7 @@ i945_miptree_layout_cube(struct i915_texture *tex)
    struct pipe_texture *pt = &tex->base;
    unsigned level;
 
-   const unsigned nblocks = pf_get_nblocksx(pt->format, pt->width0);
+   const unsigned nblocks = util_format_get_nblocksx(pt->format, pt->width0);
    unsigned face;
    unsigned width = pt->width0;
    unsigned height = pt->height0;
@@ -508,9 +509,9 @@ i945_miptree_layout_cube(struct i915_texture *tex)
     * or the final row of 4x4, 2x2 and 1x1 faces below this.
     */
    if (nblocks > 32)
-      tex->stride = align(nblocks * pf_get_blocksize(pt->format) * 2, 4);
+      tex->stride = align(nblocks * util_format_get_blocksize(pt->format) * 2, 4);
    else
-      tex->stride = 14 * 8 * pf_get_blocksize(pt->format);
+      tex->stride = 14 * 8 * util_format_get_blocksize(pt->format);
 
    tex->total_nblocksy = nblocks * 4;
 
@@ -840,8 +841,8 @@ i915_transfer_map(struct pipe_screen *screen,
       return NULL;
 
    return map + i915_transfer(transfer)->offset +
-      transfer->y / pf_get_blockheight(format) * transfer->stride +
-      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
+      transfer->y / util_format_get_blockheight(format) * transfer->stride +
+      transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
 }
 
 static void
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index b4aabd4d7c..b18f17c0cd 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -41,6 +41,7 @@
 #include "draw/draw_vertex.h"
 #include "pipe/p_shader_tokens.h"
 #include "pipe/p_thread.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "lp_bld_debug.h"
@@ -166,7 +167,7 @@ shade_quads(struct llvmpipe_context *llvmpipe,
       assert((y % 2) == 0);
       depth = llvmpipe->zsbuf_map +
               y*llvmpipe->zsbuf_transfer->stride +
-              2*x*pf_get_blocksize(llvmpipe->zsbuf_transfer->texture->format);
+              2*x*util_format_get_blocksize(llvmpipe->zsbuf_transfer->texture->format);
    }
    else
       depth = NULL;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 9e41bc4074..2c135029ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -69,10 +69,10 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 
       /* Allocate storage for whole quads. This is particularly important
        * for depth surfaces, which are currently stored in a swizzled format. */
-      nblocksx = pf_get_nblocksx(pt->format, align(width, 2));
-      nblocksy = pf_get_nblocksy(pt->format, align(height, 2));
+      nblocksx = util_format_get_nblocksx(pt->format, align(width, 2));
+      nblocksy = util_format_get_nblocksy(pt->format, align(height, 2));
 
-      lpt->stride[level] = align(nblocksx * pf_get_blocksize(pt->format), 16);
+      lpt->stride[level] = align(nblocksx * util_format_get_blocksize(pt->format), 16);
 
       lpt->level_offset[level] = buffer_size;
 
@@ -251,11 +251,11 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
       */
       if (pt->target == PIPE_TEXTURE_CUBE) {
          unsigned tex_height = ps->height;
-         ps->offset += face * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
+         ps->offset += face * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
          unsigned tex_height = ps->height;
-         ps->offset += zslice * pf_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
+         ps->offset += zslice * util_format_get_nblocksy(pt->format, tex_height) * lpt->stride[level];
       }
       else {
          assert(face == 0);
@@ -314,11 +314,11 @@ llvmpipe_get_tex_transfer(struct pipe_screen *screen,
       */
       if (texture->target == PIPE_TEXTURE_CUBE) {
          unsigned tex_height = u_minify(texture->height0, level);
-         lpt->offset += face *  pf_get_nblocksy(texture->format, tex_height) * pt->stride;
+         lpt->offset += face *  util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
       }
       else if (texture->target == PIPE_TEXTURE_3D) {
          unsigned tex_height = u_minify(texture->height0, level);
-         lpt->offset += zslice * pf_get_nblocksy(texture->format, tex_height) * pt->stride;
+         lpt->offset += zslice * util_format_get_nblocksy(texture->format, tex_height) * pt->stride;
       }
       else {
          assert(face == 0);
@@ -379,8 +379,8 @@ llvmpipe_transfer_map( struct pipe_screen *_screen,
    }
    
    xfer_map = map + llvmpipe_transfer(transfer)->offset +
-      transfer->y / pf_get_blockheight(format) * transfer->stride +
-      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
+      transfer->y / util_format_get_blockheight(format) * transfer->stride +
+      transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
    /*printf("map = %p  xfer map = %p\n", map, xfer_map);*/
    return xfer_map;
 }
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 3020806c5d..12df7fd199 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -1,5 +1,6 @@
 #include "pipe/p_context.h"
 #include "pipe/p_format.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
 
@@ -158,10 +159,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	    sub_w = MIN2(sub_w, w - x);
 
 	    /* Must be 64-byte aligned */
-	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format)) & 63));
+	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63));
 
 	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
-	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * pf_get_blocksize(dst->texture->format),
+	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format),
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
@@ -180,7 +181,7 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	    OUT_RING  (chan, src_pitch |
 			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
 			     NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
-	    OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * pf_get_blocksize(src->texture->format),
+	    OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format),
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
 	    OUT_RING  (chan, 0);
 	  }
@@ -201,9 +202,9 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 	unsigned src_pitch = ((struct nv04_surface *)src)->pitch;
 	unsigned dst_pitch = ((struct nv04_surface *)dst)->pitch;
 	unsigned dst_offset = dst->offset + dy * dst_pitch +
-	                      dx * pf_get_blocksize(dst->texture->format);
+	                      dx * util_format_get_blocksize(dst->texture->format);
 	unsigned src_offset = src->offset + sy * src_pitch +
-	                      sx * pf_get_blocksize(src->texture->format);
+	                      sx * util_format_get_blocksize(src->texture->format);
 
 	MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
 	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
@@ -222,7 +223,7 @@ nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx,
 			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
 		OUT_RING  (chan, src_pitch);
 		OUT_RING  (chan, dst_pitch);
-		OUT_RING  (chan, w * pf_get_blocksize(src->texture->format));
+		OUT_RING  (chan, w * util_format_get_blocksize(src->texture->format));
 		OUT_RING  (chan, count);
 		OUT_RING  (chan, 0x0101);
 		OUT_RING  (chan, 0);
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index d66d6c6346..8446073ae8 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -1,6 +1,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
@@ -151,7 +152,7 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
+	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index 6a52b6af36..908482ad85 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nv10_context.h"
@@ -23,9 +24,9 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 	
 	for (l = 0; l <= pt->last_level; l++) {
 		if (swizzled)
-			nv10mt->level[l].pitch = pf_get_stride(pt->format, width);
+			nv10mt->level[l].pitch = util_format_get_stride(pt->format, width);
 		else
-			nv10mt->level[l].pitch = pf_get_stride(pt->format, pt->width0);
+			nv10mt->level[l].pitch = util_format_get_stride(pt->format, pt->width0);
 		nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63;
 
 		nv10mt->level[l].image_offset =
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 06bb513417..c664973e90 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -1,6 +1,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
@@ -151,7 +152,7 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
+	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index e2e01bd849..d1291a92e0 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nv20_context.h"
@@ -27,9 +28,9 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 	
 	for (l = 0; l <= pt->last_level; l++) {
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv20mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
+			nv20mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
 		else
-			nv20mt->level[l].pitch = pf_get_stride(pt->format, width);
+			nv20mt->level[l].pitch = util_format_get_stride(pt->format, width);
 
 		nv20mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 26a73c5143..69b79c809f 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -1,6 +1,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
@@ -151,7 +152,7 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
+	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 920fe64c32..ce95d9700f 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nv30_context.h"
@@ -29,9 +30,9 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 
 	for (l = 0; l <= pt->last_level; l++) {
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv30mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
+			nv30mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
 		else
-			nv30mt->level[l].pitch = pf_get_stride(pt->format, width);
+			nv30mt->level[l].pitch = util_format_get_stride(pt->format, width);
 
 		nv30mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index e29bfbd3ef..2255a02cae 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -1,6 +1,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
@@ -151,7 +152,7 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
+	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 89ddf373e9..b974e68a07 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nv40_context.h"
@@ -31,9 +32,9 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 
 	for (l = 0; l <= pt->last_level; l++) {
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			mt->level[l].pitch = align(pf_get_stride(pt->format, pt->width0), 64);
+			mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
 		else
-			mt->level[l].pitch = pf_get_stride(pt->format, width);
+			mt->level[l].pitch = util_format_get_stride(pt->format, width);
 
 		mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index ed5be1cf87..b084a38b48 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -1,6 +1,7 @@
 #include <pipe/p_state.h>
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
+#include <util/u_format.h>
 #include <util/u_memory.h>
 #include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
@@ -151,7 +152,7 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	                            pipe_transfer_buffer_flags(ptx));
 
 	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * pf_get_blocksize(ptx->texture->format);
+	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 9e083b662d..3f1edf0a13 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -23,6 +23,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 
 #include "nv50_context.h"
 
@@ -105,10 +106,10 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
-		unsigned nblocksy = pf_get_nblocksy(pt->format, height);
+		unsigned nblocksy = util_format_get_nblocksy(pt->format, height);
 
 		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
-		lvl->pitch = align(pf_get_stride(pt->format, width), 64);
+		lvl->pitch = align(util_format_get_stride(pt->format, width), 64);
 		lvl->tile_mode = get_tile_mode(nblocksy, depth);
 
 		width = u_minify(width, 1);
@@ -130,7 +131,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 			unsigned tile_d = get_tile_depth(lvl->tile_mode);
 
 			size  = lvl->pitch;
-			size *= align(pf_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
+			size *= align(util_format_get_nblocksy(pt->format, u_minify(pt->height0, l)), tile_h);
 			size *= align(u_minify(pt->depth0, l), tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
@@ -222,7 +223,7 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	ps->offset = lvl->image_offset[img];
 
 	if (pt->target == PIPE_TEXTURE_3D) {
-		unsigned nb_h = pf_get_nblocksy(pt->format, ps->height);
+		unsigned nb_h = util_format_get_nblocksy(pt->format, ps->height);
 		ps->offset += get_zslice_offset(lvl->tile_mode, zslice,
 						lvl->pitch, nb_h);
 	}
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 6240a0c757..4d9afa6fed 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -1,6 +1,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 
 #include "nv50_context.h"
@@ -140,11 +141,11 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 
 	pipe_texture_reference(&tx->base.texture, pt);
-	tx->nblocksx = pf_get_nblocksx(pt->format, u_minify(pt->width0, level));
-	tx->nblocksy = pf_get_nblocksy(pt->format, u_minify(pt->height0, level));
+	tx->nblocksx = util_format_get_nblocksx(pt->format, u_minify(pt->width0, level));
+	tx->nblocksy = util_format_get_nblocksy(pt->format, u_minify(pt->height0, level));
 	tx->base.width = w;
 	tx->base.height = h;
-	tx->base.stride = tx->nblocksx * pf_get_blocksize(pt->format);
+	tx->base.stride = tx->nblocksx * util_format_get_blocksize(pt->format);
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
@@ -154,8 +155,8 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
 	tx->level_z = zslice;
-	tx->level_x = pf_get_nblocksx(pt->format, x);
-	tx->level_y = pf_get_nblocksy(pt->format, y);
+	tx->level_x = util_format_get_nblocksx(pt->format, x);
+	tx->level_y = util_format_get_nblocksy(pt->format, y);
 	ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
 			     tx->nblocksy * tx->base.stride, &tx->bo);
 	if (ret) {
@@ -164,8 +165,8 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	}
 
 	if (usage & PIPE_TRANSFER_READ) {
-		nx = pf_get_nblocksx(pt->format, tx->base.width);
-		ny = pf_get_nblocksy(pt->format, tx->base.height);
+		nx = util_format_get_nblocksx(pt->format, tx->base.width);
+		ny = util_format_get_nblocksy(pt->format, tx->base.height);
 
 		nv50_transfer_rect_m2mf(pscreen, mt->base.bo, tx->level_offset,
 					tx->level_pitch, tx->level_tiling,
@@ -176,7 +177,7 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 					tx->base.stride, tx->bo->tile_mode,
 					0, 0, 0,
 					tx->nblocksx, tx->nblocksy, 1,
-					pf_get_blocksize(pt->format), nx, ny,
+					util_format_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
 					NOUVEAU_BO_GART);
 	}
@@ -191,8 +192,8 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 	struct nv50_miptree *mt = nv50_miptree(ptx->texture);
 	struct pipe_texture *pt = ptx->texture;
 
-	unsigned nx = pf_get_nblocksx(pt->format, tx->base.width);
-	unsigned ny = pf_get_nblocksy(pt->format, tx->base.height);
+	unsigned nx = util_format_get_nblocksx(pt->format, tx->base.width);
+	unsigned ny = util_format_get_nblocksy(pt->format, tx->base.height);
 
 	if (ptx->usage & PIPE_TRANSFER_WRITE) {
 		struct pipe_screen *pscreen = pt->screen;
@@ -206,7 +207,7 @@ nv50_transfer_del(struct pipe_transfer *ptx)
 					tx->level_x, tx->level_y, tx->level_z,
 					tx->nblocksx, tx->nblocksy,
 					tx->level_depth,
-					pf_get_blocksize(pt->format), nx, ny,
+					util_format_get_blocksize(pt->format), nx, ny,
 					NOUVEAU_BO_GART, NOUVEAU_BO_VRAM |
 					NOUVEAU_BO_GART);
 	}
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 52a8388ead..d7b6511d6d 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -657,7 +657,7 @@ static boolean r300_validate_aos(struct r300_context *r300)
     /* Check if formats and strides are aligned to the size of DWORD. */
     for (i = 0; i < r300->vertex_element_count; i++) {
         if (vbuf[velem[i].vertex_buffer_index].stride % 4 != 0 ||
-            pf_get_blocksize(velem[i].src_format) % 4 != 0) {
+            util_format_get_blocksize(velem[i].src_format) % 4 != 0) {
             return FALSE;
         }
     }
@@ -686,8 +686,8 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
     for (i = 0; i < aos_count - 1; i += 2) {
         vb1 = &vbuf[velem[i].vertex_buffer_index];
         vb2 = &vbuf[velem[i+1].vertex_buffer_index];
-        size1 = util_format_get_size(velem[i].src_format);
-        size2 = util_format_get_size(velem[i+1].src_format);
+        size1 = util_format_get_blocksize(velem[i].src_format);
+        size2 = util_format_get_blocksize(velem[i+1].src_format);
 
         OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
                R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
@@ -697,7 +697,7 @@ void r300_emit_aos(struct r300_context* r300, unsigned offset)
 
     if (aos_count & 1) {
         vb1 = &vbuf[velem[i].vertex_buffer_index];
-        size1 = util_format_get_size(velem[i].src_format);
+        size1 = util_format_get_blocksize(velem[i].src_format);
 
         OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
         OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index feb571a23d..a7ef3dbcc2 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -21,6 +21,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_simple_screen.h"
 
@@ -357,8 +358,8 @@ static void* r300_transfer_map(struct pipe_screen* screen,
     }
 
     return map + r300_transfer(transfer)->offset +
-        transfer->y / pf_get_blockheight(format) * transfer->stride +
-        transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
+        transfer->y / util_format_get_blockheight(format) * transfer->stride +
+        transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
 }
 
 static void r300_transfer_unmap(struct pipe_screen* screen,
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index 170483b9bb..9a96206a4d 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -106,7 +106,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
         return 0;
     }
 
-    return align(pf_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
+    return align(util_format_get_stride(tex->tex.format, u_minify(tex->tex.width0, level)), 32);
 }
 
 static void r300_setup_miptree(struct r300_texture* tex)
@@ -116,7 +116,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
     int i;
 
     for (i = 0; i <= base->last_level; i++) {
-        unsigned nblocksy = pf_get_nblocksy(base->format, u_minify(base->height0, i));
+        unsigned nblocksy = util_format_get_nblocksy(base->format, u_minify(base->height0, i));
 
         stride = r300_texture_get_stride(tex, i);
         layer_size = stride * nblocksy;
@@ -129,7 +129,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
         tex->layer_size[i] = layer_size;
-        tex->pitch[i] = stride / pf_get_blocksize(base->format);
+        tex->pitch[i] = stride / util_format_get_blocksize(base->format);
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
@@ -245,7 +245,7 @@ static struct pipe_texture*
     tex->tex.screen = screen;
 
     tex->stride_override = *stride;
-    tex->pitch[0] = *stride / pf_get_blocksize(base->format);
+    tex->pitch[0] = *stride / util_format_get_blocksize(base->format);
 
     r300_setup_flags(tex);
     r300_setup_texture_state(tex, r300_screen(screen)->caps->is_r500);
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 9e83410fcd..a9436a3394 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -65,11 +65,11 @@ softpipe_texture_layout(struct pipe_screen *screen,
    pt->depth0 = depth;
 
    for (level = 0; level <= pt->last_level; level++) {
-      spt->stride[level] = pf_get_stride(pt->format, width);
+      spt->stride[level] = util_format_get_stride(pt->format, width);
 
       spt->level_offset[level] = buffer_size;
 
-      buffer_size += (pf_get_nblocksy(pt->format, height) *
+      buffer_size += (util_format_get_nblocksy(pt->format, height) *
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       spt->stride[level]);
 
@@ -239,11 +239,11 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
       ps->zslice = zslice;
 
       if (pt->target == PIPE_TEXTURE_CUBE) {
-         ps->offset += face * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+         ps->offset += face * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
                        spt->stride[level];
       }
       else if (pt->target == PIPE_TEXTURE_3D) {
-         ps->offset += zslice * pf_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
+         ps->offset += zslice * util_format_get_nblocksy(pt->format, u_minify(pt->height0, level)) *
                        spt->stride[level];
       }
       else {
@@ -299,7 +299,7 @@ softpipe_get_tex_transfer(struct pipe_screen *screen,
    spt = CALLOC_STRUCT(softpipe_transfer);
    if (spt) {
       struct pipe_transfer *pt = &spt->base;
-      int nblocksy = pf_get_nblocksy(texture->format, u_minify(texture->height0, level));
+      int nblocksy = util_format_get_nblocksy(texture->format, u_minify(texture->height0, level));
       pipe_texture_reference(&pt->texture, texture);
       pt->x = x;
       pt->y = y;
@@ -376,8 +376,8 @@ softpipe_transfer_map( struct pipe_screen *screen,
    }
 
    xfer_map = map + softpipe_transfer(transfer)->offset +
-      transfer->y / pf_get_blockheight(format) * transfer->stride +
-      transfer->x / pf_get_blockwidth(format) * pf_get_blocksize(format);
+      transfer->y / util_format_get_blockheight(format) * transfer->stride +
+      transfer->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
    /*printf("map = %p  xfer map = %p\n", map, xfer_map);*/
    return xfer_map;
 }
diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c
index 49e1ab0ca7..112a6fe0cf 100644
--- a/src/gallium/drivers/softpipe/sp_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tile_cache.c
@@ -239,7 +239,7 @@ clear_tile(struct softpipe_cached_tile *tile,
 {
    uint i, j;
 
-   switch (util_format_get_size(format)) {
+   switch (util_format_get_blocksize(format)) {
    case 1:
       memset(tile->data.any, clear_value, TILE_SIZE * TILE_SIZE);
       break;
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
index ab93dab223..2224c2d394 100644
--- a/src/gallium/drivers/svga/svga_screen_texture.c
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -159,8 +159,8 @@ svga_transfer_dma_band(struct svga_transfer *st,
                 st->base.x + st->base.width,
                 y + h,
                 st->base.zslice + 1,
-                pf_get_blocksize(texture->base.format)*8/
-                (pf_get_blockwidth(texture->base.format)*pf_get_blockheight(texture->base.format)));
+                util_format_get_blocksize(texture->base.format)*8/
+                (util_format_get_blockwidth(texture->base.format)*util_format_get_blockheight(texture->base.format)));
    
    box.x = st->base.x;
    box.y = y;
@@ -210,7 +210,7 @@ svga_transfer_dma(struct svga_transfer *st,
    }
    else {
       unsigned y, h, srcy;
-      unsigned blockheight = pf_get_blockheight(st->base.texture->format);
+      unsigned blockheight = util_format_get_blockheight(st->base.texture->format);
       h = st->hw_nblocksy * blockheight;
       srcy = 0;
       for(y = 0; y < st->base.height; y += h) {
@@ -772,8 +772,8 @@ svga_get_tex_transfer(struct pipe_screen *screen,
    struct svga_screen *ss = svga_screen(screen);
    struct svga_winsys_screen *sws = ss->sws;
    struct svga_transfer *st;
-   unsigned nblocksx = pf_get_nblocksx(texture->format, w);
-   unsigned nblocksy = pf_get_nblocksy(texture->format, h);
+   unsigned nblocksx = util_format_get_nblocksx(texture->format, w);
+   unsigned nblocksy = util_format_get_nblocksy(texture->format, h);
 
    /* We can't map texture storage directly */
    if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
@@ -787,7 +787,7 @@ svga_get_tex_transfer(struct pipe_screen *screen,
    st->base.y = y;
    st->base.width = w;
    st->base.height = h;
-   st->base.stride = nblocksx*pf_get_blocksize(texture->format);
+   st->base.stride = nblocksx*util_format_get_blocksize(texture->format);
    st->base.usage = usage;
    st->base.face = face;
    st->base.level = level;
@@ -1071,7 +1071,7 @@ svga_screen_buffer_from_texture(struct pipe_texture *texture,
        svga_translate_format(texture->format),
        stex->handle);
 
-   *stride = pf_get_stride(texture->format, texture->width0);
+   *stride = util_format_get_stride(texture->format, texture->width0);
 
    return *buffer != NULL;
 }
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 9e339577c7..44b7ceb4fa 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -211,7 +211,7 @@ static int update_zero_stride( struct svga_context *svga,
          mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, 
                                                vbuffer->buffer,
                                                vel->src_offset,
-                                               util_format_get_size(vel->src_format),
+                                               util_format_get_blocksize(vel->src_format),
                                                PIPE_BUFFER_USAGE_CPU_READ);
          translate->set_buffer(translate, vel->vertex_buffer_index,
                                mapped_buffer,
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index af1d7f3224..c31b1d8698 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 
 
+#include "util/u_format.h"
 #include "util/u_string.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
@@ -203,9 +204,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header,
                                &t->width0, 1,
                                &t->height0, 1,
                                &t->depth0, 1,
-                               pf_get_blockwidth(t->format),
-                               pf_get_blockheight(t->format),
-                               pf_get_blocksize(t->format),
+                               util_format_get_blockwidth(t->format),
+                               util_format_get_blockheight(t->format),
+                               util_format_get_blocksize(t->format),
                                t->last_level,
                                t->nr_samples,
                                t->tex_usage,
@@ -254,11 +255,11 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
 
    rbug_send_texture_read_reply(tr_rbug->con, serial,
                                 t->texture->format,
-                                pf_get_blockwidth(t->texture->format),
-                                pf_get_blockheight(t->texture->format),
-                                pf_get_blocksize(t->texture->format),
+                                util_format_get_blockwidth(t->texture->format),
+                                util_format_get_blockheight(t->texture->format),
+                                util_format_get_blocksize(t->texture->format),
                                 (uint8_t*)map,
-                                t->stride * pf_get_nblocksy(t->texture->format, t->height),
+                                t->stride * util_format_get_nblocksy(t->texture->format, t->height),
                                 t->stride,
                                 NULL);
 
diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index f69f7da000..ac20a47af1 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -25,6 +25,7 @@
  *
  **************************************************************************/
 
+#include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
 
@@ -425,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
    struct pipe_transfer *transfer = tr_trans->transfer;
 
    if(tr_trans->map) {
-      size_t size = pf_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride;
+      size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride;
 
       trace_dump_call_begin("pipe_screen", "transfer_write");
 
diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h
index d334114d62..6bfff1cc59 100644
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@ -174,140 +174,6 @@ enum pipe_format {
  */
 extern const char *pf_name( enum pipe_format format );
 
-/**
- * Return bits for a particular component.
- * \param comp  component index, starting at 0
- */
-static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp )
-{
-   uint size;
-
-   if (pf_swizzle_x(format) == comp) {
-      size = pf_size_x(format);
-   }
-   else if (pf_swizzle_y(format) == comp) {
-      size = pf_size_y(format);
-   }
-   else if (pf_swizzle_z(format) == comp) {
-      size = pf_size_z(format);
-   }
-   else if (pf_swizzle_w(format) == comp) {
-      size = pf_size_w(format);
-   }
-   else {
-      size = 0;
-   }
-   if (pf_layout( format ) == PIPE_FORMAT_LAYOUT_RGBAZS)
-      return size << pf_exp2( format );
-   return size << (pf_mixed_scale8( format ) * 3);
-}
-
-
-/**
- * Return total bits needed for the pixel format per block.
- */
-static INLINE uint pf_get_blocksizebits( enum pipe_format format )
-{
-   switch (pf_layout(format)) {
-   case PIPE_FORMAT_LAYOUT_RGBAZS:
-   case PIPE_FORMAT_LAYOUT_MIXED:
-      return
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_0 ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_1 ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) +
-         pf_get_component_bits( format, PIPE_FORMAT_COMP_S );
-   case PIPE_FORMAT_LAYOUT_YCBCR:
-      assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV );
-      return 32;
-   case PIPE_FORMAT_LAYOUT_DXT:
-        switch(format) {
-        case PIPE_FORMAT_DXT1_RGBA:
-        case PIPE_FORMAT_DXT1_RGB:
-        case PIPE_FORMAT_DXT1_SRGBA:
-        case PIPE_FORMAT_DXT1_SRGB:
-           return 64;
-        case PIPE_FORMAT_DXT3_RGBA:
-        case PIPE_FORMAT_DXT5_RGBA:
-        case PIPE_FORMAT_DXT3_SRGBA:
-        case PIPE_FORMAT_DXT5_SRGBA:
-           return 128;
-        default:
-           assert( 0 );
-           return 0;
-        }
-
-   default:
-      assert( 0 );
-      return 0;
-   }
-}
-
-/**
- * Return bytes per element for the given format.
- */
-static INLINE uint pf_get_blocksize( enum pipe_format format )
-{
-   assert(pf_get_blocksizebits(format) % 8 == 0);
-   return pf_get_blocksizebits(format) / 8;
-}
-
-static INLINE uint pf_get_blockwidth( enum pipe_format format )
-{
-   switch (pf_layout(format)) {
-   case PIPE_FORMAT_LAYOUT_YCBCR:
-      return 2;
-   case PIPE_FORMAT_LAYOUT_DXT:
-      return 4;
-   default:
-      return 1;
-   }
-}
-
-static INLINE uint pf_get_blockheight( enum pipe_format format )
-{
-   switch (pf_layout(format)) {
-   case PIPE_FORMAT_LAYOUT_DXT:
-      return 4;
-   default:
-      return 1;
-   }
-}
-
-static INLINE unsigned
-pf_get_nblocksx(enum pipe_format format, unsigned x)
-{
-   unsigned blockwidth = pf_get_blockwidth(format);
-   return (x + blockwidth - 1) / blockwidth;
-}
-
-static INLINE unsigned
-pf_get_nblocksy(enum pipe_format format, unsigned y)
-{
-   unsigned blockheight = pf_get_blockheight(format);
-   return (y + blockheight - 1) / blockheight;
-}
-
-static INLINE unsigned
-pf_get_nblocks(enum pipe_format format, unsigned width, unsigned height)
-{
-   return pf_get_nblocksx(format, width) * pf_get_nblocksy(format, height);
-}
-
-static INLINE size_t
-pf_get_stride(enum pipe_format format, unsigned width)
-{
-   return pf_get_nblocksx(format, width) * pf_get_blocksize(format);
-}
-
-static INLINE size_t
-pf_get_2d_size(enum pipe_format format, size_t stride, unsigned height)
-{
-   return pf_get_nblocksy(format, height) * stride;
-}
 
 enum pipe_video_chroma_format
 {
diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c
index 97ca2afc54..9637741421 100644
--- a/src/gallium/state_trackers/python/st_sample.c
+++ b/src/gallium/state_trackers/python/st_sample.c
@@ -30,6 +30,7 @@
 #include "pipe/p_format.h"
 #include "pipe/p_state.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_tile.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
@@ -461,7 +462,7 @@ st_sample_dxt_pixel_block(enum pipe_format format,
          for(ch = 0; ch < 4; ++ch)
             rgba[y*rgba_stride + x*4 + ch] = (float)(data[i].rgba[y*4*4 + x*4 + ch])/255.0f;
    
-   memcpy(raw, data[i].raw, pf_get_blocksize(format));
+   memcpy(raw, data[i].raw, util_format_get_blocksize(format));
 }
 
 
@@ -473,7 +474,7 @@ st_sample_generic_pixel_block(enum pipe_format format,
 {
    unsigned i;
    unsigned x, y, ch;
-   int blocksize = pf_get_blocksize(format);
+   int blocksize = util_format_get_blocksize(format);
    
    for(i = 0; i < blocksize; ++i)
       raw[i] = (uint8_t)st_random();
@@ -548,11 +549,11 @@ st_sample_surface(struct st_surface *surface, float *rgba)
    if (raw) {
       enum pipe_format format = texture->format;
       uint x, y;
-      int nblocksx = pf_get_nblocksx(format, width);
-      int nblocksy = pf_get_nblocksy(format, height);
-      int blockwidth = pf_get_blockwidth(format);
-      int blockheight = pf_get_blockheight(format);
-      int blocksize = pf_get_blocksize(format);
+      int nblocksx = util_format_get_nblocksx(format, width);
+      int nblocksy = util_format_get_nblocksy(format, height);
+      int blockwidth = util_format_get_blockwidth(format);
+      int blockheight = util_format_get_blockheight(format);
+      int blocksize = util_format_get_blocksize(format);
 
 
       for (y = 0; y < nblocksy; ++y) {
diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c
index b8535a4217..a3294e877a 100644
--- a/src/gallium/state_trackers/python/st_softpipe_winsys.c
+++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c
@@ -169,8 +169,8 @@ st_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
    const unsigned alignment = 64;
    unsigned nblocksy;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 6fd402cee4..7106a06492 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -247,7 +247,7 @@ nouveau_drm_handle_from_pt(struct drm_api *api, struct pipe_screen *pscreen,
 		return false;
 
 	*handle = mt->bo->handle;
-	*stride = pf_get_stride(mt->base.format, mt->base.width0);
+	*stride = util_format_get_stride(mt->base.format, mt->base.width0);
 	return true;
 }
 
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 76acc99ad7..dfecb8a728 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -35,6 +35,7 @@
 #include "radeon_bo_gem.h"
 #include "softpipe/sp_texture.h"
 #include "r300_context.h"
+#include "util/u_format.h"
 #include "util/u_math.h"
 #include <X11/Xutil.h>
 
@@ -121,8 +122,8 @@ static struct pipe_buffer *radeon_surface_buffer_create(struct pipe_winsys *ws,
     const unsigned alignment = 64;
     unsigned nblocksy, size;
 
-    nblocksy = pf_get_nblocksy(format, height);
-    *stride = align(pf_get_stride(format, width), alignment);
+    nblocksy = util_format_get_nblocksy(format, height);
+    *stride = align(util_format_get_stride(format, width), alignment);
     size = *stride * nblocksy;
 
     return radeon_buffer_create(ws, 64, usage, size);
diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c
index a36a9e52a6..6ee3ede38c 100644
--- a/src/gallium/winsys/egl_xlib/sw_winsys.c
+++ b/src/gallium/winsys/egl_xlib/sw_winsys.c
@@ -163,8 +163,8 @@ surface_buffer_create(struct pipe_winsys *winsys,
    const unsigned alignment = 64;
    unsigned nblocksy;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
diff --git a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
index 3fc44925a8..f15bcd37b5 100644
--- a/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
+++ b/src/gallium/winsys/g3dvl/xlib/xsp_winsys.c
@@ -141,8 +141,8 @@ static struct pipe_buffer* xsp_surface_buffer_create
    const unsigned int ALIGNMENT = 1;
    unsigned nblocksy;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), ALIGNMENT);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), ALIGNMENT);
 
    return pws->buffer_create(pws, ALIGNMENT, usage,
                              *stride * nblocksy);
diff --git a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
index 7122365027..7d076be3a3 100644
--- a/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_llvmpipe_winsys.c
@@ -137,8 +137,8 @@ gdi_llvmpipe_displaytarget_create(struct llvmpipe_winsys *winsys,
    gdt->width = width;
    gdt->height = height;
 
-   bpp = util_format_get_bits(format);
-   cpp = util_format_get_size(format);
+   bpp = util_format_get_blocksizebits(format);
+   cpp = util_format_get_blocksize(format);
    
    gdt->stride = align(width * cpp, alignment);
    gdt->size = gdt->stride * height;
diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
index 40f612e893..2ad794c3f0 100644
--- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
+++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c
@@ -163,8 +163,8 @@ gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys,
    const unsigned alignment = 64;
    unsigned nblocksy;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
@@ -271,10 +271,10 @@ gdi_softpipe_present(struct pipe_screen *screen,
 
     memset(&bmi, 0, sizeof(BITMAPINFO));
     bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
-    bmi.bmiHeader.biWidth = texture->stride[surface->level] / util_format_get_size(surface->format);
+    bmi.bmiHeader.biWidth = texture->stride[surface->level] / util_format_get_blocksize(surface->format);
     bmi.bmiHeader.biHeight= -(long)surface->height;
     bmi.bmiHeader.biPlanes = 1;
-    bmi.bmiHeader.biBitCount = util_format_get_bits(surface->format);
+    bmi.bmiHeader.biBitCount = util_format_get_blocksizebits(surface->format);
     bmi.bmiHeader.biCompression = BI_RGB;
     bmi.bmiHeader.biSizeImage = 0;
     bmi.bmiHeader.biXPelsPerMeter = 0;
diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c
index cd838e7322..47ae0519a4 100644
--- a/src/gallium/winsys/xlib/xlib_cell.c
+++ b/src/gallium/winsys/xlib/xlib_cell.c
@@ -289,8 +289,8 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
    const unsigned alignment = 64;
    unsigned nblocksy;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), alignment);
 
    return winsys->buffer_create(winsys, alignment,
                                 usage,
diff --git a/src/gallium/winsys/xlib/xlib_llvmpipe.c b/src/gallium/winsys/xlib/xlib_llvmpipe.c
index d96311e452..2a434b5fd2 100644
--- a/src/gallium/winsys/xlib/xlib_llvmpipe.c
+++ b/src/gallium/winsys/xlib/xlib_llvmpipe.c
@@ -262,10 +262,10 @@ xm_llvmpipe_display(struct xmesa_buffer *xm_buffer,
    {
       if (xm_dt->tempImage == NULL)
       {
-         assert(pf_get_blockwidth(xm_dt->format) == 1);
-         assert(pf_get_blockheight(xm_dt->format) == 1);
+         assert(util_format_get_blockwidth(xm_dt->format) == 1);
+         assert(util_format_get_blockheight(xm_dt->format) == 1);
          alloc_shm_ximage(xm_dt, xm_buffer,
-                          xm_dt->stride / pf_get_blocksize(xm_dt->format),
+                          xm_dt->stride / util_format_get_blocksize(xm_dt->format),
                           xm_dt->height);
       }
 
@@ -331,8 +331,8 @@ xm_displaytarget_create(struct llvmpipe_winsys *winsys,
    xm_dt->width = width;
    xm_dt->height = height;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   xm_dt->stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   xm_dt->stride = align(util_format_get_stride(format, width), alignment);
    size = xm_dt->stride * nblocksy;
 
 #ifdef USE_XSHM
diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c
index 48cee0a41d..f7c0099584 100644
--- a/src/gallium/winsys/xlib/xlib_softpipe.c
+++ b/src/gallium/winsys/xlib/xlib_softpipe.c
@@ -255,10 +255,10 @@ xlib_softpipe_display_surface(struct xmesa_buffer *b,
    {
       if (xm_buf->tempImage == NULL) 
       {
-         assert(pf_get_blockwidth(surf->texture->format) == 1);
-         assert(pf_get_blockheight(surf->texture->format) == 1);
+         assert(util_format_get_blockwidth(surf->texture->format) == 1);
+         assert(util_format_get_blockheight(surf->texture->format) == 1);
          alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] /
-                          pf_get_blocksize(surf->texture->format), surf->height);
+                          util_format_get_blocksize(surf->texture->format), surf->height);
       }
 
       ximage = xm_buf->tempImage;
@@ -363,8 +363,8 @@ xm_surface_buffer_create(struct pipe_winsys *winsys,
    const unsigned alignment = 64;
    unsigned nblocksy, size;
 
-   nblocksy = pf_get_nblocksy(format, height);
-   *stride = align(pf_get_stride(format, width), alignment);
+   nblocksy = util_format_get_nblocksy(format, height);
+   *stride = align(util_format_get_stride(format, width), alignment);
    size = *stride * nblocksy;
 
 #ifdef USE_XSHM
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index c61c77479e..7c664267d4 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -857,8 +857,8 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
 					   usage, dstx, dsty,
 					   width, height);
 
-   assert(pf_get_blockwidth(ptDraw->texture->format) == 1);
-   assert(pf_get_blockheight(ptDraw->texture->format) == 1);
+   assert(util_format_get_blockwidth(ptDraw->texture->format) == 1);
+   assert(util_format_get_blockheight(ptDraw->texture->format) == 1);
 
    /* map the stencil buffer */
    drawMap = screen->transfer_map(screen, ptDraw);
diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c
index 78aed07bf9..45ce34a85f 100644
--- a/src/mesa/state_tracker/st_cb_fbo.c
+++ b/src/mesa/state_tracker/st_cb_fbo.c
@@ -105,8 +105,8 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
 
       assert(strb->format != PIPE_FORMAT_NONE);
       
-      strb->stride = pf_get_stride(strb->format, width);
-      size = pf_get_2d_size(strb->format, strb->stride, height);
+      strb->stride = util_format_get_stride(strb->format, width);
+      size = util_format_get_2d_size(strb->format, strb->stride, height);
       
       strb->data = _mesa_malloc(size);
       
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 000e6eb2a5..6e1ecb1c50 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -833,7 +833,7 @@ decompress_with_blit(GLcontext * ctx, GLenum target, GLint level,
    /* copy/pack data into user buffer */
    if (st_equal_formats(stImage->pt->format, format, type)) {
       /* memcpy */
-      const uint bytesPerRow = width * util_format_get_size(stImage->pt->format);
+      const uint bytesPerRow = width * util_format_get_blocksize(stImage->pt->format);
       ubyte *map = screen->transfer_map(screen, tex_xfer);
       GLuint row;
       for (row = 0; row < height; row++) {
@@ -915,7 +915,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level,
                                             PIPE_TRANSFER_READ, 0, 0,
                                             stImage->base.Width,
                                             stImage->base.Height);
-      texImage->RowStride = stImage->transfer->stride / pf_get_blocksize(stImage->pt->format);
+      texImage->RowStride = stImage->transfer->stride / util_format_get_blocksize(stImage->pt->format);
    }
    else {
       /* Otherwise, the image should actually be stored in
@@ -1178,7 +1178,7 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
                                             xoffset, yoffset,
                                             width, height);
       
-      srcBlockStride = pf_get_stride(pformat, width);
+      srcBlockStride = util_format_get_stride(pformat, width);
       dstBlockStride = stImage->transfer->stride;
    } else {
       assert(stImage->pt);
@@ -1192,16 +1192,16 @@ st_CompressedTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
       return;
    }
 
-   assert(xoffset % pf_get_blockwidth(pformat) == 0);
-   assert(yoffset % pf_get_blockheight(pformat) == 0);
-   assert(width % pf_get_blockwidth(pformat) == 0);
-   assert(height % pf_get_blockheight(pformat) == 0);
+   assert(xoffset % util_format_get_blockwidth(pformat) == 0);
+   assert(yoffset % util_format_get_blockheight(pformat) == 0);
+   assert(width % util_format_get_blockwidth(pformat) == 0);
+   assert(height % util_format_get_blockheight(pformat) == 0);
 
-   for (y = 0; y < height; y += pf_get_blockheight(pformat)) {
+   for (y = 0; y < height; y += util_format_get_blockheight(pformat)) {
       /* don't need to adjust for xoffset and yoffset as st_texture_image_map does that */
-      const char *src = (const char*)data + srcBlockStride * pf_get_nblocksy(pformat, y);
-      char *dst = (char*)texImage->Data + dstBlockStride * pf_get_nblocksy(pformat, y);
-      memcpy(dst, src, pf_get_stride(pformat, width));
+      const char *src = (const char*)data + srcBlockStride * util_format_get_nblocksy(pformat, y);
+      char *dst = (char*)texImage->Data + dstBlockStride * util_format_get_nblocksy(pformat, y);
+      memcpy(dst, src, util_format_get_stride(pformat, width));
    }
 
    if (stImage->pt) {
@@ -1691,10 +1691,10 @@ copy_image_data_to_texture(struct st_context *st,
                             dstLevel,
                             stImage->base.Data,
                             stImage->base.RowStride * 
-                            pf_get_blocksize(stObj->pt->format),
+                            util_format_get_blocksize(stObj->pt->format),
                             stImage->base.RowStride *
                             stImage->base.Height *
-                            pf_get_blocksize(stObj->pt->format));
+                            util_format_get_blocksize(stObj->pt->format));
       _mesa_align_free(stImage->base.Data);
       stImage->base.Data = NULL;
    }
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
index 7700551830..5a433dd7b9 100644
--- a/src/mesa/state_tracker/st_gen_mipmap.c
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -37,6 +37,7 @@
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_format.h"
 #include "util/u_gen_mipmap.h"
 #include "util/u_math.h"
 
@@ -146,8 +147,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target,
       srcData = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstData = (ubyte *) screen->transfer_map(screen, dstTrans);
 
-      srcStride = srcTrans->stride / pf_get_blocksize(srcTrans->texture->format);
-      dstStride = dstTrans->stride / pf_get_blocksize(dstTrans->texture->format);
+      srcStride = srcTrans->stride / util_format_get_blocksize(srcTrans->texture->format);
+      dstStride = dstTrans->stride / util_format_get_blocksize(dstTrans->texture->format);
 
       _mesa_generate_mipmap_level(target, datatype, comps,
                                   0 /*border*/,
-- 
cgit v1.2.3


From 62afcbb2338e30aab8c829e92456c073800156f6 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 18 Dec 2009 13:40:24 -0800
Subject: r300g: Nuke r300_vbo.

---
 src/gallium/drivers/r300/Makefile      |  1 -
 src/gallium/drivers/r300/r300_render.c | 10 +++-
 src/gallium/drivers/r300/r300_vbo.c    | 87 ----------------------------------
 src/gallium/drivers/r300/r300_vbo.h    | 36 --------------
 4 files changed, 8 insertions(+), 126 deletions(-)
 delete mode 100644 src/gallium/drivers/r300/r300_vbo.c
 delete mode 100644 src/gallium/drivers/r300/r300_vbo.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
index 8cfd4147c2..afddcb161f 100644
--- a/src/gallium/drivers/r300/Makefile
+++ b/src/gallium/drivers/r300/Makefile
@@ -17,7 +17,6 @@ C_SOURCES = \
 	r300_state.c \
 	r300_state_derived.c \
 	r300_state_invariant.c \
-	r300_vbo.c \
 	r300_vs.c \
 	r300_texture.c \
 	r300_tgsi_to_rc.c
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 4b210f72db..425eb9f9ef 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -37,7 +37,6 @@
 #include "r300_reg.h"
 #include "r300_render.h"
 #include "r300_state_derived.h"
-#include "r300_vbo.h"
 
 /* r300_render: Vertex and index buffer primitive emission. */
 #define R300_MAX_VBO_SIZE  (1024 * 1024)
@@ -196,7 +195,14 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
-    setup_index_buffer(r300, indexBuffer, indexSize);
+    if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
+                                  RADEON_GEM_DOMAIN_GTT, 0)) {
+        assert(0);
+    }
+
+    if (!r300->winsys->validate(r300->winsys)) {
+        assert(0);
+    }
 
     r300_emit_dirty_state(r300);
 
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
deleted file mode 100644
index d8610dadfa..0000000000
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/* r300_vbo: Various helpers for emitting vertex buffers. Needs cleanup,
- * refactoring, etc. */
-
-#include "r300_vbo.h"
-
-#include "pipe/p_format.h"
-
-#include "r300_cs.h"
-#include "r300_context.h"
-#include "r300_state_inlines.h"
-#include "r300_reg.h"
-
-#include "radeon_winsys.h"
-
-static INLINE int get_buffer_offset(struct r300_context *r300,
-                                    unsigned int buf_nr,
-                                    unsigned int elem_offset)
-{
-    return r300->vertex_buffer[buf_nr].buffer_offset + elem_offset;
-}
-#if 0
-/* XXX not called at all */
-static void setup_vertex_buffers(struct r300_context *r300)
-{
-    struct pipe_vertex_element *vert_elem;
-    int i;
-
-    for (i = 0; i < r300->aos_count; i++)
-    {
-        vert_elem = &r300->vertex_element[i];
-            /* XXX use translate module to convert the data */
-        if (!format_is_supported(vert_elem->src_format,
-                                 vert_elem->nr_components)) {
-            assert(0);
-            /*
-            struct pipe_buffer *buf;
-            const unsigned int max_index = r300->vertex_buffers[vert_elem->vertex_buffer_index].max_index;
-            buf = pipe_buffer_create(r300->context.screen, 4, usage, vert_elem->nr_components * max_index * sizeof(float));
-            */
-        }
-
-        if (get_buffer_offset(r300,
-                              vert_elem->vertex_buffer_index,
-                              vert_elem->src_offset) % 4) {
-            /* XXX need to align buffer */
-            assert(0);
-        }
-    }
-}
-#endif
-/* XXX these shouldn't be asserts since we can work around bad indexbufs */
-void setup_index_buffer(struct r300_context *r300,
-                        struct pipe_buffer* indexBuffer,
-                        unsigned indexSize)
-{
-    if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
-                                  RADEON_GEM_DOMAIN_GTT, 0)) {
-        assert(0);
-    }
-
-    if (!r300->winsys->validate(r300->winsys)) {
-        assert(0);
-    }
-}
diff --git a/src/gallium/drivers/r300/r300_vbo.h b/src/gallium/drivers/r300/r300_vbo.h
deleted file mode 100644
index 7afa75899c..0000000000
--- a/src/gallium/drivers/r300/r300_vbo.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef R300_VBO_H
-#define R300_VBO_H
-
-struct r300_context;
-struct pipe_buffer;
-
-void setup_vertex_attributes(struct r300_context *r300);
-
-void setup_index_buffer(struct r300_context *r300,
-                        struct pipe_buffer* indexBuffer,
-                        unsigned indexSize);
-
-#endif
-- 
cgit v1.2.3


From bf611cef29d664c2c2b56d2175c3a966c720e8b8 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 18 Dec 2009 14:05:40 -0800
Subject: r300g: Eliminate a couple more asserts.

---
 src/gallium/drivers/r300/r300_render.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 425eb9f9ef..a0d67e7618 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -180,7 +180,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
-
     if (count > 65535) {
         return FALSE;
     }
@@ -197,11 +196,11 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 
     if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
                                   RADEON_GEM_DOMAIN_GTT, 0)) {
-        assert(0);
+        return FALSE;
     }
 
     if (!r300->winsys->validate(r300->winsys)) {
-        assert(0);
+        return FALSE;
     }
 
     r300_emit_dirty_state(r300);
-- 
cgit v1.2.3


From 429f0e3b37e33a33289f8488369474b20bfd5247 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Sat, 19 Dec 2009 00:18:43 +0100
Subject: gallium: fix up drivers for edgeflag changes

Several drivers that chose to ignore edgeflags might require some more work:
although edgeflags never worked there, they might now crash.
---
 src/gallium/drivers/cell/ppu/cell_draw_arrays.c      | 10 ----------
 src/gallium/drivers/i915/i915_state.c                |  7 -------
 src/gallium/drivers/identity/id_context.c            | 12 ------------
 src/gallium/drivers/llvmpipe/lp_context.c            |  2 --
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c        |  7 -------
 src/gallium/drivers/llvmpipe/lp_state.h              |  4 ----
 src/gallium/drivers/nv04/nv04_context.c              |  6 ------
 src/gallium/drivers/nv10/nv10_context.c              |  6 ------
 src/gallium/drivers/nv20/nv20_context.c              |  6 ------
 src/gallium/drivers/nv20/nv20_vertprog.c             |  3 +++
 src/gallium/drivers/nv30/nv30_context.h              |  1 -
 src/gallium/drivers/nv30/nv30_state.c                | 11 -----------
 src/gallium/drivers/nv30/nv30_vbo.c                  |  5 -----
 src/gallium/drivers/nv30/nv30_vertprog.c             |  3 +++
 src/gallium/drivers/nv40/nv40_context.h              |  1 -
 src/gallium/drivers/nv40/nv40_state.c                | 11 -----------
 src/gallium/drivers/nv40/nv40_state_emit.c           |  1 -
 src/gallium/drivers/nv40/nv40_vbo.c                  |  5 -----
 src/gallium/drivers/nv40/nv40_vertprog.c             |  4 ++++
 src/gallium/drivers/nv50/nv50_context.c              |  6 ------
 src/gallium/drivers/r300/r300_state.c                |  9 ---------
 src/gallium/drivers/r300/r300_vs.c                   |  5 +++++
 src/gallium/drivers/svga/svga_context.h              |  7 ++-----
 src/gallium/drivers/svga/svga_pipe_vertex.c          | 13 -------------
 src/gallium/drivers/svga/svga_state_need_swtnl.c     |  8 ++++----
 src/gallium/drivers/svga/svga_swtnl_state.c          |  7 +------
 src/gallium/drivers/trace/tr_context.c               | 20 --------------------
 .../state_trackers/python/retrace/interpreter.py     |  4 ----
 28 files changed, 22 insertions(+), 162 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 644496db40..5cc1d4ddf8 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -171,21 +171,11 @@ cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
 }
 
 
-static void
-cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
-   struct cell_context *cell = cell_context(pipe);
-   draw_set_edgeflags(cell->draw, edgeflags);
-}
-
-
-
 void
 cell_init_draw_functions(struct cell_context *cell)
 {
    cell->pipe.draw_arrays = cell_draw_arrays;
    cell->pipe.draw_elements = cell_draw_elements;
    cell->pipe.draw_range_elements = cell_draw_range_elements;
-   cell->pipe.set_edgeflags = cell_set_edgeflags;
 }
 
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 9103847f1c..e580b6c0f7 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -752,16 +752,9 @@ static void i915_set_vertex_elements(struct pipe_context *pipe,
 }
 
 
-static void i915_set_edgeflags(struct pipe_context *pipe,
-                               const unsigned *bitfield)
-{
-   /* TODO do something here */
-}
-
 void
 i915_init_state_functions( struct i915_context *i915 )
 {
-   i915->base.set_edgeflags = i915_set_edgeflags;
    i915->base.create_blend_state = i915_create_blend_state;
    i915->base.bind_blend_state = i915_bind_blend_state;
    i915->base.delete_blend_state = i915_delete_blend_state;
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index bedab56f59..bdbaae5987 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -45,17 +45,6 @@ identity_destroy(struct pipe_context *_pipe)
    free(id_pipe);
 }
 
-static void
-identity_set_edgeflags(struct pipe_context *_pipe,
-                       const unsigned *bitfield)
-{
-   struct identity_context *id_pipe = identity_context(_pipe);
-   struct pipe_context *pipe = id_pipe->pipe;
-
-   pipe->set_edgeflags(pipe,
-                       bitfield);
-}
-
 static boolean
 identity_draw_arrays(struct pipe_context *_pipe,
                      unsigned prim,
@@ -707,7 +696,6 @@ identity_context_create(struct pipe_screen *_screen, struct pipe_context *pipe)
    id_pipe->base.draw = NULL;
 
    id_pipe->base.destroy = identity_destroy;
-   id_pipe->base.set_edgeflags = identity_set_edgeflags;
    id_pipe->base.draw_arrays = identity_draw_arrays;
    id_pipe->base.draw_elements = identity_draw_elements;
    id_pipe->base.draw_range_elements = identity_draw_range_elements;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 679e244274..001311e703 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -226,8 +226,6 @@ llvmpipe_create( struct pipe_screen *screen )
    llvmpipe->pipe.draw_arrays = llvmpipe_draw_arrays;
    llvmpipe->pipe.draw_elements = llvmpipe_draw_elements;
    llvmpipe->pipe.draw_range_elements = llvmpipe_draw_range_elements;
-   llvmpipe->pipe.set_edgeflags = llvmpipe_set_edgeflags;
-
 
    llvmpipe->pipe.clear = llvmpipe_clear;
    llvmpipe->pipe.flush = llvmpipe_flush;
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 0aa13a1fc6..2299566c66 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -133,10 +133,3 @@ llvmpipe_draw_elements(struct pipe_context *pipe,
                                         mode, start, count );
 }
 
-
-void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags)
-{
-   struct llvmpipe_context *lp = llvmpipe_context(pipe);
-   draw_set_edgeflags(lp->draw, edgeflags);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index d1c74ab07b..5cee7bf74b 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -212,10 +212,6 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
                              unsigned max_index,
                              unsigned mode, unsigned start, unsigned count);
 
-void
-llvmpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags);
-
-
 void
 llvmpipe_map_transfers(struct llvmpipe_context *lp);
 
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 10d984ace9..4b33636b2e 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -27,11 +27,6 @@ nv04_destroy(struct pipe_context *pipe)
 	FREE(nv04);
 }
 
-static void
-nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
 static boolean
 nv04_init_hwctx(struct nv04_context *nv04)
 {
@@ -83,7 +78,6 @@ nv04_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv04->pipe.winsys = ws;
 	nv04->pipe.screen = pscreen;
 	nv04->pipe.destroy = nv04_destroy;
-	nv04->pipe.set_edgeflags = nv04_set_edgeflags;
 	nv04->pipe.draw_arrays = nv04_draw_arrays;
 	nv04->pipe.draw_elements = nv04_draw_elements;
 	nv04->pipe.clear = nv04_clear;
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index 65a22b175e..0dadeb03dd 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -252,11 +252,6 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
 	FIRE_RING (NULL);
 }
 
-static void
-nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
 struct pipe_context *
 nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -276,7 +271,6 @@ nv10_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv10->pipe.winsys = ws;
 	nv10->pipe.screen = pscreen;
 	nv10->pipe.destroy = nv10_destroy;
-	nv10->pipe.set_edgeflags = nv10_set_edgeflags;
 	nv10->pipe.draw_arrays = nv10_draw_arrays;
 	nv10->pipe.draw_elements = nv10_draw_elements;
 	nv10->pipe.clear = nv10_clear;
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 276db8b57b..6a147a4159 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -375,11 +375,6 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
 	FIRE_RING (NULL);
 }
 
-static void
-nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
 struct pipe_context *
 nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -399,7 +394,6 @@ nv20_create(struct pipe_screen *pscreen, unsigned pctx_id)
 	nv20->pipe.winsys = ws;
 	nv20->pipe.screen = pscreen;
 	nv20->pipe.destroy = nv20_destroy;
-	nv20->pipe.set_edgeflags = nv20_set_edgeflags;
 	nv20->pipe.draw_arrays = nv20_draw_arrays;
 	nv20->pipe.draw_elements = nv20_draw_elements;
 	nv20->pipe.clear = nv20_clear;
diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c
index 9e8aab9754..7886c2af7e 100644
--- a/src/gallium/drivers/nv20/nv20_vertprog.c
+++ b/src/gallium/drivers/nv20/nv20_vertprog.c
@@ -530,6 +530,9 @@ nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc,
 			return FALSE;
 		}
 		break;
+	case TGSI_SEMANTIC_EDGEFLAG:
+		NOUVEAU_ERR("cannot handle edgeflag output\n");
+		return FALSE;
 	default:
 		NOUVEAU_ERR("bad output semantic\n");
 		return FALSE;
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 8d49366dfc..026cc82e0a 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -144,7 +144,6 @@ struct nv30_context {
 	unsigned vtxbuf_nr;
 	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
 	unsigned vtxelt_nr;
-	const unsigned *edgeflags;
 };
 
 static INLINE struct nv30_context *
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index 3f802d9241..e6321b480f 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -672,16 +672,6 @@ nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count,
 	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
 }
 
-static void
-nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-	struct nv30_context *nv30 = nv30_context(pipe);
-
-	nv30->edgeflags = bitfield;
-	nv30->dirty |= NV30_NEW_ARRAYS;
-	/*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/
-}
-
 void
 nv30_init_state_functions(struct nv30_context *nv30)
 {
@@ -721,7 +711,6 @@ nv30_init_state_functions(struct nv30_context *nv30)
 	nv30->pipe.set_scissor_state = nv30_set_scissor_state;
 	nv30->pipe.set_viewport_state = nv30_set_viewport_state;
 
-	nv30->pipe.set_edgeflags = nv30_set_edgeflags;
 	nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers;
 	nv30->pipe.set_vertex_elements = nv30_set_vertex_elements;
 }
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index 189656ec81..e32b8141af 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -485,11 +485,6 @@ nv30_vbo_validate(struct nv30_context *nv30)
 	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 	int hw;
 
-	if (nv30->edgeflags) {
-		/*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/
-		return FALSE;
-	}
-
 	vtxbuf = so_new(20, 18);
 	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
 	vtxfmt = so_new(17, 0);
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 36ac8299f0..5d60984622 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -530,6 +530,9 @@ nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc,
 			return FALSE;
 		}
 		break;
+	case TGSI_SEMANTIC_EDGEFLAG:
+		NOUVEAU_ERR("cannot handle edgeflag output\n");
+		return FALSE;
 	default:
 		NOUVEAU_ERR("bad output semantic\n");
 		return FALSE;
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index a3d594167a..06172e8817 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -159,7 +159,6 @@ struct nv40_context {
 	unsigned vtxbuf_nr;
 	struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS];
 	unsigned vtxelt_nr;
-	const unsigned *edgeflags;
 };
 
 static INLINE struct nv40_context *
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index bc34e32a4b..ed55d29aff 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -687,16 +687,6 @@ nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count,
 	nv40->draw_dirty |= NV40_NEW_ARRAYS;
 }
 
-static void
-nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-	struct nv40_context *nv40 = nv40_context(pipe);
-
-	nv40->edgeflags = bitfield;
-	nv40->dirty |= NV40_NEW_ARRAYS;
-	nv40->draw_dirty |= NV40_NEW_ARRAYS;
-}
-
 void
 nv40_init_state_functions(struct nv40_context *nv40)
 {
@@ -736,7 +726,6 @@ nv40_init_state_functions(struct nv40_context *nv40)
 	nv40->pipe.set_scissor_state = nv40_set_scissor_state;
 	nv40->pipe.set_viewport_state = nv40_set_viewport_state;
 
-	nv40->pipe.set_edgeflags = nv40_set_edgeflags;
 	nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers;
 	nv40->pipe.set_vertex_elements = nv40_set_vertex_elements;
 }
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 198692965d..980ed217ec 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -160,7 +160,6 @@ nv40_state_validate_swtnl(struct nv40_context *nv40)
 		draw_set_viewport_state(draw, &nv40->viewport);
 
 	if (nv40->draw_dirty & NV40_NEW_ARRAYS) {
-		draw_set_edgeflags(draw, nv40->edgeflags);
 		draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf);
 		draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt);	
 	}
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index b2753b8e2e..af3fcf6a34 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -484,11 +484,6 @@ nv40_vbo_validate(struct nv40_context *nv40)
 	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 	int hw;
 
-	if (nv40->edgeflags) {
-		nv40->fallback_swtnl |= NV40_NEW_ARRAYS;
-		return FALSE;
-	}
-
 	vtxbuf = so_new(20, 18);
 	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
 	vtxfmt = so_new(17, 0);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index 55835ee644..d9fc31006f 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -621,6 +621,10 @@ nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc,
 			return FALSE;
 		}
 		break;
+	case TGSI_SEMANTIC_EDGEFLAG:
+		/* not really an error, just a fallback */
+		NOUVEAU_ERR("cannot handle edgeflag output\n");
+		return FALSE;
 	default:
 		NOUVEAU_ERR("bad output semantic\n");
 		return FALSE;
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index 219e7a7862..d21b80eab8 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -48,11 +48,6 @@ nv50_destroy(struct pipe_context *pipe)
 }
 
 
-static void
-nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
-{
-}
-
 struct pipe_context *
 nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
 {
@@ -71,7 +66,6 @@ nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
 
 	nv50->pipe.destroy = nv50_destroy;
 
-	nv50->pipe.set_edgeflags = nv50_set_edgeflags;
 	nv50->pipe.draw_arrays = nv50_draw_arrays;
 	nv50->pipe.draw_elements = nv50_draw_elements;
 	nv50->pipe.clear = nv50_clear;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 68c5408a64..a0ebdf3024 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -283,13 +283,6 @@ static void r300_delete_dsa_state(struct pipe_context* pipe,
     FREE(state);
 }
 
-static void r300_set_edgeflags(struct pipe_context* pipe,
-                               const unsigned* bitfield)
-{
-    /* XXX you know it's bad when i915 has this blank too */
-    /* XXX and even worse, I have no idea WTF the bitfield is */
-}
-
 static void r300_set_scissor_regs(const struct pipe_scissor_state* state,
                                   struct r300_scissor_regs *scissor,
                                   boolean is_r500)
@@ -840,8 +833,6 @@ void r300_init_state_functions(struct r300_context* r300)
     r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
     r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
 
-    r300->context.set_edgeflags = r300_set_edgeflags;
-
     r300->context.set_framebuffer_state = r300_set_framebuffer_state;
 
     r300->context.create_fs_state = r300_create_fs_state;
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 31248346bc..6ab5995244 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -77,6 +77,11 @@ static void r300_shader_read_vs_outputs(
                 vs_outputs->fog = i;
                 break;
 
+            case TGSI_SEMANTIC_EDGEFLAG:
+                assert(index == 0);
+                fprintf(stderr, "r300 VP: cannot handle edgeflag output\n");
+                assert(0);
+                break;
             default:
                 assert(0);
         }
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index e650a251d1..a851fa6705 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -202,8 +202,6 @@ struct svga_state
    struct pipe_clip_state clip;
    struct pipe_viewport_state viewport;
 
-   const unsigned *edgeflags;
-
    unsigned num_samplers;
    unsigned num_textures;
    unsigned num_vertex_elements;
@@ -380,9 +378,8 @@ struct svga_context
 #define SVGA_NEW_NEED_SWTNL          0x400000
 #define SVGA_NEW_FS_RESULT           0x800000
 #define SVGA_NEW_VS_RESULT           0x1000000
-#define SVGA_NEW_EDGEFLAGS           0x2000000
-#define SVGA_NEW_ZERO_STRIDE         0x4000000
-#define SVGA_NEW_TEXTURE_FLAGS       0x8000000
+#define SVGA_NEW_ZERO_STRIDE         0x2000000
+#define SVGA_NEW_TEXTURE_FLAGS       0x4000000
 
 
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
index 28e2787e0d..42f290d162 100644
--- a/src/gallium/drivers/svga/svga_pipe_vertex.c
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -84,18 +84,6 @@ static void svga_set_vertex_elements(struct pipe_context *pipe,
 }
 
 
-static void svga_set_edgeflags(struct pipe_context *pipe,
-                               const unsigned *bitfield)
-{
-   struct svga_context *svga = svga_context(pipe);
-
-   if (bitfield != NULL || svga->curr.edgeflags != NULL) {
-      svga->curr.edgeflags = bitfield;
-      svga->dirty |= SVGA_NEW_EDGEFLAGS;
-   }
-}
-
-
 void svga_cleanup_vertex_state( struct svga_context *svga )
 {
    unsigned i;
@@ -109,7 +97,6 @@ void svga_init_vertex_functions( struct svga_context *svga )
 {
    svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
    svga->pipe.set_vertex_elements = svga_set_vertex_elements;
-   svga->pipe.set_edgeflags = svga_set_edgeflags;
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
index 00201b8091..3c35a8579f 100644
--- a/src/gallium/drivers/svga/svga_state_need_swtnl.c
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -108,6 +108,7 @@ static int update_need_pipeline( struct svga_context *svga,
 {
    
    boolean need_pipeline = FALSE;
+   struct svga_vertex_shader *vs = svga->curr.vs;
 
    /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
     */
@@ -119,11 +120,9 @@ static int update_need_pipeline( struct svga_context *svga,
       need_pipeline = TRUE;
    }
 
-   /* SVGA_NEW_EDGEFLAGS
+   /* EDGEFLAGS
     */
-   if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL &&
-       svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES && 
-       svga->curr.edgeflags != NULL) {
+    if (vs->base.info.writes_edgeflag) {
       SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
       need_pipeline = TRUE;
    }
@@ -150,6 +149,7 @@ struct svga_tracked_state svga_update_need_pipeline =
    "need pipeline",
    (SVGA_NEW_RAST |
     SVGA_NEW_CLIP |
+    SVGA_NEW_VS |
     SVGA_NEW_REDUCED_PRIMITIVE),
    update_need_pipeline
 };
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 1616312113..25b8c2af3a 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -120,10 +120,6 @@ static int update_swtnl_draw( struct svga_context *svga,
       draw_set_mrd(svga->swtnl.draw, 
                    svga->curr.depthscale);
 
-   if (dirty & SVGA_NEW_EDGEFLAGS)
-      draw_set_edgeflags( svga->swtnl.draw, 
-                          svga->curr.edgeflags );
-
    return 0;
 }
 
@@ -138,8 +134,7 @@ struct svga_tracked_state svga_update_swtnl_draw =
     SVGA_NEW_VIEWPORT |
     SVGA_NEW_RAST |
     SVGA_NEW_FRAME_BUFFER |
-    SVGA_NEW_REDUCED_PRIMITIVE |
-    SVGA_NEW_EDGEFLAGS),
+    SVGA_NEW_REDUCED_PRIMITIVE),
    update_swtnl_draw
 };
 
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 2f0f063d2d..80f4874b78 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -94,25 +94,6 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
 }
 
 
-static INLINE void
-trace_context_set_edgeflags(struct pipe_context *_pipe,
-                            const unsigned *bitfield)
-{
-   struct trace_context *tr_ctx = trace_context(_pipe);
-   struct pipe_context *pipe = tr_ctx->pipe;
-
-   trace_dump_call_begin("pipe_context", "set_edgeflags");
-
-   trace_dump_arg(ptr, pipe);
-   /* FIXME: we don't know how big this array is */
-   trace_dump_arg(ptr, bitfield);
-
-   pipe->set_edgeflags(pipe, bitfield);
-
-   trace_dump_call_end();
-}
-
-
 static INLINE void
 trace_context_draw_block(struct trace_context *tr_ctx, int flag)
 {
@@ -1298,7 +1279,6 @@ trace_context_create(struct pipe_screen *_screen,
    tr_ctx->base.winsys = _screen->winsys;
    tr_ctx->base.screen = _screen;
    tr_ctx->base.destroy = trace_context_destroy;
-   tr_ctx->base.set_edgeflags = trace_context_set_edgeflags;
    tr_ctx->base.draw_arrays = trace_context_draw_arrays;
    tr_ctx->base.draw_elements = trace_context_draw_elements;
    tr_ctx->base.draw_range_elements = trace_context_draw_range_elements;
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index b32eafe23f..110b3d0ec1 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -507,10 +507,6 @@ class Context(Object):
             self.real.set_vertex_element(i, elements[i])
         self.real.set_vertex_elements(num_elements)
 
-    def set_edgeflags(self, bitfield):
-        # FIXME
-        pass
-    
     def dump_vertices(self, start, count):
         if not self.interpreter.verbosity(2):
             return
-- 
cgit v1.2.3


From f13a904c34cf7ac5aae3d50a1421259895fc9a08 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 18 Dec 2009 19:42:02 -0800
Subject: r300g: Respect provoking vertex for trifans.

Fixes part of piglit's clipFlat test; next up is to get quads right.
---
 src/gallium/drivers/r300/r300_render.c | 33 +++++++++++++++++++++++++++++++--
 src/gallium/drivers/r300/r300_state.c  |  4 ----
 2 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index a0d67e7618..11c7ce859d 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -75,13 +75,40 @@ static boolean r300_nothing_to_draw(struct r300_context *r300)
            r300->scissor_state->scissor.empty_area;
 }
 
+static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
+                                            unsigned mode)
+{
+    uint32_t color_control = r300->rs_state->color_control;
+
+    /* By default (see r300_state.c:r300_create_rs_state) color_control is
+     * initialized to provoking the first vertex.
+     *
+     * If we are provoking the first vertex, then there's a quirk in the
+     * specification for ARB_provoking_vertex that essentially makes the
+     * second vertex the correct one to provoke for triangle fans.
+     * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
+     * Otherwise, force the last vertex, as GL standard. */
+
+    if (r300->rs_state->rs.flatshade_first) {
+        if (mode == PIPE_PRIM_TRIANGLE_FAN) {
+            color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
+        }
+    } else {
+        color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+    }
+
+    return color_control;
+}
+
 static void r300_emit_draw_arrays(struct r300_context *r300,
                                   unsigned mode,
                                   unsigned count)
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(6);
+    BEGIN_CS(8);
+    OUT_CS_REG(R300_GA_COLOR_CONTROL,
+            r300_provoking_vertex_fixes(r300, mode));
     OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
@@ -108,7 +135,9 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     assert((start * indexSize)  % 4 == 0);
     assert(offset_dwords == 0);
 
-    BEGIN_CS(12);
+    BEGIN_CS(14);
+    OUT_CS_REG(R300_GA_COLOR_CONTROL,
+            r300_provoking_vertex_fixes(r300, mode));
     OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, minIndex);
     OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, maxIndex);
     OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 91cf972ede..5f332b2e0c 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -510,10 +510,6 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
         rs->color_control = R300_SHADE_MODEL_SMOOTH;
     }
 
-    if (!state->flatshade_first) {
-        rs->color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
-    }
-
     return (void*)rs;
 }
 
-- 
cgit v1.2.3


From 759fd1f25f33273e0b7c02598bfa5b97d1a82d77 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Fri, 18 Dec 2009 21:50:45 -0800
Subject: r300g: Fix provoking vertex for non-quads.

Read the comments. In short, we can't possibly pass piglit's
glean/clipFlat without some help from Gallium and an API/spec change.
---
 src/gallium/drivers/r300/r300_render.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 11c7ce859d..2d70ec2ac9 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -83,15 +83,34 @@ static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
     /* By default (see r300_state.c:r300_create_rs_state) color_control is
      * initialized to provoking the first vertex.
      *
-     * If we are provoking the first vertex, then there's a quirk in the
-     * specification for ARB_provoking_vertex that essentially makes the
-     * second vertex the correct one to provoke for triangle fans.
+     * Triangle fans must be reduced to the second vertex, not the first, in
+     * Gallium flatshade-first mode, as per the GL spec.
      * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
-     * Otherwise, force the last vertex, as GL standard. */
+     *
+     * Quads never provoke correctly in flatshade-first mode. The first
+     * vertex is never considered as provoking, so only the second, third,
+     * and fourth vertices can be selected, and both "third" and "last" modes
+     * select the fourth vertex. This is probably due to D3D lacking quads.
+     *
+     * Similarly, polygons reduce to the first, not the last, vertex, when in
+     * "last" mode, and all other modes start from the second vertex.
+     *
+     * ~ C.
+     */
 
     if (r300->rs_state->rs.flatshade_first) {
-        if (mode == PIPE_PRIM_TRIANGLE_FAN) {
-            color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
+        switch (mode) {
+            case PIPE_PRIM_TRIANGLE_FAN:
+                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
+                break;
+            case PIPE_PRIM_QUADS:
+            case PIPE_PRIM_QUAD_STRIP:
+            case PIPE_PRIM_POLYGON:
+                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+                break;
+            default:
+                color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST;
+                break;
         }
     } else {
         color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
-- 
cgit v1.2.3


From 6ba83cd63f84a4d74dd679d62662d59533fd1bdb Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Wed, 16 Dec 2009 06:07:39 +0100
Subject: r300g: add texture compare modes

---
 src/gallium/drivers/r300/r300_context.h    |   2 +
 src/gallium/drivers/r300/r300_emit.c       |  17 +++--
 src/gallium/drivers/r300/r300_fs.c         | 102 +++++++++++++++++++++++------
 src/gallium/drivers/r300/r300_fs.h         |  32 ++++++---
 src/gallium/drivers/r300/r300_state.c      |  23 ++++++-
 src/gallium/drivers/r300/r300_tgsi_to_rc.c |   9 ++-
 6 files changed, 148 insertions(+), 37 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 6bd2766730..232530b7dc 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -91,6 +91,8 @@ struct r300_rs_block {
 };
 
 struct r300_sampler_state {
+    struct pipe_sampler_state state;
+
     uint32_t filter0;      /* R300_TX_FILTER0: 0x4400 */
     uint32_t filter1;      /* R300_TX_FILTER1: 0x4440 */
     uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index d7b6511d6d..199ce3a945 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -158,6 +158,13 @@ static const float * get_shader_constant(
                     vec[1] = 1.0 / tex->height0;
                     break;
 
+                /* Texture compare-fail value. */
+                /* XXX Since Gallium doesn't support GL_ARB_shadow_ambient,
+                 * this is always (0,0,0,0). */
+                case RC_STATE_SHADOW_AMBIENT:
+                    vec[3] = 0;
+                    break;
+
                 default:
                     debug_printf("r300: Implementation error: "
                         "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
@@ -1030,18 +1037,20 @@ validate:
 
     if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
         if (r300screen->caps->is_r500) {
-            r500_emit_fragment_program_code(r300, &r300->fs->code);
+            r500_emit_fragment_program_code(r300, &r300->fs->shader->code);
         } else {
-            r300_emit_fragment_program_code(r300, &r300->fs->code);
+            r300_emit_fragment_program_code(r300, &r300->fs->shader->code);
         }
         r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
     }
 
     if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER_CONSTANTS) {
         if (r300screen->caps->is_r500) {
-            r500_emit_fs_constant_buffer(r300, &r300->fs->code.constants);
+            r500_emit_fs_constant_buffer(r300,
+                                         &r300->fs->shader->code.constants);
         } else {
-            r300_emit_fs_constant_buffer(r300, &r300->fs->code.constants);
+            r300_emit_fs_constant_buffer(r300,
+                                         &r300->fs->shader->code.constants);
         }
         r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER_CONSTANTS;
     }
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 79b01bb4dc..4e1b61ca40 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -22,6 +22,9 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
 #include "tgsi/tgsi_dump.h"
 
 #include "r300_context.h"
@@ -33,8 +36,8 @@
 #include "radeon_compiler.h"
 
 /* Convert info about FS input semantics to r300_shader_semantics. */
-static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
-                                       struct r300_shader_semantics* fs_inputs)
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+                                struct r300_shader_semantics* fs_inputs)
 {
     int i;
     unsigned index;
@@ -66,7 +69,6 @@ static void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
     }
 }
 
-
 static void find_output_registers(struct r300_fragment_program_compiler * compiler,
                                   struct r300_fragment_shader * fs)
 {
@@ -95,7 +97,7 @@ static void allocate_hardware_inputs(
     void * mydata)
 {
     struct r300_shader_semantics* inputs =
-        &((struct r300_fragment_shader*)c->UserData)->inputs;
+        (struct r300_shader_semantics*)c->UserData;
     int i, reg = 0;
 
     /* Allocate input registers. */
@@ -114,31 +116,45 @@ static void allocate_hardware_inputs(
     }
 }
 
-void r300_translate_fragment_shader(struct r300_context* r300,
-                                    struct r300_fragment_shader* fs)
+static void get_compare_state(
+    struct r300_context* r300,
+    struct r300_fragment_program_external_state* state,
+    unsigned shadow_samplers)
+{
+    memset(state, 0, sizeof(*state));
+
+    for (int i = 0; i < r300->sampler_count; i++) {
+        struct r300_sampler_state* s = r300->sampler_states[i];
+
+        if (s && s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+            /* XXX Gallium doesn't provide us with any information regarding
+             * this mode, so we are screwed. I'm setting 0 = LUMINANCE. */
+            state->unit[i].depth_texture_mode = 0;
+
+            /* Fortunately, no need to translate this. */
+            state->unit[i].texture_compare_func = s->state.compare_func;
+        }
+    }
+}
+
+static void r300_translate_fragment_shader(
+    struct r300_context* r300,
+    struct r300_fragment_shader_code* shader)
 {
+    struct r300_fragment_shader* fs = r300->fs;
     struct r300_fragment_program_compiler compiler;
     struct tgsi_to_rc ttr;
 
-    /* Initialize. */
-    r300_shader_read_fs_inputs(&fs->info, &fs->inputs);
-
     /* Setup the compiler. */
     memset(&compiler, 0, sizeof(compiler));
     rc_init(&compiler.Base);
     compiler.Base.Debug = DBG_ON(r300, DBG_FP);
 
-    compiler.code = &fs->code;
+    compiler.code = &shader->code;
+    compiler.state = shader->compare_state;
     compiler.is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
     compiler.AllocateHwInputs = &allocate_hardware_inputs;
-    compiler.UserData = fs;
-
-    /* XXX: Program compilation depends on texture compare modes,
-     * which are sampler state. Therefore, programs need to be recompiled
-     * depending on this state as in the classic Mesa driver.
-     *
-     * This is not yet handled correctly.
-     */
+    compiler.UserData = &fs->inputs;
 
     find_output_registers(&compiler, fs);
 
@@ -153,6 +169,8 @@ void r300_translate_fragment_shader(struct r300_context* r300,
 
     r300_tgsi_to_rc(&ttr, fs->state.tokens);
 
+    fs->shadow_samplers = compiler.Base.Program.ShadowSamplers;
+
     /* Invoke the compiler */
     r3xx_compile_fragment_program(&compiler);
     if (compiler.Base.Error) {
@@ -164,5 +182,51 @@ void r300_translate_fragment_shader(struct r300_context* r300,
 
     /* And, finally... */
     rc_destroy(&compiler.Base);
-    fs->translated = TRUE;
+}
+
+boolean r300_pick_fragment_shader(struct r300_context* r300)
+{
+    struct r300_fragment_shader* fs = r300->fs;
+    struct r300_fragment_program_external_state state;
+    struct r300_fragment_shader_code* ptr;
+
+    if (!fs->first) {
+        /* Build the fragment shader for the first time. */
+        fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code);
+
+        /* The shadow samplers are only known after the first translation,
+         * so we pass ~0 here, which means "look at all sampler states".
+         * This choice doesn't have any impact on correctness. */
+        get_compare_state(r300, &fs->shader->compare_state, ~0);
+        r300_translate_fragment_shader(r300, fs->shader);
+        return TRUE;
+
+    } else if (fs->shadow_samplers) {
+        get_compare_state(r300, &state, fs->shadow_samplers);
+
+        /* Check if the currently-bound shader has been compiled
+         * with the texture-compare state we need. */
+        if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) {
+            /* Search for the right shader. */
+            ptr = fs->first;
+            while (ptr) {
+                if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) {
+                    fs->shader = ptr;
+                    return TRUE;
+                }
+                ptr = ptr->next;
+            }
+
+            /* Not found, gotta compile a new one. */
+            ptr = CALLOC_STRUCT(r300_fragment_shader_code);
+            ptr->next = fs->first;
+            fs->first = fs->shader = ptr;
+
+            ptr->compare_state = state;
+            r300_translate_fragment_shader(r300, ptr);
+            return TRUE;
+        }
+    }
+
+    return FALSE;
 }
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
index 630e2d0c8a..40ce874353 100644
--- a/src/gallium/drivers/r300/r300_fs.h
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -30,6 +30,13 @@
 #include "radeon_code.h"
 #include "r300_shader_semantics.h"
 
+struct r300_fragment_shader_code {
+    struct r300_fragment_program_external_state compare_state;
+    struct rX00_fragment_program_code code;
+
+    struct r300_fragment_shader_code* next;
+};
+
 struct r300_fragment_shader {
     /* Parent class */
     struct pipe_shader_state state;
@@ -37,21 +44,28 @@ struct r300_fragment_shader {
     struct tgsi_shader_info info;
     struct r300_shader_semantics inputs;
 
-    /* Has this shader been translated yet? */
-    boolean translated;
+    /* Bits 0-15: TRUE if it's a shadow sampler, FALSE otherwise. */
+    unsigned shadow_samplers;
 
-    /* Compiled code */
-    struct rX00_fragment_program_code code;
+    /* Currently-bound fragment shader. */
+    struct r300_fragment_shader_code* shader;
+
+    /* List of the same shaders compiled with different texture-compare
+     * states. */
+    struct r300_fragment_shader_code* first;
 };
 
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+                                struct r300_shader_semantics* fs_inputs);
 
-void r300_translate_fragment_shader(struct r300_context* r300,
-                                    struct r300_fragment_shader* fs);
+/* Return TRUE if the shader was switched and should be re-emitted. */
+boolean r300_pick_fragment_shader(struct r300_context* r300);
 
-static inline boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
 {
     if (!fs)
-	return FALSE;
-    return (fs->code.writes_depth) ? TRUE : FALSE;
+        return FALSE;
+    return (fs->shader->code.writes_depth) ? TRUE : FALSE;
 }
+
 #endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 5f332b2e0c..8bcd6c5060 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -355,6 +355,7 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
     fs->state.tokens = tgsi_dup_tokens(shader->tokens);
 
     tgsi_scan_shader(shader->tokens, &fs->info);
+    r300_shader_read_fs_inputs(&fs->info, &fs->inputs);
 
     return (void*)fs;
 }
@@ -368,11 +369,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
     if (fs == NULL) {
         r300->fs = NULL;
         return;
-    } else if (!fs->translated) {
-        r300_translate_fragment_shader(r300, fs);
     }
 
     r300->fs = fs;
+    r300_pick_fragment_shader(r300);
 
     r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
 }
@@ -381,7 +381,14 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
 static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
 {
     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
-    rc_constants_destroy(&fs->code.constants);
+    struct r300_fragment_shader_code *tmp, *ptr = fs->first;
+
+    while (ptr) {
+        tmp = ptr;
+        ptr = ptr->next;
+        rc_constants_destroy(&tmp->code.constants);
+        FREE(tmp);
+    }
     FREE((void*)fs->state.tokens);
     FREE(shader);
 }
@@ -547,6 +554,8 @@ static void*
     int lod_bias;
     union util_color uc;
 
+    sampler->state = *state;
+
     sampler->filter0 |=
         (r300_translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) |
         (r300_translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) |
@@ -597,6 +606,14 @@ static void r300_bind_sampler_states(struct pipe_context* pipe,
     }
 
     r300->sampler_count = count;
+
+    /* Pick a fragment shader based on the texture compare state. */
+    if (r300->fs && (r300->dirty_state & R300_ANY_NEW_SAMPLERS)) {
+        if (r300_pick_fragment_shader(r300)) {
+            r300->dirty_state |= R300_NEW_FRAGMENT_SHADER |
+                                 R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+        }
+    }
 }
 
 static void r300_lacks_vertex_textures(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 9fb2de2403..096cdb20bb 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -212,7 +212,8 @@ static void transform_srcreg(
     dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;
 }
 
-static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src)
+static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src,
+                              uint32_t *shadowSamplers)
 {
     switch(src.Texture) {
         case TGSI_TEXTURE_1D:
@@ -233,14 +234,17 @@ static void transform_texture(struct rc_instruction * dst, struct tgsi_instructi
         case TGSI_TEXTURE_SHADOW1D:
             dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
             dst->U.I.TexShadow = 1;
+            *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
             break;
         case TGSI_TEXTURE_SHADOW2D:
             dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
             dst->U.I.TexShadow = 1;
+            *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
             break;
         case TGSI_TEXTURE_SHADOWRECT:
             dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
             dst->U.I.TexShadow = 1;
+            *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
             break;
     }
 }
@@ -269,7 +273,8 @@ static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_inst
 
     /* Texturing. */
     if (src->Instruction.Texture)
-       transform_texture(dst, src->Texture);
+        transform_texture(dst, src->Texture,
+                          &ttr->compiler->Program.ShadowSamplers);
 }
 
 static void handle_immediate(struct tgsi_to_rc * ttr, struct tgsi_full_immediate * imm)
-- 
cgit v1.2.3


From 3c7f109d67792e27064af5a07c754541348fd787 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 20 Dec 2009 14:12:35 +0100
Subject: nv50: handle TGSI_OPCODE_RET,KILP

---
 src/gallium/drivers/nv50/nv50_program.c | 82 ++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 28 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index e496cf4cad..0b78a7112b 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1306,18 +1306,22 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 {
 	struct nv50_program_exec *e;
 	const int r_pred = 1;
-	unsigned cvn = CVT_F32_F32;
 
-	if (src->mod & NV50_MOD_NEG)
-		cvn |= CVT_NEG;
-	/* write predicate reg */
-	emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
-
-	/* conditional discard */
 	e = exec(pc);
-	e->inst[0] = 0x00000002;
-	set_long(pc, e);
-	set_pred(pc, 0x1 /* LT */, r_pred, e);
+	e->inst[0] = 0x00000002; /* discard */
+	set_long(pc, e); /* sets cond code to ALWAYS */
+
+	if (src) {
+		unsigned cvn = CVT_F32_F32;
+
+		set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
+
+		if (src->mod & NV50_MOD_NEG)
+			cvn |= CVT_NEG;
+		/* write predicate reg */
+		emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
+	}
+
 	emit(pc, e);
 }
 
@@ -1343,6 +1347,19 @@ emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
 	return pc->p->exec_tail;
 }
 
+static void
+emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x30000002;
+	set_long(pc, e);
+	if (pred >= 0)
+		set_pred(pc, cc, pred, e);
+
+	emit(pc, e);
+}
+
 #define QOP_ADD 0
 #define QOP_SUBR 1
 #define QOP_SUB 2
@@ -2063,6 +2080,24 @@ nv50_kill_branch(struct nv50_pc *pc)
 	return TRUE;
 }
 
+static void
+nv50_fp_move_results(struct nv50_pc *pc)
+{
+	struct nv50_reg reg;
+	unsigned i;
+
+	ctor_reg(&reg, P_TEMP, -1, -1);
+
+	for (i = 0; i < pc->result_nr * 4; ++i) {
+		if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
+			continue;
+		if (pc->result[i].rhw != pc->result[i].hw) {
+			reg.hw = pc->result[i].rhw;
+			emit_mov(pc, &reg, &pc->result[i]);
+		}
+	}
+}
+
 static boolean
 nv50_program_tx_insn(struct nv50_pc *pc,
 		     const struct tgsi_full_instruction *inst)
@@ -2291,11 +2326,15 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_KIL:
+		assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
 		emit_kil(pc, src[0][0]);
 		emit_kil(pc, src[0][1]);
 		emit_kil(pc, src[0][2]);
 		emit_kil(pc, src[0][3]);
 		break;
+	case TGSI_OPCODE_KILP:
+		emit_kil(pc, NULL);
+		break;
 	case TGSI_OPCODE_LIT:
 		emit_lit(pc, &dst[0], mask, &src[0][0]);
 		break;
@@ -2352,6 +2391,11 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_RCP:
 		emit_flop(pc, 0, brdc, src[0][0]);
 		break;
+	case TGSI_OPCODE_RET:
+		if (pc->p->type == PIPE_SHADER_FRAGMENT)
+			nv50_fp_move_results(pc);
+		emit_ret(pc, -1, 0);
+		break;
 	case TGSI_OPCODE_RSQ:
 		emit_flop(pc, 2, brdc, src[0][0]);
 		break;
@@ -3109,24 +3153,6 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 	return TRUE;
 }
 
-static void
-nv50_fp_move_results(struct nv50_pc *pc)
-{
-	struct nv50_reg reg;
-	unsigned i;
-
-	ctor_reg(&reg, P_TEMP, -1, -1);
-
-	for (i = 0; i < pc->result_nr * 4; ++i) {
-		if (pc->result[i].rhw < 0 || pc->result[i].hw < 0)
-			continue;
-		if (pc->result[i].rhw != pc->result[i].hw) {
-			reg.hw = pc->result[i].rhw;
-			emit_mov(pc, &reg, &pc->result[i]);
-		}
-	}
-}
-
 static void
 nv50_program_fixup_insns(struct nv50_pc *pc)
 {
-- 
cgit v1.2.3


From 2ed083dda9600a1e20255b23c4bb9a156e765e1b Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 20 Dec 2009 12:04:26 +0100
Subject: nv50: use BREAKADDR and BREAK for loops

This is easier, we don't have to record all the BRK branches
to fill in the address at ENDLOOP - which wasn't done before
and thus made all but the last BRK branch jump to the start.

Also, we don't risk reactivating threads that already left
the loop if we join threads after a conditional with a BRK
(although it might be wiser to remove the join altogether
in such cases).
---
 src/gallium/drivers/nv50/nv50_program.c | 122 ++++++++++++++++++++------------
 1 file changed, 75 insertions(+), 47 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 0b78a7112b..9ad0677978 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -98,9 +98,17 @@ struct nv50_reg {
 #define NV50_MOD_ABS 2
 #define NV50_MOD_SAT 4
 
-/* arbitrary limits */
-#define MAX_IF_DEPTH 4
-#define MAX_LOOP_DEPTH 4
+/* STACK: Conditionals and loops have to use the (per warp) stack.
+ * Stack entries consist of an entry type (divergent path, join at),
+ * a mask indicating the active threads of the warp, and an address.
+ * MPs can store 12 stack entries internally, if we need more (and
+ * we probably do), we have to create a stack buffer in VRAM.
+ */
+/* impose low limits for now */
+#define NV50_MAX_COND_NESTING 4
+#define NV50_MAX_LOOP_NESTING 3
+
+#define JOIN_ON(e) e; pc->p->exec_tail->inst[1] |= 2
 
 struct nv50_pc {
 	struct nv50_program *p;
@@ -139,12 +147,11 @@ struct nv50_pc {
 	struct nv50_reg *iv_p;
 	struct nv50_reg *iv_c;
 
-	struct nv50_program_exec *if_cond;
-	struct nv50_program_exec *if_insn[MAX_IF_DEPTH];
-	struct nv50_program_exec *br_join[MAX_IF_DEPTH];
-	struct nv50_program_exec *br_loop[MAX_LOOP_DEPTH]; /* for BRK branch */
+	struct nv50_program_exec *if_insn[NV50_MAX_COND_NESTING];
+	struct nv50_program_exec *if_join[NV50_MAX_COND_NESTING];
+	struct nv50_program_exec *loop_brka[NV50_MAX_LOOP_NESTING];
 	int if_lvl, loop_lvl;
-	unsigned loop_pos[MAX_LOOP_DEPTH];
+	unsigned loop_pos[NV50_MAX_LOOP_NESTING];
 
 	/* current instruction and total number of insns */
 	unsigned insn_cur;
@@ -1184,7 +1191,6 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
 	set_src_1(pc, src1, e);
 
 	emit(pc, e);
-	pc->if_cond = pc->p->exec_tail; /* record for OPCODE_IF */
 
 	/* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
 	if (rdst)
@@ -1326,21 +1332,49 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 }
 
 static struct nv50_program_exec *
-emit_branch(struct nv50_pc *pc, int pred, unsigned cc,
-	    struct nv50_program_exec **join)
+emit_breakaddr(struct nv50_pc *pc)
 {
 	struct nv50_program_exec *e = exec(pc);
 
-	if (join) {
-		set_long(pc, e);
-		e->inst[0] |= 0xa0000002;
-		emit(pc, e);
-		*join = e;
-		e = exec(pc);
-	}
+	e->inst[0] = 0x40000002;
+	set_long(pc, e);
+
+	emit(pc, e);
+	return e;
+}
+
+static void
+emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x50000002;
+	set_long(pc, e);
+	if (pred >= 0)
+		set_pred(pc, cc, pred, e);
+
+	emit(pc, e);
+}
+
+static struct nv50_program_exec *
+emit_joinat(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *e = exec(pc);
 
+	e->inst[0] = 0xa0000002;
+	set_long(pc, e);
+
+	emit(pc, e);
+	return e;
+}
+
+static struct nv50_program_exec *
+emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x10000002;
 	set_long(pc, e);
-	e->inst[0] |= 0x10000002;
 	if (pred >= 0)
 		set_pred(pc, cc, pred, e);
 	emit(pc, e);
@@ -1504,20 +1538,20 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
 	/* Subtract lod of each pixel from lod of top left pixel, jump
 	 * texlod insn if result is 0, then repeat for 2 other pixels.
 	 */
+	join_at = emit_joinat(pc);
 	emit_quadop(pc, NULL, 0, 0, tlod, tlod, 0x55);
-	emit_branch(pc, 0, 2, &join_at)->param.index = target;
+	emit_branch(pc, 0, 2)->param.index = target;
 
 	for (i = 1; i < 4; ++i) {
 		emit_quadop(pc, NULL, 0, i, tlod, tlod, 0x55);
-		emit_branch(pc, 0, 2, NULL)->param.index = target;
+		emit_branch(pc, 0, 2)->param.index = target;
 	}
 
 	emit_mov(pc, tlod, src); /* target */
 	emit(pc, tex); /* texlod */
 
 	join_at->param.index = target + 2 * 2;
-	emit_nop(pc);
-	pc->p->exec_tail->inst[1] |= 2; /* join _after_ tex */
+	JOIN_ON(emit_nop(pc)); /* join _after_ tex */
 }
 
 static void
@@ -2058,22 +2092,19 @@ nv50_kill_branch(struct nv50_pc *pc)
 	 */
 	if (has_pred(pc->if_insn[lvl], 0xf))
 		return FALSE;
-	assert(pc->if_insn[lvl] && pc->br_join[lvl]);
+	assert(pc->if_insn[lvl] && pc->if_join[lvl]);
 
-	/* We'll use the exec allocated for JOIN_AT (as we can't easily
-	 * update prev's next); if exec_tail is BRK, update the pointer.
+	/* We'll use the exec allocated for JOIN_AT (we can't easily
+	 * access nv50_program_exec's prev).
 	 */
-	if (pc->loop_lvl && pc->br_loop[pc->loop_lvl - 1] == pc->p->exec_tail)
-		pc->br_loop[pc->loop_lvl - 1] = pc->br_join[lvl];
-
 	pc->p->exec_size -= 4; /* remove JOIN_AT and BRA */
 
-	*pc->br_join[lvl] = *pc->p->exec_tail;
+	*pc->if_join[lvl] = *pc->p->exec_tail;
 
 	FREE(pc->if_insn[lvl]);
 	FREE(pc->p->exec_tail);
 
-	pc->p->exec_tail = pc->br_join[lvl];
+	pc->p->exec_tail = pc->if_join[lvl];
 	pc->p->exec_tail->next = NULL;
 	set_pred(pc, 0xd, 0, pc->p->exec_tail);
 
@@ -2184,13 +2215,13 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_arl(pc, dst[0], temp, 4);
 		break;
 	case TGSI_OPCODE_BGNLOOP:
+		pc->loop_brka[pc->loop_lvl] = emit_breakaddr(pc);
 		pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
 		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_BRK:
-		emit_branch(pc, -1, 0, NULL);
 		assert(pc->loop_lvl > 0);
-		pc->br_loop[pc->loop_lvl - 1] = pc->p->exec_tail;
+		emit_break(pc, -1, 0);
 		break;
 	case TGSI_OPCODE_CEIL:
 		for (c = 0; c < 4; c++) {
@@ -2266,7 +2297,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_mov_immdval(pc, dst[0], 1.0f);
 		break;
 	case TGSI_OPCODE_ELSE:
-		emit_branch(pc, -1, 0, NULL);
+		emit_branch(pc, -1, 0);
 		pc->if_insn[--pc->if_lvl]->param.index = pc->p->exec_size;
 		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
 		terminate_mbb(pc);
@@ -2278,21 +2309,20 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		if (nv50_kill_branch(pc) == TRUE)
 			break;
 
-		if (pc->br_join[pc->if_lvl]) {
-			pc->br_join[pc->if_lvl]->param.index = pc->p->exec_size;
-			pc->br_join[pc->if_lvl] = NULL;
+		if (pc->if_join[pc->if_lvl]) {
+			pc->if_join[pc->if_lvl]->param.index = pc->p->exec_size;
+			pc->if_join[pc->if_lvl] = NULL;
 		}
 		terminate_mbb(pc);
 		/* emit a NOP as join point, we could set it on the next
 		 * one, but would have to make sure it is long and !immd
 		 */
-		emit_nop(pc);
-		pc->p->exec_tail->inst[1] |= 2;
+		JOIN_ON(emit_nop(pc));
 		break;
 	case TGSI_OPCODE_ENDLOOP:
-		emit_branch(pc, -1, 0, NULL);
-		pc->p->exec_tail->param.index = pc->loop_pos[--pc->loop_lvl];
-		pc->br_loop[pc->loop_lvl]->param.index = pc->p->exec_size;
+		emit_branch(pc, -1, 0)->param.index =
+			pc->loop_pos[--pc->loop_lvl];
+		pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
 		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_EX2:
@@ -2316,13 +2346,11 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		}
 		break;
 	case TGSI_OPCODE_IF:
-		/* emitting a join_at may not be necessary */
-		assert(pc->if_lvl < MAX_IF_DEPTH);
-		/* set_pred_wr(pc, 1, 0, pc->if_cond); */
+		assert(pc->if_lvl < NV50_MAX_COND_NESTING);
 		emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN,
 			 CVT_F32_F32);
-		emit_branch(pc, 0, 2, &pc->br_join[pc->if_lvl]);
-		pc->if_insn[pc->if_lvl++] = pc->p->exec_tail;
+		pc->if_join[pc->if_lvl] = emit_joinat(pc);
+		pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
 		terminate_mbb(pc);
 		break;
 	case TGSI_OPCODE_KIL:
-- 
cgit v1.2.3


From b3ac75f7eb6348032c9b214d6048678aa1cc07c3 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Sun, 20 Dec 2009 13:40:14 +0100
Subject: nv50: make address reg allocation a little less hacky

Before I tried to distinguish int +0 and -0, and regs
relative to $a1 would have been considered reserved
for TGSI.
This would probably never occur, let it be fixed even so.
---
 src/gallium/drivers/nv50/nv50_program.c | 52 ++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 9ad0677978..04b345bcf3 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -131,6 +131,7 @@ struct nv50_pc {
 	int immd_nr;
 	struct nv50_reg **addr;
 	int addr_nr;
+	uint8_t addr_alloc; /* set bit indicates used for TGSI_FILE_ADDRESS */
 
 	struct nv50_reg *temp_temp[16];
 	unsigned temp_temp_nr;
@@ -200,8 +201,7 @@ terminate_mbb(struct nv50_pc *pc)
 
 	/* remove records of temporary address register values */
 	for (i = 0; i < NV50_SU_MAX_ADDR; ++i)
-		if (pc->r_addr[i].index < 0)
-			pc->r_addr[i].rhw = -1;
+		pc->r_addr[i].rhw = -1;
 }
 
 static void
@@ -546,21 +546,24 @@ emit_add_addr_imm(struct nv50_pc *pc, struct nv50_reg *dst,
 static struct nv50_reg *
 alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 {
-	int i;
 	struct nv50_reg *a_tgsi = NULL, *a = NULL;
+	int i;
+	uint8_t avail = ~pc->addr_alloc;
 
 	if (!ref) {
-		/* allocate for TGSI address reg */
-		for (i = 0; i < NV50_SU_MAX_ADDR; ++i) {
-			if (pc->r_addr[i].index >= 0)
-				continue;
-			if (pc->r_addr[i].rhw >= 0 &&
-			    pc->r_addr[i].acc == pc->insn_cur)
-				continue;
+		/* allocate for TGSI_FILE_ADDRESS */
+		while (avail) {
+			i = ffs(avail) - 1;
 
-			pc->r_addr[i].rhw = -1;
-			pc->r_addr[i].index = i;
-			return &pc->r_addr[i];
+			if (pc->r_addr[i].rhw < 0 ||
+			    pc->r_addr[i].acc != pc->insn_cur) {
+				pc->addr_alloc |= (1 << i);
+
+				pc->r_addr[i].rhw = -1;
+				pc->r_addr[i].index = i;
+				return &pc->r_addr[i];
+			}
+			avail &= ~(1 << i);
 		}
 		assert(0);
 		return NULL;
@@ -568,15 +571,16 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 
 	/* Allocate and set an address reg so we can access 'ref'.
 	 *
-	 * If and r_addr has index < 0, it is not reserved for TGSI,
-	 * and index will be the negative of the TGSI addr index the
-	 * value in rhw is relative to, or -256 if rhw is an offset
-	 * from 0. If rhw < 0, the reg has not been initialized.
+	 * If and r_addr->index will be -1 or the hw index the value
+	 * value in rhw is relative to. If rhw < 0, the reg has not
+	 * been initialized or is in use for TGSI_FILE_ADDRESS.
 	 */
-	for (i = NV50_SU_MAX_ADDR - 1; i >= 0; --i) {
-		if (pc->r_addr[i].index >= 0) /* occupied for TGSI */
-			continue;
-		if (pc->r_addr[i].rhw < 0) { /* unused */
+	while (avail) { /* only consider regs that are not TGSI */
+		i = ffs(avail) - 1;
+		avail &= ~(1 << i);
+
+		if ((!a || a->rhw >= 0) && pc->r_addr[i].rhw < 0) {
+			/* prefer an usused reg with low hw index */
 			a = &pc->r_addr[i];
 			continue;
 		}
@@ -586,8 +590,8 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 		if (ref->hw - pc->r_addr[i].rhw >= 128)
 			continue;
 
-		if ((ref->acc >= 0 && pc->r_addr[i].index == -256) ||
-		    (ref->acc < 0 && -pc->r_addr[i].index == ref->index)) {
+		if ((ref->acc >= 0 && pc->r_addr[i].index < 0) ||
+		    (ref->acc < 0 && pc->r_addr[i].index == ref->index)) {
 			pc->r_addr[i].acc = pc->insn_cur;
 			return &pc->r_addr[i];
 		}
@@ -601,7 +605,7 @@ alloc_addr(struct nv50_pc *pc, struct nv50_reg *ref)
 
 	a->rhw = ref->hw & ~0x7f;
 	a->acc = pc->insn_cur;
-	a->index = a_tgsi ? -ref->index : -256;
+	a->index = a_tgsi ? ref->index : -1;
 	return a;
 }
 
-- 
cgit v1.2.3


From 32d737397c2ac21a553a5557914fa741be89c8a9 Mon Sep 17 00:00:00 2001
From: Corbin Simpson <MostAwesomeDude@gmail.com>
Date: Sun, 20 Dec 2009 14:00:34 -0800
Subject: r300g: Add a few more params.

---
 src/gallium/drivers/r300/r300_screen.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index a7ef3dbcc2..2a8667d483 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -142,6 +142,10 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
             return 0;
         case PIPE_CAP_BLEND_EQUATION_SEPARATE:
             return 1;
+        case PIPE_CAP_SM3:
+            return 1;
+        case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+            return 8;
         default:
             debug_printf("r300: Implementation error: Bad param %d\n",
                 param);
-- 
cgit v1.2.3


From f217927a57dbf57ffe23fb48417a4cdad7e0eeea Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 15:38:08 +0000
Subject: i965g: remove duplicate set_viewport_state, fixes samples/depth

---
 src/gallium/drivers/i965/brw_pipe_misc.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_misc.c b/src/gallium/drivers/i965/brw_pipe_misc.c
index 0d0d92df82..3035907807 100644
--- a/src/gallium/drivers/i965/brw_pipe_misc.c
+++ b/src/gallium/drivers/i965/brw_pipe_misc.c
@@ -30,14 +30,6 @@ static void brw_set_scissor_state( struct pipe_context *pipe,
    brw->state.dirty.mesa |= PIPE_NEW_SCISSOR;
 }
 
-static void brw_set_viewport_state( struct pipe_context *pipe,
-                                    const struct pipe_viewport_state *viewport )
-{
-   struct brw_context *brw = brw_context(pipe);
-
-   brw->curr.viewport = *viewport;
-   brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
-}
 
 static void brw_set_clip_state( struct pipe_context *pipe,
                                 const struct pipe_clip_state *clip )
@@ -54,7 +46,6 @@ void brw_pipe_misc_init( struct brw_context *brw )
    brw->base.set_polygon_stipple = brw_set_polygon_stipple;
    brw->base.set_scissor_state = brw_set_scissor_state;
    brw->base.set_clip_state = brw_set_clip_state;
-   brw->base.set_viewport_state = brw_set_viewport_state;
 }
 
 
-- 
cgit v1.2.3


From 6c719d4c22e84e315e5dc9cbc69885401a7ae231 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 16:55:40 +0000
Subject: i965g: add DEBUG_WINSYS flag

---
 src/gallium/drivers/i965/brw_debug.h  | 2 +-
 src/gallium/drivers/i965/brw_screen.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_debug.h b/src/gallium/drivers/i965/brw_debug.h
index 98407a06ed..ae8e9254a6 100644
--- a/src/gallium/drivers/i965/brw_debug.h
+++ b/src/gallium/drivers/i965/brw_debug.h
@@ -14,7 +14,7 @@
 #define DEBUG_VERBOSE	        0x40
 #define DEBUG_BATCH             0x80
 #define DEBUG_PIXEL             0x100
-#define DEBUG_BUFMGR            0x200
+#define DEBUG_WINSYS            0x200
 #define DEBUG_MIN_URB           0x400
 #define DEBUG_DISASSEM           0x800
 #define DEBUG_unused3           0x1000
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 1855e4fd45..3d96a77d65 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -47,7 +47,7 @@ static const struct debug_named_value debug_names[] = {
    { "verb",  DEBUG_VERBOSE},
    { "bat",   DEBUG_BATCH},
    { "pix",   DEBUG_PIXEL},
-   { "buf",   DEBUG_BUFMGR},
+   { "wins",  DEBUG_WINSYS},
    { "min",   DEBUG_MIN_URB},
    { "dis",   DEBUG_DISASSEM},
    { "sync",  DEBUG_SYNC},
-- 
cgit v1.2.3


From 1ec7e058f50882b27c0a2abd961bd49848386ff7 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 16:56:46 +0000
Subject: i965g: keep refcounts to bound vertex buffers

---
 src/gallium/drivers/i965/brw_pipe_vertex.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 73bba5b088..3d87a2853f 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -19,11 +19,26 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
 				   const struct pipe_vertex_buffer *buffers)
 {
    struct brw_context *brw = brw_context(pipe);
+   unsigned i;
 
-   /* XXX: don't we need to take some references here?  It's a bit
-    * awkward to do so, though.
-    */
-   memcpy(brw->curr.vertex_buffer, buffers, count * sizeof(buffers[0]));
+   /* Check for no change */
+   if (count == brw->curr.num_vertex_buffers &&
+       memcmp(brw->curr.vertex_buffer,
+              buffers,
+              count * sizeof buffers[0]) == 0)
+      return;
+
+   /* Adjust refcounts */
+   for (i = 0; i < count; i++) 
+      pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer, 
+                            buffers[i].buffer);
+
+   for ( ; i < brw->curr.num_vertex_buffers; i++)
+      pipe_buffer_reference(&brw->curr.vertex_buffer[i].buffer,
+                            NULL);
+
+   /* Copy remaining data */
+   memcpy(brw->curr.vertex_buffer, buffers, count * sizeof buffers[0]);
    brw->curr.num_vertex_buffers = count;
 
    brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
-- 
cgit v1.2.3


From 061411b2611634960f0ba36e42916c67918bb53d Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 16:57:37 +0000
Subject: i965g: hook vertex state emit up to PIPE_NEW_VERTEX_BUFFER

---
 src/gallium/drivers/i965/brw_context.h     | 2 +-
 src/gallium/drivers/i965/brw_draw.c        | 7 ++++++-
 src/gallium/drivers/i965/brw_draw_upload.c | 5 +++--
 src/gallium/drivers/i965/brw_state_debug.c | 1 -
 4 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index b7330f00f4..143e068987 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -257,7 +257,7 @@ struct brw_sampler {
 #define BRW_NEW_WM_SURFACES		0x1000
 #define BRW_NEW_xxx                     0x2000 /* was FENCE */
 #define BRW_NEW_INDICES			0x4000
-#define BRW_NEW_VERTICES		0x8000
+
 /**
  * Used for any batch entry with a relocated pointer that will be used
  * by any 3D rendering.  Need to re-emit these fresh in each
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 45d5ade1fc..4498773dd8 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -92,6 +92,10 @@ static int brw_emit_prim(struct brw_context *brw,
    struct brw_3d_primitive prim_packet;
    int ret;
 
+   if (BRW_DEBUG & DEBUG_PRIMS)
+      debug_printf("%s start %d count %d indexed %d hw_prim %d\n",
+                   __FUNCTION__, start, count, indexed, hw_prim); 
+
    prim_packet.header.opcode = CMD_3D_PRIM;
    prim_packet.header.length = sizeof(prim_packet)/4 - 2;
    prim_packet.header.pad = 0;
@@ -187,7 +191,8 @@ brw_draw_range_elements(struct pipe_context *pipe,
    hw_prim = brw_set_prim(brw, mode);
 
    if (BRW_DEBUG & DEBUG_PRIMS)
-      debug_printf("PRIM: %s %d %d\n", u_prim_name(mode), start, count);
+      debug_printf("PRIM: %s start %d count %d index_buffer %p\n",
+                   u_prim_name(mode), start, count, (void *)index_buffer);
 
    /* Potentially trigger upload of new index buffer.
     *
diff --git a/src/gallium/drivers/i965/brw_draw_upload.c b/src/gallium/drivers/i965/brw_draw_upload.c
index f50ce3005d..a27da5f1c1 100644
--- a/src/gallium/drivers/i965/brw_draw_upload.c
+++ b/src/gallium/drivers/i965/brw_draw_upload.c
@@ -405,8 +405,9 @@ static int brw_emit_vertices( struct brw_context *brw )
 
 const struct brw_tracked_state brw_vertices = {
    .dirty = {
-      .mesa = PIPE_NEW_INDEX_RANGE,
-      .brw = BRW_NEW_BATCH | BRW_NEW_VERTICES,
+      .mesa = (PIPE_NEW_INDEX_RANGE |
+               PIPE_NEW_VERTEX_BUFFER),
+      .brw = BRW_NEW_BATCH,
       .cache = 0,
    },
    .prepare = brw_prepare_vertices,
diff --git a/src/gallium/drivers/i965/brw_state_debug.c b/src/gallium/drivers/i965/brw_state_debug.c
index 050f74761c..049c278c93 100644
--- a/src/gallium/drivers/i965/brw_state_debug.c
+++ b/src/gallium/drivers/i965/brw_state_debug.c
@@ -80,7 +80,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_WM_SURFACES),
    DEFINE_BIT(BRW_NEW_xxx),
    DEFINE_BIT(BRW_NEW_INDICES),
-   DEFINE_BIT(BRW_NEW_VERTICES),
    {0, 0, 0}
 };
 
-- 
cgit v1.2.3


From ba251376556835e84c7edb9b02b3fdefde32908a Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Mon, 21 Dec 2009 18:20:01 +0100
Subject: svga: fix typo

---
 src/gallium/drivers/svga/svga_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index e650a251d1..8e7b5b3da1 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -369,7 +369,7 @@ struct svga_context
 #define SVGA_NEW_FRAME_BUFFER        0x800
 #define SVGA_NEW_STIPPLE             0x1000
 #define SVGA_NEW_SCISSOR             0x2000
-#define SVGA_NEW_BLEND_COLOR         0x5000
+#define SVGA_NEW_BLEND_COLOR         0x4000
 #define SVGA_NEW_CLIP                0x8000
 #define SVGA_NEW_VIEWPORT            0x10000
 #define SVGA_NEW_PRESCALE            0x20000
-- 
cgit v1.2.3


From f72de22439a2d08bb461af60839baf4fbb3e54df Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 19:22:45 +0000
Subject: i965g: remove half-finished change to tgsi_parse

---
 src/gallium/auxiliary/tgsi/tgsi_scan.h      | 3 ---
 src/gallium/drivers/i965/brw_state_upload.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h
index 6754001e88..8a7ee0c7e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
@@ -61,9 +61,6 @@ struct tgsi_shader_info
    boolean uses_kill;  /**< KIL or KILP instruction used? */
    boolean uses_fogcoord; /**< fragment shader uses fog coord? */
    boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */
-
-   uint texture_max;
-   uint texture_mask;
 };
 
 
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index 233dce03df..bf65ca1cf2 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -191,7 +191,7 @@ enum pipe_error brw_validate_state( struct brw_context *brw )
       const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
       if (fp) {
          assert(fp->info.file_max[TGSI_FILE_SAMPLER] < (int)brw->curr.num_samplers);
-	 assert(fp->info.texture_max <= brw->curr.num_textures);
+	 /*assert(fp->info.texture_max <= brw->curr.num_textures);*/
       }
    }
 
-- 
cgit v1.2.3


From 0fc4dd3819af252c028ed43bbd668b4f34104e32 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 19:50:05 +0000
Subject: i965g: fixes to build after merge of master

---
 src/gallium/auxiliary/util/u_upload_mgr.h        |  2 +-
 src/gallium/drivers/i965/brw_clip_state.c        |  2 +-
 src/gallium/drivers/i965/brw_context.h           |  4 +-
 src/gallium/drivers/i965/brw_draw.c              |  2 +-
 src/gallium/drivers/i965/brw_eu.h                |  2 +-
 src/gallium/drivers/i965/brw_eu_emit.c           |  2 +-
 src/gallium/drivers/i965/brw_pipe_sampler.c      | 20 +++++--
 src/gallium/drivers/i965/brw_screen.c            |  2 +-
 src/gallium/drivers/i965/brw_screen_surface.c    |  5 +-
 src/gallium/drivers/i965/brw_screen_tex_layout.c | 68 +++++++++++-------------
 src/gallium/drivers/i965/brw_screen_texture.c    | 14 ++---
 src/gallium/drivers/i965/brw_state.h             |  4 +-
 src/gallium/drivers/i965/brw_state_upload.c      |  4 +-
 src/gallium/drivers/i965/brw_vs_emit.c           | 30 +++++------
 src/gallium/drivers/i965/brw_winsys.h            |  2 +-
 src/gallium/drivers/i965/brw_wm.c                |  2 +-
 src/gallium/drivers/i965/brw_wm_emit.c           |  2 +-
 src/gallium/drivers/i965/brw_wm_fp.c             | 46 ++++++++--------
 18 files changed, 112 insertions(+), 101 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h
index d414a1f2f6..e158bed9d0 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.h
+++ b/src/gallium/auxiliary/util/u_upload_mgr.h
@@ -32,7 +32,7 @@
 #ifndef U_UPLOAD_MGR_H
 #define U_UPLOAD_MGR_H
 
-#include "pipe/p_error.h"
+#include "pipe/p_defines.h"
 
 struct pipe_screen;
 struct pipe_buffer;
diff --git a/src/gallium/drivers/i965/brw_clip_state.c b/src/gallium/drivers/i965/brw_clip_state.c
index 467364e884..5c3ccfd8d0 100644
--- a/src/gallium/drivers/i965/brw_clip_state.c
+++ b/src/gallium/drivers/i965/brw_clip_state.c
@@ -69,7 +69,7 @@ clip_unit_populate_key(struct brw_context *brw, struct brw_clip_unit_key *key)
    key->urb_size = brw->urb.vsize;
 
    /*  */
-   key->depth_clamp = 0; // XXX: add this to gallium: ctx->Transform.DepthClamp;
+   key->depth_clamp = 0; /* XXX: add this to gallium: ctx->Transform.DepthClamp; */
 }
 
 static enum pipe_error
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 143e068987..56e7807400 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -197,7 +197,7 @@ struct brw_fragment_shader {
    struct brw_immediate_data immediates;
 
    unsigned iz_lookup;
-   //unsigned wm_lookup;
+   /*unsigned wm_lookup;*/
    
    unsigned  uses_depth:1;
    unsigned  has_flow_control:1;
@@ -722,7 +722,7 @@ struct brw_context
       /** Input sizes, calculated from active vertex program.
        * One bit per fragment program input attribute.
        */
-      //GLbitfield input_size_masks[4];
+      /*GLbitfield input_size_masks[4];*/
 
       /** Array of surface default colors (texture border color) */
       struct brw_winsys_buffer *sdc_bo[BRW_MAX_TEX_UNIT];
diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 4498773dd8..852fd22982 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -108,7 +108,7 @@ static int brw_emit_prim(struct brw_context *brw,
       prim_packet.start_vert_location += brw->ib.start_vertex_offset;
    prim_packet.instance_count = 1;
    prim_packet.start_instance_location = 0;
-   prim_packet.base_vert_location = 0; // prim->basevertex; XXX: add this to gallium
+   prim_packet.base_vert_location = 0; /* prim->basevertex; XXX: add this to gallium */
 
 
    /* If we're set to always flush, do it before and after the primitive emit.
diff --git a/src/gallium/drivers/i965/brw_eu.h b/src/gallium/drivers/i965/brw_eu.h
index 565f4ef1c5..af509b2e5f 100644
--- a/src/gallium/drivers/i965/brw_eu.h
+++ b/src/gallium/drivers/i965/brw_eu.h
@@ -34,7 +34,7 @@
 #define BRW_EU_H
 
 #include "util/u_debug.h"
-#include "pipe/p_error.h"
+#include "pipe/p_defines.h"
 
 #include "brw_structs.h"
 #include "brw_defines.h"
diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 3ee50899fb..4fe7b6acc1 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -200,7 +200,7 @@ void brw_set_src1( struct brw_instruction *insn,
        * in the future:
        */
       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
-      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
+      /*assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
 
       if (insn->header.access_mode == BRW_ALIGN_1) {
 	 insn->bits3.da1.src1_subreg_nr = reg.subnr;
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index 5cd38a43a6..5ddc63f57e 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -204,15 +204,29 @@ static void brw_set_sampler_textures(struct pipe_context *pipe,
    brw->state.dirty.mesa |= PIPE_NEW_BOUND_TEXTURES;
 }
 
+static void brw_set_vertex_sampler_textures(struct pipe_context *pipe,
+                                            unsigned num,
+                                            struct pipe_texture **texture)
+{
+}
+
+static void brw_bind_vertex_sampler_state(struct pipe_context *pipe,
+                                          unsigned num, void **sampler)
+{
+}
+
 
 void brw_pipe_sampler_init( struct brw_context *brw )
 {
-   brw->base.set_sampler_textures = brw_set_sampler_textures;
    brw->base.create_sampler_state = brw_create_sampler_state;
-   brw->base.bind_sampler_states = brw_bind_sampler_state;
    brw->base.delete_sampler_state = brw_delete_sampler_state;
 
-   brw->base.set_sampler_textures = brw_set_sampler_textures;
+   brw->base.set_fragment_sampler_textures = brw_set_sampler_textures;
+   brw->base.bind_fragment_sampler_states = brw_bind_sampler_state;
+
+   brw->base.set_vertex_sampler_textures = brw_set_vertex_sampler_textures;
+   brw->base.bind_vertex_sampler_states = brw_bind_vertex_sampler_state;
+
 }
 void brw_pipe_sampler_cleanup( struct brw_context *brw )
 {
diff --git a/src/gallium/drivers/i965/brw_screen.c b/src/gallium/drivers/i965/brw_screen.c
index 3d96a77d65..0ecacac9a3 100644
--- a/src/gallium/drivers/i965/brw_screen.c
+++ b/src/gallium/drivers/i965/brw_screen.c
@@ -396,7 +396,7 @@ brw_create_screen(struct brw_winsys_screen *sws, uint pci_id)
    brw_screen_tex_surface_init(bscreen);
    brw_screen_buffer_init(bscreen);
 
-   bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE);
+   bscreen->no_tiling = debug_get_option("BRW_NO_TILING", FALSE) != NULL;
    
    
    return &bscreen->base;
diff --git a/src/gallium/drivers/i965/brw_screen_surface.c b/src/gallium/drivers/i965/brw_screen_surface.c
index 1e37c63d6c..e2b9954e59 100644
--- a/src/gallium/drivers/i965/brw_screen_surface.c
+++ b/src/gallium/drivers/i965/brw_screen_surface.c
@@ -31,6 +31,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
+#include "util/u_math.h"
 
 #include "pipe/p_screen.h"
 #include "brw_screen.h"
@@ -138,8 +139,8 @@ static struct brw_surface *create_in_place_view( struct brw_screen *brw_screen,
    assert(id.bits.zslice == 0);
 
    surface->base.format = tex->base.format;
-   surface->base.width = tex->base.width[id.bits.level];
-   surface->base.height = tex->base.height[id.bits.level];
+   surface->base.width = u_minify(tex->base.width0, id.bits.level);
+   surface->base.height = u_minify(tex->base.height0, id.bits.level);
    surface->base.offset = tex->image_offset[id.bits.level][id.bits.face];
    surface->base.usage = usage;
    surface->base.zslice = id.bits.zslice;
diff --git a/src/gallium/drivers/i965/brw_screen_tex_layout.c b/src/gallium/drivers/i965/brw_screen_tex_layout.c
index 71a8890f83..894f4bea40 100644
--- a/src/gallium/drivers/i965/brw_screen_tex_layout.c
+++ b/src/gallium/drivers/i965/brw_screen_tex_layout.c
@@ -110,10 +110,6 @@ brw_tex_set_level_info(struct brw_texture *tex,
    assert(tex->image_offset[level] == NULL);
    assert(nr_images >= 1);
 
-   tex->base.width[level] = w;
-   tex->base.height[level] = h;
-   tex->base.depth[level] = d;
-
    tex->level_offset[level] = (x + y * tex->pitch) * tex->cpp;
    tex->nr_images[level] = nr_images;
 
@@ -147,14 +143,14 @@ static void brw_layout_2d( struct brw_texture *tex )
    GLuint level;
    GLuint x = 0;
    GLuint y = 0;
-   GLuint width = tex->base.width[0];
-   GLuint height = tex->base.height[0];
+   GLuint width = tex->base.width0;
+   GLuint height = tex->base.height0;
 
-   tex->pitch = tex->base.width[0];
+   tex->pitch = tex->base.width0;
    brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
 
    if (tex->compressed) {
-       tex->pitch = align(tex->base.width[0], align_w);
+       tex->pitch = align(tex->base.width0, align_w);
    }
 
    /* May need to adjust pitch to accomodate the placement of
@@ -166,11 +162,11 @@ static void brw_layout_2d( struct brw_texture *tex )
        GLuint mip1_width;
 
        if (tex->compressed) {
-           mip1_width = align(minify(tex->base.width[0]), align_w)
-               + align(minify(minify(tex->base.width[0])), align_w);
+          mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + 
+                        align(u_minify(tex->base.width0, 2), align_w));
        } else {
-           mip1_width = align(minify(tex->base.width[0]), align_w)
-               + minify(minify(tex->base.width[0]));
+          mip1_width = (align(u_minify(tex->base.width0, 1), align_w) + 
+                        u_minify(tex->base.width0, 2));
        }
 
        if (mip1_width > tex->pitch) {
@@ -209,8 +205,8 @@ static void brw_layout_2d( struct brw_texture *tex )
 	 y += img_height;
       }
 
-      width  = minify(width);
-      height = minify(height);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
    }
 }
 
@@ -222,28 +218,28 @@ brw_layout_cubemap_idgng( struct brw_texture *tex )
    GLuint level;
    GLuint x = 0;
    GLuint y = 0;
-   GLuint width = tex->base.width[0];
-   GLuint height = tex->base.height[0];
+   GLuint width = tex->base.width0;
+   GLuint height = tex->base.height0;
    GLuint qpitch = 0;
    GLuint y_pitch = 0;
 
-   tex->pitch = tex->base.width[0];
+   tex->pitch = tex->base.width0;
    brw_tex_alignment_unit(tex->base.format, &align_w, &align_h);
    y_pitch = align(height, align_h);
 
    if (tex->compressed) {
-      tex->pitch = align(tex->base.width[0], align_w);
+      tex->pitch = align(tex->base.width0, align_w);
    }
 
    if (tex->base.last_level != 0) {
       GLuint mip1_width;
 
       if (tex->compressed) {
-	 mip1_width = (align(minify(tex->base.width[0]), align_w) +
-		       align(minify(minify(tex->base.width[0])), align_w));
+	 mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+		       align(u_minify(tex->base.width0, 2), align_w));
       } else {
-	 mip1_width = (align(minify(tex->base.width[0]), align_w) +
-		       minify(minify(tex->base.width[0])));
+	 mip1_width = (align(u_minify(tex->base.width0, 1), align_w) +
+		       u_minify(tex->base.width0, 2));
       }
 
       if (mip1_width > tex->pitch) {
@@ -255,19 +251,19 @@ brw_layout_cubemap_idgng( struct brw_texture *tex )
 
    if (tex->compressed) {
       qpitch = ((y_pitch + 
-		 align(minify(y_pitch), align_h) +
+		 align(u_minify(y_pitch, 1), align_h) +
 		 11 * align_h) / 4) * tex->pitch * tex->cpp;
 
       tex->total_height = ((y_pitch + 
-			    align(minify(y_pitch), align_h) + 
+			    align(u_minify(y_pitch, 1), align_h) + 
 			    11 * align_h) / 4) * 6;
    } else {
       qpitch = (y_pitch + 
-		align(minify(y_pitch), align_h) + 
+		align(u_minify(y_pitch, 1), align_h) + 
 		11 * align_h) * tex->pitch * tex->cpp;
 
       tex->total_height = (y_pitch +
-			   align(minify(y_pitch), align_h) +
+			   align(u_minify(y_pitch, 1), align_h) +
 			   11 * align_h) * 6;
    }
 
@@ -293,8 +289,8 @@ brw_layout_cubemap_idgng( struct brw_texture *tex )
 	 y += img_height;
       }
 
-      width  = minify(width);
-      height = minify(height);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
    }
 
    return TRUE;
@@ -304,9 +300,9 @@ brw_layout_cubemap_idgng( struct brw_texture *tex )
 static boolean
 brw_layout_3d_cube( struct brw_texture *tex )
 {
-   GLuint width  = tex->base.width[0];
-   GLuint height = tex->base.height[0];
-   GLuint depth = tex->base.depth[0];
+   GLuint width  = tex->base.width0;
+   GLuint height = tex->base.height0;
+   GLuint depth = tex->base.depth0;
    GLuint pack_x_pitch, pack_x_nr;
    GLuint pack_y_pitch;
    GLuint level;
@@ -320,8 +316,8 @@ brw_layout_3d_cube( struct brw_texture *tex )
       tex->pitch = align(width, align_w);
       pack_y_pitch = (height + 3) / 4;
    } else {
-      tex->pitch = brw_tex_pitch_align(tex, tex->base.width[0]);
-      pack_y_pitch = align(tex->base.height[0], align_h);
+      tex->pitch = brw_tex_pitch_align(tex, tex->base.width0);
+      pack_y_pitch = align(tex->base.height0, align_h);
    }
 
    pack_x_pitch = width;
@@ -349,9 +345,9 @@ brw_layout_3d_cube( struct brw_texture *tex )
 
 
       tex->total_height += y;
-      width  = minify(width);
-      height = minify(height);
-      depth  = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth  = u_minify(depth, 1);
 
       if (tex->compressed) {
 	 pack_y_pitch = (height + 3) / 4;
diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index f4c20f31a5..ff999086c0 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -259,8 +259,8 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
     */
    tex->ss.ss1.base_addr = 0; /* reloc */
    tex->ss.ss2.mip_count = tex->base.last_level;
-   tex->ss.ss2.width = tex->base.width[0] - 1;
-   tex->ss.ss2.height = tex->base.height[0] - 1;
+   tex->ss.ss2.width = tex->base.width0 - 1;
+   tex->ss.ss2.height = tex->base.height0 - 1;
 
    switch (tex->tiling) {
    case BRW_TILING_NONE:
@@ -278,7 +278,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
    }
 
    tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
-   tex->ss.ss3.depth = tex->base.depth[0] - 1;
+   tex->ss.ss3.depth = tex->base.depth0 - 1;
 
    tex->ss.ss4.min_lod = 0;
  
@@ -478,7 +478,7 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
 
    if (templ->target != PIPE_TEXTURE_2D ||
        templ->last_level != 0 ||
-       templ->depth[0] != 1)
+       templ->depth0 != 1)
       return NULL;
 
    if (pf_is_compressed(templ->format))
@@ -529,8 +529,8 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
     */
    tex->ss.ss1.base_addr = 0; /* reloc */
    tex->ss.ss2.mip_count = tex->base.last_level;
-   tex->ss.ss2.width = tex->base.width[0] - 1;
-   tex->ss.ss2.height = tex->base.height[0] - 1;
+   tex->ss.ss2.width = tex->base.width0 - 1;
+   tex->ss.ss2.height = tex->base.height0 - 1;
 
    switch (tex->tiling) {
    case BRW_TILING_NONE:
@@ -548,7 +548,7 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
    }
 
    tex->ss.ss3.pitch = (tex->pitch * tex->cpp) - 1;
-   tex->ss.ss3.depth = tex->base.depth[0] - 1;
+   tex->ss.ss3.depth = tex->base.depth0 - 1;
 
    tex->ss.ss4.min_lod = 0;
 
diff --git a/src/gallium/drivers/i965/brw_state.h b/src/gallium/drivers/i965/brw_state.h
index a9b8165495..d2bbd0123d 100644
--- a/src/gallium/drivers/i965/brw_state.h
+++ b/src/gallium/drivers/i965/brw_state.h
@@ -33,12 +33,12 @@
 #ifndef BRW_STATE_H
 #define BRW_STATE_H
 
-#include "pipe/p_error.h"
+#include "pipe/p_defines.h"
 #include "util/u_memory.h"
 
 #include "brw_context.h"
 
-static inline void
+static INLINE void
 brw_add_validated_bo(struct brw_context *brw, struct brw_winsys_buffer *bo)
 {
    assert(brw->state.validated_bo_count < Elements(brw->state.validated_bos));
diff --git a/src/gallium/drivers/i965/brw_state_upload.c b/src/gallium/drivers/i965/brw_state_upload.c
index bf65ca1cf2..f8b91eff81 100644
--- a/src/gallium/drivers/i965/brw_state_upload.c
+++ b/src/gallium/drivers/i965/brw_state_upload.c
@@ -38,7 +38,7 @@
 
 const struct brw_tracked_state *atoms[] =
 {
-//   &brw_wm_input_sizes,
+/*   &brw_wm_input_sizes, */
    &brw_vs_prog,
    &brw_gs_prog, 
    &brw_clip_prog, 
@@ -56,7 +56,7 @@ const struct brw_tracked_state *atoms[] =
    &brw_cc_unit,
 
    &brw_vs_surfaces,		/* must do before unit */
-   //&brw_wm_constant_surface,	/* must do before wm surfaces/bind bo */
+   /*&brw_wm_constant_surface,*/	/* must do before wm surfaces/bind bo */
    &brw_wm_surfaces,		/* must do before samplers and unit */
    &brw_wm_samplers,
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 6d8366f862..1d0fff0d9e 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1067,22 +1067,22 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
 {
    struct brw_reg reg;
 
-   if (src->SrcRegister.File == TGSI_FILE_NULL)
+   if (src->Register.File == TGSI_FILE_NULL)
       return brw_null_reg();
 
    reg = get_src_reg(c, argIndex,
-		     src->SrcRegister.File,
-		     src->SrcRegister.Index,
-		     src->SrcRegister.Indirect);
+		     src->Register.File,
+		     src->Register.Index,
+		     src->Register.Indirect);
 
    /* Convert 3-bit swizzle to 2-bit.  
     */
-   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SrcRegister.SwizzleX,
-				       src->SrcRegister.SwizzleY,
-				       src->SrcRegister.SwizzleZ,
-				       src->SrcRegister.SwizzleW);
+   reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->Register.SwizzleX,
+				       src->Register.SwizzleY,
+				       src->Register.SwizzleZ,
+				       src->Register.SwizzleW);
 
-   reg.negate = src->SrcRegister.Negate ? 1 : 0;   
+   reg.negate = src->Register.Negate ? 1 : 0;   
 
    /* XXX: abs, absneg
     */
@@ -1353,7 +1353,7 @@ static void emit_insn(struct brw_vs_compile *c,
 		      const struct tgsi_full_instruction *inst)
 {
    unsigned opcode = inst->Instruction.Opcode;
-   unsigned label = inst->InstructionExtLabel.Label;
+   unsigned label = inst->Label.Label;
    struct brw_compile *p = &c->func;
    struct brw_reg args[3], dst;
    GLuint i;
@@ -1366,7 +1366,7 @@ static void emit_insn(struct brw_vs_compile *c,
    /* Get argument regs.
     */
    for (i = 0; i < 3; i++) {
-      args[i] = get_arg(c, &inst->FullSrcRegisters[i], i);
+      args[i] = get_arg(c, &inst->Src[i], i);
    }
 
    /* Get dest regs.  Note that it is possible for a reg to be both
@@ -1374,9 +1374,9 @@ static void emit_insn(struct brw_vs_compile *c,
     * care needs to be taken emitting multi-operation instructions.
     */ 
    dst = get_dst(c, 
-		 inst->FullDstRegisters[0].DstRegister.File,
-		 inst->FullDstRegisters[0].DstRegister.Index,
-		 inst->FullDstRegisters[0].DstRegister.WriteMask);
+		 inst->Dst[0].Register.File,
+		 inst->Dst[0].Register.Index,
+		 inst->Dst[0].Register.WriteMask);
 
    /* XXX: saturate
     */
@@ -1619,7 +1619,7 @@ void brw_vs_emit(struct brw_vs_compile *c)
    struct tgsi_parse_context parse;
    struct tgsi_full_instruction *inst;
 
-//   if (BRW_DEBUG & DEBUG_VS)
+   if (BRW_DEBUG & DEBUG_VS)
       tgsi_dump(c->vp->tokens, 0); 
 
    c->stack_index = brw_indirect(0, 0);
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index af506a283d..2f47067716 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -27,7 +27,7 @@
 #define BRW_WINSYS_H
 
 #include "pipe/p_compiler.h"
-#include "pipe/p_error.h"
+#include "pipe/p_defines.h"
 #include "pipe/p_refcnt.h"
 
 struct brw_winsys;
diff --git a/src/gallium/drivers/i965/brw_wm.c b/src/gallium/drivers/i965/brw_wm.c
index 2c9d3e5e87..fdf820a9aa 100644
--- a/src/gallium/drivers/i965/brw_wm.c
+++ b/src/gallium/drivers/i965/brw_wm.c
@@ -180,7 +180,7 @@ static enum pipe_error do_wm_prog( struct brw_context *brw,
       /* XXX: GLSL support
        */
       exit(1);
-      //brw_wm_branching_shader_emit(brw, c);
+      /* brw_wm_branching_shader_emit(brw, c); */
    }
    else {
       c->dispatch_width = 16;
diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 0b82f4e156..7e57d0306b 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -1007,7 +1007,7 @@ static void emit_killp( struct brw_wm_compile *c )
 
    brw_push_insn_state(p);
    brw_set_mask_control(p, BRW_MASK_DISABLE);
-   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK
+   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
    brw_AND(p, r0uw, c->emit_mask_reg, r0uw);
    brw_pop_insn_state(p);
 }
diff --git a/src/gallium/drivers/i965/brw_wm_fp.c b/src/gallium/drivers/i965/brw_wm_fp.c
index a8b5e15f36..9c5b527f89 100644
--- a/src/gallium/drivers/i965/brw_wm_fp.c
+++ b/src/gallium/drivers/i965/brw_wm_fp.c
@@ -957,15 +957,15 @@ static struct brw_fp_dst translate_dst( struct brw_wm_compile *c,
 {
    struct brw_fp_dst out;
 
-   out.file = dst->DstRegister.File;
-   out.index = dst->DstRegister.Index;
-   out.writemask = dst->DstRegister.WriteMask;
-   out.indirect = dst->DstRegister.Indirect;
+   out.file = dst->Register.File;
+   out.index = dst->Register.Index;
+   out.writemask = dst->Register.WriteMask;
+   out.indirect = dst->Register.Indirect;
    out.saturate = (saturate == TGSI_SAT_ZERO_ONE);
    
    if (out.indirect) {
-      assert(dst->DstRegisterInd.File == TGSI_FILE_ADDRESS);
-      assert(dst->DstRegisterInd.Index == 0);
+      assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(dst->Indirect.Index == 0);
    }
    
    return out;
@@ -977,14 +977,14 @@ static struct brw_fp_src translate_src( struct brw_wm_compile *c,
 {
    struct brw_fp_src out;
 
-   out.file = src->SrcRegister.File;
-   out.index = src->SrcRegister.Index;
-   out.indirect = src->SrcRegister.Indirect;
+   out.file = src->Register.File;
+   out.index = src->Register.Index;
+   out.indirect = src->Register.Indirect;
 
-   out.swizzle = ((src->SrcRegister.SwizzleX << 0) |
-		  (src->SrcRegister.SwizzleY << 2) |
-		  (src->SrcRegister.SwizzleZ << 4) |
-		  (src->SrcRegister.SwizzleW << 6));
+   out.swizzle = ((src->Register.SwizzleX << 0) |
+		  (src->Register.SwizzleY << 2) |
+		  (src->Register.SwizzleZ << 4) |
+		  (src->Register.SwizzleW << 6));
    
    switch (tgsi_util_get_full_src_register_sign_mode( src, 0 )) {
    case TGSI_UTIL_SIGN_CLEAR:
@@ -1010,8 +1010,8 @@ static struct brw_fp_src translate_src( struct brw_wm_compile *c,
    }
 
    if (out.indirect) {
-      assert(src->SrcRegisterInd.File == TGSI_FILE_ADDRESS);
-      assert(src->SrcRegisterInd.Index == 0);
+      assert(src->Indirect.File == TGSI_FILE_ADDRESS);
+      assert(src->Indirect.Index == 0);
    }
    
    return out;
@@ -1027,11 +1027,11 @@ static void emit_insn( struct brw_wm_compile *c,
    struct brw_fp_src src[3];
    int i;
 
-   dst = translate_dst( c, &inst->FullDstRegisters[0],
+   dst = translate_dst( c, &inst->Dst[0],
 			inst->Instruction.Saturate );
 
    for (i = 0; i < inst->Instruction.NumSrcRegs; i++)
-      src[i] = translate_src( c, &inst->FullSrcRegisters[i] );
+      src[i] = translate_src( c, &inst->Src[i] );
    
    switch (opcode) {
    case TGSI_OPCODE_ABS:
@@ -1063,7 +1063,7 @@ static void emit_insn( struct brw_wm_compile *c,
 
    case TGSI_OPCODE_TEX:
       precalc_tex(c, dst,
-		  inst->InstructionExtTexture.Texture,
+		  inst->Texture.Texture,
 		  src[1].index,	/* use sampler unit for tex idx */
 		  src[0],       /* coord */
                   src[1]);      /* sampler */
@@ -1071,7 +1071,7 @@ static void emit_insn( struct brw_wm_compile *c,
 
    case TGSI_OPCODE_TXP:
       precalc_txp(c, dst,
-		  inst->InstructionExtTexture.Texture,
+		  inst->Texture.Texture,
 		  src[1].index,	/* use sampler unit for tex idx */
 		  src[0],       /* coord */
                   src[1]);      /* sampler */
@@ -1081,7 +1081,7 @@ static void emit_insn( struct brw_wm_compile *c,
       /* XXX: TXB not done
        */
       precalc_tex(c, dst,
-		  inst->InstructionExtTexture.Texture,
+		  inst->Texture.Texture,
 		  src[1].index,	/* use sampler unit for tex idx*/
 		  src[0],
                   src[1]);
@@ -1169,14 +1169,14 @@ int brw_wm_pass_fp( struct brw_wm_compile *c )
             unsigned first, last, mask;
             unsigned attrib;
 
-            first = decl->DeclarationRange.First;
-            last = decl->DeclarationRange.Last;
+            first = decl->Range.First;
+            last = decl->Range.Last;
             mask = decl->Declaration.UsageMask;
 
             for (attrib = first; attrib <= last; attrib++) {
 	       emit_interp(c, 
 			   attrib, 
-			   decl->Semantic.SemanticName,
+			   decl->Semantic.Name,
 			   decl->Declaration.Interpolate );
             }
          }
-- 
cgit v1.2.3


From 09e785ee04c80c2bdf27245be7dafc79cce5b0ad Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 22:14:35 +0000
Subject: gallium: remove return value from draw calls

---
 src/gallium/auxiliary/rbug/rbug_context.h |  2 +-
 src/gallium/auxiliary/rbug/rbug_proto.h   |  2 +-
 src/gallium/auxiliary/util/u_format.h     |  2 +-
 src/gallium/drivers/failover/fo_context.c | 42 +++++++++++++++++--------------
 src/gallium/drivers/failover/fo_winsys.h  |  3 +++
 src/gallium/drivers/identity/id_context.c | 42 +++++++++++++++----------------
 src/gallium/drivers/svga/svga_pipe_draw.c | 28 ++++++++++-----------
 src/gallium/drivers/trace/tr_context.c    | 39 +++++++++-------------------
 src/gallium/drivers/trace/tr_state.h      |  2 +-
 src/gallium/include/pipe/p_context.h      | 28 ++++++++++-----------
 10 files changed, 90 insertions(+), 100 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/rbug/rbug_context.h b/src/gallium/auxiliary/rbug/rbug_context.h
index da61c2365b..03126d6b12 100644
--- a/src/gallium/auxiliary/rbug/rbug_context.h
+++ b/src/gallium/auxiliary/rbug/rbug_context.h
@@ -46,7 +46,7 @@ typedef enum
 	RBUG_BLOCK_BEFORE = 1,
 	RBUG_BLOCK_AFTER = 2,
 	RBUG_BLOCK_RULE = 4,
-	RBUG_BLOCK_MASK = 7,
+	RBUG_BLOCK_MASK = 7
 } rbug_block_t;
 
 struct rbug_proto_context_list
diff --git a/src/gallium/auxiliary/rbug/rbug_proto.h b/src/gallium/auxiliary/rbug/rbug_proto.h
index d273be0166..4f3eb75dc4 100644
--- a/src/gallium/auxiliary/rbug/rbug_proto.h
+++ b/src/gallium/auxiliary/rbug/rbug_proto.h
@@ -65,7 +65,7 @@ enum rbug_opcode
 	RBUG_OP_SHADER_DISABLE = 770,
 	RBUG_OP_SHADER_REPLACE = 771,
 	RBUG_OP_SHADER_LIST_REPLY = -768,
-	RBUG_OP_SHADER_INFO_REPLY = -769,
+	RBUG_OP_SHADER_INFO_REPLY = -769
 };
 
 /**
diff --git a/src/gallium/auxiliary/util/u_format.h b/src/gallium/auxiliary/util/u_format.h
index 090183fb17..a558923b2e 100644
--- a/src/gallium/auxiliary/util/u_format.h
+++ b/src/gallium/auxiliary/util/u_format.h
@@ -119,7 +119,7 @@ enum util_format_colorspace {
    UTIL_FORMAT_COLORSPACE_RGB = 0,
    UTIL_FORMAT_COLORSPACE_SRGB = 1,
    UTIL_FORMAT_COLORSPACE_YUV = 2,
-   UTIL_FORMAT_COLORSPACE_ZS = 3,
+   UTIL_FORMAT_COLORSPACE_ZS = 3
 };
 
 
diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c
index 37184eac7b..46e4338d98 100644
--- a/src/gallium/drivers/failover/fo_context.c
+++ b/src/gallium/drivers/failover/fo_context.c
@@ -44,11 +44,19 @@ static void failover_destroy( struct pipe_context *pipe )
 }
 
 
+void failover_fail_over( struct failover_context *failover )
+{
+   failover->dirty = TRUE;
+   failover->mode = FO_SW;
+}
+
 
-static boolean failover_draw_elements( struct pipe_context *pipe,
-				       struct pipe_buffer *indexBuffer,
-				       unsigned indexSize,
-				       unsigned prim, unsigned start, unsigned count)
+static void failover_draw_elements( struct pipe_context *pipe,
+                                    struct pipe_buffer *indexBuffer,
+                                    unsigned indexSize,
+                                    unsigned prim, 
+                                    unsigned start, 
+                                    unsigned count)
 {
    struct failover_context *failover = failover_context( pipe );
 
@@ -62,24 +70,22 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
    /* Try hardware:
     */
    if (failover->mode == FO_HW) {
-      if (!failover->hw->draw_elements( failover->hw, 
-					indexBuffer, 
-					indexSize, 
-					prim, 
-					start, 
-					count )) {
-
-	 failover->hw->flush( failover->hw, ~0, NULL );
-	 failover->mode = FO_SW;
-      }
+      failover->hw->draw_elements( failover->hw, 
+                                   indexBuffer, 
+                                   indexSize, 
+                                   prim, 
+                                   start, 
+                                   count );
    }
 
    /* Possibly try software:
     */
    if (failover->mode == FO_SW) {
 
-      if (failover->dirty) 
+      if (failover->dirty) {
+         failover->hw->flush( failover->hw, ~0, NULL );
 	 failover_state_emit( failover );
+      }
 
       failover->sw->draw_elements( failover->sw, 
 				   indexBuffer, 
@@ -94,15 +100,13 @@ static boolean failover_draw_elements( struct pipe_context *pipe,
        */
       failover->sw->flush( failover->sw, ~0, NULL );
    }
-
-   return TRUE;
 }
 
 
-static boolean failover_draw_arrays( struct pipe_context *pipe,
+static void failover_draw_arrays( struct pipe_context *pipe,
 				     unsigned prim, unsigned start, unsigned count)
 {
-   return failover_draw_elements(pipe, NULL, 0, prim, start, count);
+   failover_draw_elements(pipe, NULL, 0, prim, start, count);
 }
 
 static unsigned int
diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h
index a8ce997a1f..533122b69d 100644
--- a/src/gallium/drivers/failover/fo_winsys.h
+++ b/src/gallium/drivers/failover/fo_winsys.h
@@ -36,10 +36,13 @@
 
 
 struct pipe_context;
+struct failover_context;
 
 
 struct pipe_context *failover_create( struct pipe_context *hw,
 				      struct pipe_context *sw );
 
 
+void failover_fail_over( struct failover_context *failover );
+
 #endif /* FO_WINSYS_H */
diff --git a/src/gallium/drivers/identity/id_context.c b/src/gallium/drivers/identity/id_context.c
index bedab56f59..37f2700fa1 100644
--- a/src/gallium/drivers/identity/id_context.c
+++ b/src/gallium/drivers/identity/id_context.c
@@ -56,7 +56,7 @@ identity_set_edgeflags(struct pipe_context *_pipe,
                        bitfield);
 }
 
-static boolean
+static void
 identity_draw_arrays(struct pipe_context *_pipe,
                      unsigned prim,
                      unsigned start,
@@ -65,13 +65,13 @@ identity_draw_arrays(struct pipe_context *_pipe,
    struct identity_context *id_pipe = identity_context(_pipe);
    struct pipe_context *pipe = id_pipe->pipe;
 
-   return pipe->draw_arrays(pipe,
-                            prim,
-                            start,
-                            count);
+   pipe->draw_arrays(pipe,
+                     prim,
+                     start,
+                     count);
 }
 
-static boolean
+static void
 identity_draw_elements(struct pipe_context *_pipe,
                        struct pipe_buffer *_indexBuffer,
                        unsigned indexSize,
@@ -84,15 +84,15 @@ identity_draw_elements(struct pipe_context *_pipe,
    struct pipe_context *pipe = id_pipe->pipe;
    struct pipe_buffer *indexBuffer = id_buffer->buffer;
 
-   return pipe->draw_elements(pipe,
-                              indexBuffer,
-                              indexSize,
-                              prim,
-                              start,
-                              count);
+   pipe->draw_elements(pipe,
+                       indexBuffer,
+                       indexSize,
+                       prim,
+                       start,
+                       count);
 }
 
-static boolean
+static void
 identity_draw_range_elements(struct pipe_context *_pipe,
                              struct pipe_buffer *_indexBuffer,
                              unsigned indexSize,
@@ -107,14 +107,14 @@ identity_draw_range_elements(struct pipe_context *_pipe,
    struct pipe_context *pipe = id_pipe->pipe;
    struct pipe_buffer *indexBuffer = id_buffer->buffer;
 
-   return pipe->draw_range_elements(pipe,
-                                    indexBuffer,
-                                    indexSize,
-                                    minIndex,
-                                    maxIndex,
-                                    mode,
-                                    start,
-                                    count);
+   pipe->draw_range_elements(pipe,
+                             indexBuffer,
+                             indexSize,
+                             minIndex,
+                             maxIndex,
+                             mode,
+                             start,
+                             count);
 }
 
 static struct pipe_query *
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
index 71a552862e..0f24ef4ee8 100644
--- a/src/gallium/drivers/svga/svga_pipe_draw.c
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -149,7 +149,7 @@ retry:
 
 
-static boolean
+static void
 svga_draw_range_elements( struct pipe_context *pipe,
                           struct pipe_buffer *index_buffer,
                           unsigned index_size,
@@ -162,7 +162,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
    enum pipe_error ret = 0;
 
    if (!u_trim_pipe_prim( prim, &count ))
-      return TRUE;
+      return;
 
    /*
     * Mark currently bound target surfaces as dirty
@@ -183,7 +183,7 @@ svga_draw_range_elements( struct pipe_context *pipe,
 #ifdef DEBUG
    if (svga->curr.vs->base.id == svga->debug.disable_shader ||
        svga->curr.fs->base.id == svga->debug.disable_shader)
-      return 0;
+      return;
 #endif
 
    if (svga->state.sw.need_swtnl)
@@ -225,31 +225,29 @@ svga_draw_range_elements( struct pipe_context *pipe,
       svga_hwtnl_flush_retry( svga );
       svga_context_flush(svga, NULL);
    }
-
-   return ret == PIPE_OK;
 }
 
 
-static boolean 
+static void
 svga_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *index_buffer,
                     unsigned index_size,
                     unsigned prim, unsigned start, unsigned count)
 {
-   return svga_draw_range_elements( pipe, index_buffer,
-                                    index_size,
-                                    0, 0xffffffff,
-                                    prim, start, count );
+   svga_draw_range_elements( pipe, index_buffer,
+                             index_size,
+                             0, 0xffffffff,
+                             prim, start, count );
 }
 
-static boolean 
+static void
 svga_draw_arrays( struct pipe_context *pipe,
                   unsigned prim, unsigned start, unsigned count)
 {
-   return svga_draw_range_elements(pipe, NULL, 0, 
-                                   start, start + count - 1, 
-                                   prim, 
-                                   start, count);
+   svga_draw_range_elements(pipe, NULL, 0, 
+                            start, start + count - 1, 
+                            prim, 
+                            start, count);
 }
 
 
diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index 2f0f063d2d..82d0911d36 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -178,16 +178,15 @@ trace_context_draw_block(struct trace_context *tr_ctx, int flag)
    pipe_mutex_unlock(tr_ctx->draw_mutex);
 }
 
-static INLINE boolean
+static INLINE void
 trace_context_draw_arrays(struct pipe_context *_pipe,
                           unsigned mode, unsigned start, unsigned count)
 {
    struct trace_context *tr_ctx = trace_context(_pipe);
    struct pipe_context *pipe = tr_ctx->pipe;
-   boolean result;
 
    if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
-      return 0;
+      return;
 
    trace_context_draw_block(tr_ctx, 1);
 
@@ -198,19 +197,15 @@ trace_context_draw_arrays(struct pipe_context *_pipe,
    trace_dump_arg(uint, start);
    trace_dump_arg(uint, count);
 
-   result = pipe->draw_arrays(pipe, mode, start, count);
-
-   trace_dump_ret(bool, result);
+   pipe->draw_arrays(pipe, mode, start, count);
 
    trace_dump_call_end();
 
    trace_context_draw_block(tr_ctx, 2);
-
-   return result;
 }
 
 
-static INLINE boolean
+static INLINE void
 trace_context_draw_elements(struct pipe_context *_pipe,
                           struct pipe_buffer *_indexBuffer,
                           unsigned indexSize,
@@ -220,10 +215,9 @@ trace_context_draw_elements(struct pipe_context *_pipe,
    struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
    struct pipe_context *pipe = tr_ctx->pipe;
    struct pipe_buffer *indexBuffer = tr_buf->buffer;
-   boolean result;
 
    if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
-      return 0;
+      return;
 
    trace_context_draw_block(tr_ctx, 1);
 
@@ -238,19 +232,15 @@ trace_context_draw_elements(struct pipe_context *_pipe,
    trace_dump_arg(uint, start);
    trace_dump_arg(uint, count);
 
-   result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
-
-   trace_dump_ret(bool, result);
+   pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);
 
    trace_dump_call_end();
 
    trace_context_draw_block(tr_ctx, 2);
-
-   return result;
 }
 
 
-static INLINE boolean
+static INLINE void
 trace_context_draw_range_elements(struct pipe_context *_pipe,
                                   struct pipe_buffer *_indexBuffer,
                                   unsigned indexSize,
@@ -264,10 +254,9 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
    struct trace_buffer *tr_buf = trace_buffer(_indexBuffer);
    struct pipe_context *pipe = tr_ctx->pipe;
    struct pipe_buffer *indexBuffer = tr_buf->buffer;
-   boolean result;
 
    if (tr_ctx->curr.fs->disabled || tr_ctx->curr.vs->disabled)
-      return 0;
+      return;
 
    trace_context_draw_block(tr_ctx, 1);
 
@@ -284,18 +273,14 @@ trace_context_draw_range_elements(struct pipe_context *_pipe,
    trace_dump_arg(uint, start);
    trace_dump_arg(uint, count);
 
-   result = pipe->draw_range_elements(pipe,
-                                      indexBuffer,
-                                      indexSize, minIndex, maxIndex,
-                                      mode, start, count);
-
-   trace_dump_ret(bool, result);
+   pipe->draw_range_elements(pipe,
+                             indexBuffer,
+                             indexSize, minIndex, maxIndex,
+                             mode, start, count);
 
    trace_dump_call_end();
 
    trace_context_draw_block(tr_ctx, 2);
-
-   return result;
 }
 
 
diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 1c16042ee5..e2f981d051 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -32,7 +32,7 @@ struct tgsi_token;
 enum trace_shader_type {
    TRACE_SHADER_FRAGMENT = 0,
    TRACE_SHADER_VERTEX   = 1,
-   TRACE_SHADER_GEOMETRY = 2,
+   TRACE_SHADER_GEOMETRY = 2
 };
 
 struct trace_shader
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index f896001eb1..564c35f789 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -69,27 +69,27 @@ struct pipe_context {
     * VBO drawing (return false on fallbacks (temporary??))
     */
    /*@{*/
-   boolean (*draw_arrays)( struct pipe_context *pipe,
-			   unsigned mode, unsigned start, unsigned count);
+   void (*draw_arrays)( struct pipe_context *pipe,
+                        unsigned mode, unsigned start, unsigned count);
 
-   boolean (*draw_elements)( struct pipe_context *pipe,
-			     struct pipe_buffer *indexBuffer,
-			     unsigned indexSize,
-			     unsigned mode, unsigned start, unsigned count);
+   void (*draw_elements)( struct pipe_context *pipe,
+                          struct pipe_buffer *indexBuffer,
+                          unsigned indexSize,
+                          unsigned mode, unsigned start, unsigned count);
 
    /* XXX: this is (probably) a temporary entrypoint, as the range
     * information should be available from the vertex_buffer state.
     * Using this to quickly evaluate a specialized path in the draw
     * module.
     */
-   boolean (*draw_range_elements)( struct pipe_context *pipe,
-                                   struct pipe_buffer *indexBuffer,
-                                   unsigned indexSize,
-                                   unsigned minIndex,
-                                   unsigned maxIndex,
-                                   unsigned mode, 
-                                   unsigned start, 
-                                   unsigned count);
+   void (*draw_range_elements)( struct pipe_context *pipe,
+                                struct pipe_buffer *indexBuffer,
+                                unsigned indexSize,
+                                unsigned minIndex,
+                                unsigned maxIndex,
+                                unsigned mode, 
+                                unsigned start, 
+                                unsigned count);
    /*@}*/
 
 
-- 
cgit v1.2.3


From 03f212b0d85fed5dec9a855fb6d079e5fdb60ac9 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Mon, 21 Dec 2009 22:47:21 +0000
Subject: gallium: propagate draw retval changes into more drivers

---
 src/gallium/drivers/i915/i915_context.c       | 18 ++++----
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 18 ++++----
 src/gallium/drivers/llvmpipe/lp_state.h       |  6 +--
 src/gallium/drivers/nv04/nv04_context.h       |  4 +-
 src/gallium/drivers/nv04/nv04_vbo.c           | 10 ++---
 src/gallium/drivers/nv10/nv10_context.h       |  4 +-
 src/gallium/drivers/nv10/nv10_vbo.c           | 10 ++---
 src/gallium/drivers/nv20/nv20_context.h       |  4 +-
 src/gallium/drivers/nv20/nv20_vbo.c           |  7 ++-
 src/gallium/drivers/nv30/nv30_context.h       |  4 +-
 src/gallium/drivers/nv30/nv30_vbo.c           | 16 +++----
 src/gallium/drivers/nv40/nv40_context.h       |  6 +--
 src/gallium/drivers/nv40/nv40_draw.c          |  6 +--
 src/gallium/drivers/nv40/nv40_vbo.c           | 23 +++++-----
 src/gallium/drivers/nv50/nv50_context.h       |  4 +-
 src/gallium/drivers/nv50/nv50_vbo.c           | 14 +++---
 src/gallium/drivers/r300/r300_render.c        | 62 +++++++++++++--------------
 src/gallium/drivers/r300/r300_render.h        | 60 +++++++++++++-------------
 src/gallium/drivers/softpipe/sp_draw_arrays.c | 18 ++++----
 src/gallium/drivers/softpipe/sp_state.h       | 16 +++----
 20 files changed, 146 insertions(+), 164 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 94c8aee30f..753f37e095 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -45,7 +45,7 @@
  */
 
 
-static boolean
+static void
 i915_draw_range_elements(struct pipe_context *pipe,
                          struct pipe_buffer *indexBuffer,
                          unsigned indexSize,
@@ -106,27 +106,25 @@ i915_draw_range_elements(struct pipe_context *pipe,
       pipe_buffer_unmap(pipe->screen, indexBuffer);
       draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL);
    }
-
-   return TRUE;
 }
 
-static boolean
+static void
 i915_draw_elements(struct pipe_context *pipe,
                    struct pipe_buffer *indexBuffer,
                    unsigned indexSize,
                    unsigned prim, unsigned start, unsigned count)
 {
-   return i915_draw_range_elements(pipe, indexBuffer,
-                                   indexSize,
-                                   0, 0xffffffff,
-                                   prim, start, count);
+   i915_draw_range_elements(pipe, indexBuffer,
+                            indexSize,
+                            0, 0xffffffff,
+                            prim, start, count);
 }
 
-static boolean
+static void
 i915_draw_arrays(struct pipe_context *pipe,
                  unsigned prim, unsigned start, unsigned count)
 {
-   return i915_draw_elements(pipe, NULL, 0, prim, start, count);
+   i915_draw_elements(pipe, NULL, 0, prim, start, count);
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 0aa13a1fc6..57ce5b17a2 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -45,11 +45,11 @@
 
 
-boolean
+void
 llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
                      unsigned start, unsigned count)
 {
-   return llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+   llvmpipe_draw_elements(pipe, NULL, 0, mode, start, count);
 }
 
 
@@ -58,7 +58,7 @@ llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
  * Basically, map the vertex buffers (and drawing surfaces), then hand off
  * the drawing to the 'draw' module.
  */
-boolean
+void
 llvmpipe_draw_range_elements(struct pipe_context *pipe,
                              struct pipe_buffer *indexBuffer,
                              unsigned indexSize,
@@ -116,21 +116,19 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
    /* Note: leave drawing surfaces mapped */
 
    lp->dirty_render_cache = TRUE;
-   
-   return TRUE;
 }
 
 
-boolean
+void
 llvmpipe_draw_elements(struct pipe_context *pipe,
                        struct pipe_buffer *indexBuffer,
                        unsigned indexSize,
                        unsigned mode, unsigned start, unsigned count)
 {
-   return llvmpipe_draw_range_elements( pipe, indexBuffer,
-                                        indexSize,
-                                        0, 0xffffffff,
-                                        mode, start, count );
+   llvmpipe_draw_range_elements( pipe, indexBuffer,
+                                 indexSize,
+                                 0, 0xffffffff,
+                                 mode, start, count );
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index d1c74ab07b..2857853586 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -197,14 +197,14 @@ void llvmpipe_update_fs(struct llvmpipe_context *lp);
 void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe );
 
 
-boolean llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+void llvmpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
 			     unsigned start, unsigned count);
 
-boolean llvmpipe_draw_elements(struct pipe_context *pipe,
+void llvmpipe_draw_elements(struct pipe_context *pipe,
 			       struct pipe_buffer *indexBuffer,
 			       unsigned indexSize,
 			       unsigned mode, unsigned start, unsigned count);
-boolean
+void
 llvmpipe_draw_range_elements(struct pipe_context *pipe,
                              struct pipe_buffer *indexBuffer,
                              unsigned indexSize,
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 55326c787a..5951115293 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -141,9 +141,9 @@ extern void nv04_emit_hw_state(struct nv04_context *nv04);
 extern void nv04_state_tex_update(struct nv04_context *nv04);
 
 /* nv04_vbo.c */
-extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv04_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv04_draw_elements( struct pipe_context *pipe,
+extern void nv04_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
index e3167814f2..704fae8c6c 100644
--- a/src/gallium/drivers/nv04/nv04_vbo.c
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -9,7 +9,7 @@
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_pushbuf.h"
 
-boolean nv04_draw_elements( struct pipe_context *pipe,
+void nv04_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count)
@@ -65,15 +65,13 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
 		pipe_buffer_unmap(pscreen, indexBuffer);
 		draw_set_mapped_element_buffer(draw, 0, NULL);
 	}
-
-	return TRUE;
 }
 
-boolean nv04_draw_arrays( struct pipe_context *pipe,
-				 unsigned prim, unsigned start, unsigned count)
+void nv04_draw_arrays( struct pipe_context *pipe,
+                       unsigned prim, unsigned start, unsigned count)
 {
 	printf("coucou in draw arrays\n");
-	return nv04_draw_elements(pipe, NULL, 0, prim, start, count);
+	nv04_draw_elements(pipe, NULL, 0, prim, start, count);
 }
 
 
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index 36a6aa7a74..3f829fd106 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -144,9 +144,9 @@ extern void nv10_emit_hw_state(struct nv10_context *nv10);
 extern void nv10_state_tex_update(struct nv10_context *nv10);
 
 /* nv10_vbo.c */
-extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv10_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv10_draw_elements( struct pipe_context *pipe,
+extern void nv10_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
index 441a4f75f3..230f2e6d39 100644
--- a/src/gallium/drivers/nv10/nv10_vbo.c
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -9,7 +9,7 @@
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_pushbuf.h"
 
-boolean nv10_draw_elements( struct pipe_context *pipe,
+void nv10_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count)
@@ -64,14 +64,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
 		pipe_buffer_unmap(pscreen, indexBuffer);
 		draw_set_mapped_element_buffer(draw, 0, NULL);
 	}
-
-	return TRUE;
 }
 
-boolean nv10_draw_arrays( struct pipe_context *pipe,
-				 unsigned prim, unsigned start, unsigned count)
+void nv10_draw_arrays( struct pipe_context *pipe,
+                       unsigned prim, unsigned start, unsigned count)
 {
-	return nv10_draw_elements(pipe, NULL, 0, prim, start, count);
+	nv10_draw_elements(pipe, NULL, 0, prim, start, count);
 }
 
 
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index a4eaa95660..c88a1bd9bd 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -143,9 +143,9 @@ extern void nv20_emit_hw_state(struct nv20_context *nv20);
 extern void nv20_state_tex_update(struct nv20_context *nv20);
 
 /* nv20_vbo.c */
-extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv20_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv20_draw_elements( struct pipe_context *pipe,
+extern void nv20_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count);
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
index 84d7db6c5e..2f5e12233f 100644
--- a/src/gallium/drivers/nv20/nv20_vbo.c
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -9,7 +9,7 @@
 #include "nouveau/nouveau_channel.h"
 #include "nouveau/nouveau_pushbuf.h"
 
-boolean nv20_draw_elements( struct pipe_context *pipe,
+void nv20_draw_elements( struct pipe_context *pipe,
                     struct pipe_buffer *indexBuffer,
                     unsigned indexSize,
                     unsigned prim, unsigned start, unsigned count)
@@ -67,13 +67,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
 	}
 
 	draw_flush(nv20->draw);
-	return TRUE;
 }
 
-boolean nv20_draw_arrays( struct pipe_context *pipe,
+void nv20_draw_arrays( struct pipe_context *pipe,
 				 unsigned prim, unsigned start, unsigned count)
 {
-	return nv20_draw_elements(pipe, NULL, 0, prim, start, count);
+	nv20_draw_elements(pipe, NULL, 0, prim, start, count);
 }
 
 
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index 6f44b1c7fe..17f8660590 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -199,9 +199,9 @@ extern struct nv30_state_entry nv30_state_fragtex;
 extern struct nv30_state_entry nv30_state_vbo;
 
 /* nv30_vbo.c */
-extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv30_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv30_draw_elements(struct pipe_context *pipe,
+extern void nv30_draw_elements(struct pipe_context *pipe,
 				  struct pipe_buffer *indexBuffer,
 				  unsigned indexSize,
 				  unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index 189656ec81..d359f71f43 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -163,7 +163,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so,
 	return TRUE;
 }
 
-boolean
+void
 nv30_draw_arrays(struct pipe_context *pipe,
 		 unsigned mode, unsigned start, unsigned count)
 {
@@ -175,7 +175,7 @@ nv30_draw_arrays(struct pipe_context *pipe,
 	if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
 		/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
 						mode, start, count);*/
-		return FALSE;
+		return;
 	}
 
 	while (count) {
@@ -362,7 +362,7 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
 	}
 }
 
-static boolean
+static void
 nv30_draw_elements_inline(struct pipe_context *pipe,
 			  struct pipe_buffer *ib, unsigned ib_size,
 			  unsigned mode, unsigned start, unsigned count)
@@ -393,10 +393,9 @@ nv30_draw_elements_inline(struct pipe_context *pipe,
 	}
 
 	pipe_buffer_unmap(pscreen, ib);
-	return TRUE;
 }
 
-static boolean
+static void
 nv30_draw_elements_vbo(struct pipe_context *pipe,
 		       unsigned mode, unsigned start, unsigned count)
 {
@@ -445,11 +444,9 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
 		count -= vc;
 		start = restart;
 	}
-
-	return TRUE;
 }
 
-boolean
+void
 nv30_draw_elements(struct pipe_context *pipe,
 		   struct pipe_buffer *indexBuffer, unsigned indexSize,
 		   unsigned mode, unsigned start, unsigned count)
@@ -461,7 +458,7 @@ nv30_draw_elements(struct pipe_context *pipe,
 	if (FORCE_SWTNL || !nv30_state_validate(nv30)) {
 		/*return nv30_draw_elements_swtnl(pipe, NULL, 0,
 						mode, start, count);*/
-		return FALSE;	
+		return;	
 	}
 
 	if (idxbuf) {
@@ -472,7 +469,6 @@ nv30_draw_elements(struct pipe_context *pipe,
 	}
 
 	pipe->flush(pipe, 0, NULL);
-	return TRUE;
 }
 
 static boolean
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index cf33b64a86..d12b9d88cb 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -184,7 +184,7 @@ extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen);
 
 /* nv40_draw.c */
 extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40);
-extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe,
+extern void nv40_draw_elements_swtnl(struct pipe_context *pipe,
 					struct pipe_buffer *idxbuf,
 					unsigned ib_size, unsigned mode,
 					unsigned start, unsigned count);
@@ -220,9 +220,9 @@ extern struct nv40_state_entry nv40_state_vbo;
 extern struct nv40_state_entry nv40_state_vtxfmt;
 
 /* nv40_vbo.c */
-extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv40_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv40_draw_elements(struct pipe_context *pipe,
+extern void nv40_draw_elements(struct pipe_context *pipe,
 				  struct pipe_buffer *indexBuffer,
 				  unsigned indexSize,
 				  unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index b2f19ecb69..38aba92a14 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -226,7 +226,7 @@ nv40_draw_render_stage(struct nv40_context *nv40)
 	return &render->stage;
 }
 
-boolean
+void
 nv40_draw_elements_swtnl(struct pipe_context *pipe,
 			 struct pipe_buffer *idxbuf, unsigned idxbuf_size,
 			 unsigned mode, unsigned start, unsigned count)
@@ -237,7 +237,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
 	void *map;
 
 	if (!nv40_state_validate_swtnl(nv40))
-		return FALSE;
+		return;
 	nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF);
 	nv40_state_emit(nv40);
 
@@ -277,8 +277,6 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
 
 	draw_flush(nv40->draw);
 	pipe->flush(pipe, 0, NULL);
-
-	return TRUE;
 }
 
 static INLINE void
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index b2753b8e2e..eecdf01915 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -164,7 +164,7 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so,
 	return TRUE;
 }
 
-boolean
+void
 nv40_draw_arrays(struct pipe_context *pipe,
 		 unsigned mode, unsigned start, unsigned count)
 {
@@ -174,8 +174,9 @@ nv40_draw_arrays(struct pipe_context *pipe,
 
 	nv40_vbo_set_idxbuf(nv40, NULL, 0);
 	if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
-		return nv40_draw_elements_swtnl(pipe, NULL, 0,
-						mode, start, count);
+		nv40_draw_elements_swtnl(pipe, NULL, 0,
+                                         mode, start, count);
+                return;
 	}
 
 	while (count) {
@@ -221,7 +222,6 @@ nv40_draw_arrays(struct pipe_context *pipe,
 	}
 
 	pipe->flush(pipe, 0, NULL);
-	return TRUE;
 }
 
 static INLINE void
@@ -362,7 +362,7 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
 	}
 }
 
-static boolean
+static void
 nv40_draw_elements_inline(struct pipe_context *pipe,
 			  struct pipe_buffer *ib, unsigned ib_size,
 			  unsigned mode, unsigned start, unsigned count)
@@ -393,10 +393,9 @@ nv40_draw_elements_inline(struct pipe_context *pipe,
 	}
 
 	pipe_buffer_unmap(pscreen, ib);
-	return TRUE;
 }
 
-static boolean
+static void
 nv40_draw_elements_vbo(struct pipe_context *pipe,
 		       unsigned mode, unsigned start, unsigned count)
 {
@@ -445,11 +444,9 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
 		count -= vc;
 		start = restart;
 	}
-
-	return TRUE;
 }
 
-boolean
+void
 nv40_draw_elements(struct pipe_context *pipe,
 		   struct pipe_buffer *indexBuffer, unsigned indexSize,
 		   unsigned mode, unsigned start, unsigned count)
@@ -459,8 +456,9 @@ nv40_draw_elements(struct pipe_context *pipe,
 
 	idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize);
 	if (FORCE_SWTNL || !nv40_state_validate(nv40)) {
-		return nv40_draw_elements_swtnl(pipe, NULL, 0,
-						mode, start, count);
+		nv40_draw_elements_swtnl(pipe, NULL, 0,
+                                         mode, start, count);
+                return;
 	}
 
 	if (idxbuf) {
@@ -471,7 +469,6 @@ nv40_draw_elements(struct pipe_context *pipe,
 	}
 
 	pipe->flush(pipe, 0, NULL);
-	return TRUE;
 }
 
 static boolean
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 5578a5838f..cbd4c3ff86 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -191,9 +191,9 @@ nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst,
 extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50);
 
 /* nv50_vbo.c */
-extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode,
+extern void nv50_draw_arrays(struct pipe_context *, unsigned mode,
 				unsigned start, unsigned count);
-extern boolean nv50_draw_elements(struct pipe_context *pipe,
+extern void nv50_draw_elements(struct pipe_context *pipe,
 				  struct pipe_buffer *indexBuffer,
 				  unsigned indexSize,
 				  unsigned mode, unsigned start,
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f7fa0659e8..ca8608e987 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -152,7 +152,7 @@ nv50_vbo_vtxelt_to_hw(struct pipe_vertex_element *ve)
 	return (hw_type | hw_size);
 }
 
-boolean
+void
 nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 		 unsigned count)
 {
@@ -182,7 +182,9 @@ nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start,
 	BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
 	OUT_RING  (chan, 0);
 
-	return ret;
+        /* XXX: not sure what to do if ret != TRUE: flush and retry?
+         */
+        assert(ret);
 }
 
 static INLINE boolean
@@ -275,7 +277,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 	return TRUE;
 }
 
-boolean
+void
 nv50_draw_elements(struct pipe_context *pipe,
 		   struct pipe_buffer *indexBuffer, unsigned indexSize,
 		   unsigned mode, unsigned start, unsigned count)
@@ -317,8 +319,10 @@ nv50_draw_elements(struct pipe_context *pipe,
 	OUT_RING  (chan, 0);
 
 	pipe_buffer_unmap(pscreen, indexBuffer);
-
-	return ret;
+        
+        /* XXX: what to do if ret != TRUE?  Flush and retry?
+         */
+	assert(ret);
 }
 
 static INLINE boolean
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2d70ec2ac9..87ad30ac30 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -213,7 +213,7 @@ validate:
 }
 
 /* This is the fast-path drawing & emission for HW TCL. */
-boolean r300_draw_range_elements(struct pipe_context* pipe,
+void r300_draw_range_elements(struct pipe_context* pipe,
                                  struct pipe_buffer* indexBuffer,
                                  unsigned indexSize,
                                  unsigned minIndex,
@@ -225,30 +225,33 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     if (!u_trim_pipe_prim(mode, &count)) {
-        return FALSE;
+        return;
     }
 
     if (count > 65535) {
-        return FALSE;
+       /* XXX: use aux/indices functions to split this into smaller
+        * primitives.
+        */
+        return;
     }
 
     if (r300_nothing_to_draw(r300)) {
-        return TRUE;
+        return;
     }
 
     r300_update_derived_state(r300);
 
     if (!r300_setup_vertex_buffers(r300)) {
-        return FALSE;
+        return;
     }
 
     if (!r300->winsys->add_buffer(r300->winsys, indexBuffer,
                                   RADEON_GEM_DOMAIN_GTT, 0)) {
-        return FALSE;
+        return;
     }
 
     if (!r300->winsys->validate(r300->winsys)) {
-        return FALSE;
+        return;
     }
 
     r300_emit_dirty_state(r300);
@@ -257,41 +260,42 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
 
     r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex, maxIndex,
                             mode, start, count);
-
-    return TRUE;
 }
 
 /* Simple helpers for context setup. Should probably be moved to util. */
-boolean r300_draw_elements(struct pipe_context* pipe,
-                           struct pipe_buffer* indexBuffer,
-                           unsigned indexSize, unsigned mode,
-                           unsigned start, unsigned count)
+void r300_draw_elements(struct pipe_context* pipe,
+                        struct pipe_buffer* indexBuffer,
+                        unsigned indexSize, unsigned mode,
+                        unsigned start, unsigned count)
 {
-    return pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
-                                     mode, start, count);
+   pipe->draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0,
+                             mode, start, count);
 }
 
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
                          unsigned start, unsigned count)
 {
     struct r300_context* r300 = r300_context(pipe);
 
     if (!u_trim_pipe_prim(mode, &count)) {
-        return FALSE;
+        return;
     }
 
     if (count > 65535) {
-        return FALSE;
+        /* XXX: driver needs to handle this -- use the functions in
+         * aux/indices to split this into several smaller primitives.
+         */
+        return;
     }
 
     if (r300_nothing_to_draw(r300)) {
-        return TRUE;
+        return;
     }
 
     r300_update_derived_state(r300);
 
     if (!r300_setup_vertex_buffers(r300)) {
-        return FALSE;
+        return;
     }
 
     r300_emit_dirty_state(r300);
@@ -299,8 +303,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
     r300_emit_aos(r300, start);
 
     r300_emit_draw_arrays(r300, mode, count);
-
-    return TRUE;
 }
 
 /****************************************************************************
@@ -309,7 +311,7 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
  ***************************************************************************/
 
 /* SW TCL arrays, using Draw. */
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
                                unsigned mode,
                                unsigned start,
                                unsigned count)
@@ -318,11 +320,11 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
     int i;
 
     if (!u_trim_pipe_prim(mode, &count)) {
-        return FALSE;
+        return;
     }
 
     if (r300_nothing_to_draw(r300)) {
-        return TRUE;
+        return;
     }
 
     for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -345,12 +347,10 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
         pipe_buffer_unmap(pipe->screen, r300->vertex_buffer[i].buffer);
         draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
     }
-
-    return TRUE;
 }
 
 /* SW TCL elements, using Draw. */
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                        struct pipe_buffer* indexBuffer,
                                        unsigned indexSize,
                                        unsigned minIndex,
@@ -363,11 +363,11 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
     int i;
 
     if (!u_trim_pipe_prim(mode, &count)) {
-        return FALSE;
+        return;
     }
 
     if (r300_nothing_to_draw(r300)) {
-        return TRUE;
+        return;
     }
 
     for (i = 0; i < r300->vertex_buffer_count; i++) {
@@ -397,8 +397,6 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
     pipe_buffer_unmap(pipe->screen, indexBuffer);
     draw_set_mapped_element_buffer_range(r300->draw, 0, start,
                                          start + count - 1, NULL);
-
-    return TRUE;
 }
 
 /* Object for rendering using Draw. */
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index da83069083..27b5e6a963 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -25,35 +25,35 @@
 
 uint32_t r300_translate_primitive(unsigned prim);
 
-boolean r300_draw_range_elements(struct pipe_context* pipe,
-                                 struct pipe_buffer* indexBuffer,
-                                 unsigned indexSize,
-                                 unsigned minIndex,
-                                 unsigned maxIndex,
-                                 unsigned mode,
-                                 unsigned start,
-                                 unsigned count);
-
-boolean r300_draw_elements(struct pipe_context* pipe,
-                           struct pipe_buffer* indexBuffer,
-                           unsigned indexSize, unsigned mode,
-                           unsigned start, unsigned count);
-
-boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
-                         unsigned start, unsigned count);
-
-boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
-                               unsigned mode,
-                               unsigned start,
-                               unsigned count);
-
-boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
-                                       struct pipe_buffer* indexBuffer,
-                                       unsigned indexSize,
-                                       unsigned minIndex,
-                                       unsigned maxIndex,
-                                       unsigned mode,
-                                       unsigned start,
-                                       unsigned count);
+void r300_draw_range_elements(struct pipe_context* pipe,
+                              struct pipe_buffer* indexBuffer,
+                              unsigned indexSize,
+                              unsigned minIndex,
+                              unsigned maxIndex,
+                              unsigned mode,
+                              unsigned start,
+                              unsigned count);
+
+void r300_draw_elements(struct pipe_context* pipe,
+                        struct pipe_buffer* indexBuffer,
+                        unsigned indexSize, unsigned mode,
+                        unsigned start, unsigned count);
+
+void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+                      unsigned start, unsigned count);
+
+void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+                            unsigned mode,
+                            unsigned start,
+                            unsigned count);
+
+void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+                                    struct pipe_buffer* indexBuffer,
+                                    unsigned indexSize,
+                                    unsigned minIndex,
+                                    unsigned maxIndex,
+                                    unsigned mode,
+                                    unsigned start,
+                                    unsigned count);
 
 #endif /* R300_RENDER_H */
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index d4045816d0..70a92fd4c6 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -88,11 +88,11 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
 }
 
 
-boolean
+void
 softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
                      unsigned start, unsigned count)
 {
-   return softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
+   softpipe_draw_elements(pipe, NULL, 0, mode, start, count);
 }
 
 
@@ -101,7 +101,7 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
  * Basically, map the vertex buffers (and drawing surfaces), then hand off
  * the drawing to the 'draw' module.
  */
-boolean
+void
 softpipe_draw_range_elements(struct pipe_context *pipe,
                              struct pipe_buffer *indexBuffer,
                              unsigned indexSize,
@@ -168,21 +168,19 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
    softpipe_unmap_constant_buffers(sp);
 
    sp->dirty_render_cache = TRUE;
-   
-   return TRUE;
 }
 
 
-boolean
+void
 softpipe_draw_elements(struct pipe_context *pipe,
                        struct pipe_buffer *indexBuffer,
                        unsigned indexSize,
                        unsigned mode, unsigned start, unsigned count)
 {
-   return softpipe_draw_range_elements( pipe, indexBuffer,
-                                        indexSize,
-                                        0, 0xffffffff,
-                                        mode, start, count );
+   softpipe_draw_range_elements( pipe, indexBuffer,
+                                 indexSize,
+                                 0, 0xffffffff,
+                                 mode, start, count );
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index d488fb8710..7a61422387 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -174,14 +174,14 @@ void softpipe_set_vertex_buffers(struct pipe_context *,
 void softpipe_update_derived( struct softpipe_context *softpipe );
 
 
-boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
-			     unsigned start, unsigned count);
-
-boolean softpipe_draw_elements(struct pipe_context *pipe,
-			       struct pipe_buffer *indexBuffer,
-			       unsigned indexSize,
-			       unsigned mode, unsigned start, unsigned count);
-boolean
+void softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode,
+                          unsigned start, unsigned count);
+
+void softpipe_draw_elements(struct pipe_context *pipe,
+                            struct pipe_buffer *indexBuffer,
+                            unsigned indexSize,
+                            unsigned mode, unsigned start, unsigned count);
+void
 softpipe_draw_range_elements(struct pipe_context *pipe,
                              struct pipe_buffer *indexBuffer,
                              unsigned indexSize,
-- 
cgit v1.2.3


From ebbc73d1aed283c9bc4aa2b37bed4374bbaec5b5 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 21 Dec 2009 21:50:21 +0100
Subject: nv50: correct the negation of DDY sources

The state tracker now does the correct thing, so
interpret negation normally.
---
 src/gallium/drivers/nv50/nv50_program.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 04b345bcf3..679c28ce4b 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1702,8 +1702,8 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 
 	assert(src->type == P_TEMP);
 
-	e->inst[0] = 0xc0140000;
-	e->inst[1] = 0x89800000;
+	e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0240000 : 0xc0140000;
+	e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x86400000 : 0x89800000;
 	set_long(pc, e);
 	set_dst(pc, dst, e);
 	set_src_0(pc, src, e);
@@ -1715,25 +1715,16 @@ emit_ddx(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 static void
 emit_ddy(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	struct nv50_reg *r = src;
 	struct nv50_program_exec *e = exec(pc);
 
 	assert(src->type == P_TEMP);
 
-	if (!(src->mod & NV50_MOD_NEG)) { /* ! double negation */
-		r = alloc_temp(pc, NULL);
-		emit_neg(pc, r, src);
-	}
-
-	e->inst[0] = 0xc0150000;
-	e->inst[1] = 0x8a400000;
+	e->inst[0] = (src->mod & NV50_MOD_NEG) ? 0xc0250000 : 0xc0150000;
+	e->inst[1] = (src->mod & NV50_MOD_NEG) ? 0x85800000 : 0x8a400000;
 	set_long(pc, e);
 	set_dst(pc, dst, e);
-	set_src_0(pc, r, e);
-	set_src_2(pc, r, e);
-
-	if (r != src)
-		free_temp(pc, r);
+	set_src_0(pc, src, e);
+	set_src_2(pc, src, e);
 
 	emit(pc, e);
 }
@@ -1791,6 +1782,7 @@ static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
 	switch (insn->Instruction.Opcode) {
+	case TGSI_OPCODE_DDX:
 	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
 	case TGSI_OPCODE_DP4:
-- 
cgit v1.2.3


From 96d63ebbf524df78e7a1d83d2acf75ae72d27f2a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 22 Dec 2009 09:52:26 +0000
Subject: i965g: update for u_format changes

---
 src/gallium/drivers/i965/brw_screen_texture.c | 23 ++++++++++++-----------
 src/gallium/drivers/i965/brw_winsys.h         |  2 +-
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_screen_texture.c b/src/gallium/drivers/i965/brw_screen_texture.c
index ff999086c0..feb9d5f765 100644
--- a/src/gallium/drivers/i965/brw_screen_texture.c
+++ b/src/gallium/drivers/i965/brw_screen_texture.c
@@ -31,6 +31,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
+#include "util/u_format.h"
 
 #include "brw_screen.h"
 #include "brw_defines.h"
@@ -201,8 +202,8 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
 
    /* XXX: compressed textures need special treatment here
     */
-   tex->cpp = pf_get_size(tex->base.format);
-   tex->compressed = pf_is_compressed(tex->base.format);
+   tex->cpp = util_format_get_blocksize(tex->base.format);
+   tex->compressed = util_format_is_compressed(tex->base.format);
 
    make_empty_list(&tex->views[0]);
    make_empty_list(&tex->views[1]);
@@ -213,7 +214,7 @@ static struct pipe_texture *brw_texture_create( struct pipe_screen *screen,
        !bscreen->no_tiling) 
    {
       if (bscreen->chipset.is_965 &&
-	  pf_is_depth_or_stencil(templ->format))
+	  util_format_is_depth_or_stencil(templ->format))
 	 tex->tiling = BRW_TILING_Y;
       else
 	 tex->tiling = BRW_TILING_X;
@@ -391,14 +392,10 @@ brw_get_tex_transfer(struct pipe_screen *screen,
    trans = CALLOC_STRUCT(brw_transfer);
    if (trans) {
       pipe_texture_reference(&trans->base.texture, texture);
-      trans->base.format = trans->base.format;
       trans->base.x = x;
       trans->base.y = y;
       trans->base.width = w;
       trans->base.height = h;
-      trans->base.block = texture->block;
-      trans->base.nblocksx = texture->nblocksx[level];
-      trans->base.nblocksy = texture->nblocksy[level];
       trans->base.stride = tex->pitch * tex->cpp;
       trans->offset = offset;
       trans->base.usage = usage;
@@ -426,9 +423,11 @@ brw_transfer_map(struct pipe_screen *screen,
    if (!map)
       return NULL;
 
+   /* XXX: blocksize and compressed textures
+    */
    return map + brw_transfer(transfer)->offset +
-      transfer->y / transfer->block.height * transfer->stride +
-      transfer->x / transfer->block.width * transfer->block.size;
+      transfer->y /* / transfer->block.height */ * transfer->stride +
+      transfer->x /* / transfer->block.width */ * brw_texture(transfer->texture)->cpp;
 }
 
 static void
@@ -481,7 +480,7 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
        templ->depth0 != 1)
       return NULL;
 
-   if (pf_is_compressed(templ->format))
+   if (util_format_is_compressed(templ->format))
       return NULL;
 
    tex = CALLOC_STRUCT(brw_texture);
@@ -492,7 +491,9 @@ brw_texture_blanket_winsys_buffer(struct pipe_screen *screen,
    pipe_reference_init(&tex->base.reference, 1);
    tex->base.screen = screen;
 
-   tex->cpp = pf_get_size(tex->base.format);
+   /* XXX: cpp vs. blocksize
+    */
+   tex->cpp = util_format_get_blocksize(tex->base.format);
    tex->tiling = tiling;
 
    make_empty_list(&tex->views[0]);
diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 2f47067716..4f3187c33e 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -244,7 +244,7 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
 {
    struct brw_winsys_buffer *old_buf = *ptr;
 
-   if (pipe_reference((struct pipe_reference **)ptr, &buf->reference))
+   if (pipe_reference(&(*ptr)->reference, &buf->reference))
       old_buf->sws->bo_destroy(old_buf);
 }
 
-- 
cgit v1.2.3


From f069e457503c9fe5d252330937f944a5d2aeb54c Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 22 Dec 2009 09:57:16 +0000
Subject: i965g: fix bo_reference

---
 src/gallium/drivers/i965/brw_winsys.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_winsys.h b/src/gallium/drivers/i965/brw_winsys.h
index 4f3187c33e..a242e31218 100644
--- a/src/gallium/drivers/i965/brw_winsys.h
+++ b/src/gallium/drivers/i965/brw_winsys.h
@@ -246,6 +246,8 @@ bo_reference(struct brw_winsys_buffer **ptr, struct brw_winsys_buffer *buf)
 
    if (pipe_reference(&(*ptr)->reference, &buf->reference))
       old_buf->sws->bo_destroy(old_buf);
+
+   *ptr = buf;
 }
 
 
-- 
cgit v1.2.3


From d186079520234a776c3fa88c81da935d65981fec Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Tue, 22 Dec 2009 21:26:51 +0100
Subject: i965g: fix for edgeflag changes (untested)

---
 src/gallium/drivers/i965/brw_pipe_shader.c |  3 ---
 src/gallium/drivers/i965/brw_pipe_vertex.c |  7 -------
 src/gallium/drivers/i965/brw_vs.c          | 14 --------------
 src/gallium/drivers/i965/brw_vs.h          |  3 ---
 src/gallium/drivers/i965/brw_vs_emit.c     |  6 ------
 5 files changed, 33 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 31a715ab65..20f20571f6 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -215,15 +215,12 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
          else
             vs->output_bfc1 = i;
          break;
-#if 0
       case TGSI_SEMANTIC_EDGEFLAG:
          vs->output_edgeflag = i;
          break;
-#endif
       }
    }
 
-
    
    /* Done:
     */
diff --git a/src/gallium/drivers/i965/brw_pipe_vertex.c b/src/gallium/drivers/i965/brw_pipe_vertex.c
index 3d87a2853f..e3c48e3149 100644
--- a/src/gallium/drivers/i965/brw_pipe_vertex.c
+++ b/src/gallium/drivers/i965/brw_pipe_vertex.c
@@ -44,19 +44,12 @@ static void brw_set_vertex_buffers(struct pipe_context *pipe,
    brw->state.dirty.mesa |= PIPE_NEW_VERTEX_BUFFER;
 }
 
-static void brw_set_edgeflags( struct pipe_context *pipe,
-			       const unsigned *bitfield )
-{
-   /* XXX */
-}
-
 
 void 
 brw_pipe_vertex_init( struct brw_context *brw )
 {
    brw->base.set_vertex_buffers = brw_set_vertex_buffers;
    brw->base.set_vertex_elements = brw_set_vertex_elements;
-   brw->base.set_edgeflags = brw_set_edgeflags;
 }
 
 
diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c
index 14a1c3bcf1..e3ea5a3a13 100644
--- a/src/gallium/drivers/i965/brw_vs.c
+++ b/src/gallium/drivers/i965/brw_vs.c
@@ -58,18 +58,6 @@ static enum pipe_error do_vs_prog( struct brw_context *brw,
    c.prog_data.nr_outputs = vp->info.num_outputs;
    c.prog_data.nr_inputs = vp->info.num_inputs;
 
-   /* XXX: we want edgeflag handling to be integrated to the vertex
-    * shader, but are currently faking the edgeflag output:
-    */
-   if (c.key.copy_edgeflag) {
-      c.prog_data.output_edgeflag = c.prog_data.nr_outputs;
-      c.prog_data.nr_outputs++;
-   }
-   else {
-      c.prog_data.output_edgeflag = ~0;
-   }
-
-
    if (1)
       tgsi_dump(c.vp->tokens, 0);
 
@@ -108,8 +96,6 @@ static enum pipe_error brw_upload_vs_prog(struct brw_context *brw)
 
    key.program_string_id = vp->id;
    key.nr_userclip = brw->curr.ucp.nr;
-   key.copy_edgeflag = (brw->curr.rast->templ.fill_ccw != PIPE_POLYGON_MODE_FILL ||
-			brw->curr.rast->templ.fill_cw != PIPE_POLYGON_MODE_FILL);
 
    memcpy(&key.fs_signature, sig, brw_fs_signature_size(sig));
 
diff --git a/src/gallium/drivers/i965/brw_vs.h b/src/gallium/drivers/i965/brw_vs.h
index 3d1598d02b..944d88c84c 100644
--- a/src/gallium/drivers/i965/brw_vs.h
+++ b/src/gallium/drivers/i965/brw_vs.h
@@ -41,7 +41,6 @@
 struct brw_vs_prog_key {
    GLuint program_string_id;
    GLuint nr_userclip:4;
-   GLuint copy_edgeflag:1;
    GLuint pad:26;
    struct brw_fs_signature fs_signature;
 };
@@ -66,8 +65,6 @@ struct brw_vs_compile {
    GLuint nr_immediates;
    GLfloat immediate[128][4];
 
-   GLboolean copy_edgeflag;
-
    GLuint overflow_grf_start;
    GLuint overflow_count;
 
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 1d0fff0d9e..714def5046 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -1141,12 +1141,6 @@ static void emit_vertex_write( struct brw_vs_compile *c)
    int i;
    GLuint len_vertext_header = 2;
 
-   if (c->key.copy_edgeflag) {
-      brw_MOV(p, 
-              get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag),
-              brw_imm_f(1));
-   }
-
    /* Build ndc coords */
    ndc = get_tmp(c);
    /* ndc = 1.0 / pos.w */
-- 
cgit v1.2.3


From fb8bff341e6ceae25327f152d197f74d11432f22 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 22 Dec 2009 15:19:48 -0800
Subject: i915g: Use C-style comment.

---
 src/gallium/drivers/i915simple/i915_state.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c
index 0087dfa410..352a4ae2f3 100644
--- a/src/gallium/drivers/i915simple/i915_state.c
+++ b/src/gallium/drivers/i915simple/i915_state.c
@@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap)
       return TEXCOORDMODE_CLAMP_EDGE;
    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       return TEXCOORDMODE_CLAMP_BORDER;
-//   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
-//      return TEXCOORDMODE_MIRROR;
+   /*         
+   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+      return TEXCOORDMODE_MIRROR;
+    */
    default:
       return TEXCOORDMODE_WRAP;
    }
-- 
cgit v1.2.3


From f6ca26e5a7ad0dddf7990aa2a3420ff0f1cc93aa Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 22 Dec 2009 17:17:28 -0800
Subject: trace: Add PIPE_OS_APPLE.

---
 src/gallium/drivers/trace/tr_dump.c | 4 ++--
 src/gallium/drivers/trace/tr_rbug.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c
index 7e2ccbcfdc..0f45e211a3 100644
--- a/src/gallium/drivers/trace/tr_dump.c
+++ b/src/gallium/drivers/trace/tr_dump.c
@@ -40,7 +40,7 @@
 
 #include "pipe/p_config.h"
 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
 #include <stdlib.h>
 #endif
 
@@ -258,7 +258,7 @@ boolean trace_dump_trace_begin()
       trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n");
       trace_dump_writes("<trace version='0.1'>\n");
 
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
       /* Linux applications rarely cleanup GL / Gallium resources so catch
        * application exit here */
       atexit(trace_dump_trace_close);
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index e85ac15edc..1dd9900be0 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -44,7 +44,7 @@
 
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
 #  define sleep Sleep
-#elif defined(PIPE_OS_LINUX)
+#elif defined(PIPE_OS_LINUX) || defined(PIPE_OS_APPLE)
 void usleep(int);
 #  define sleep usleep
 #else
-- 
cgit v1.2.3


From b20382d477b7454922af56c455b555d9e904cdc4 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Tue, 22 Dec 2009 17:34:39 -0800
Subject: trace: Silence uninitialized variable warnings.

---
 src/gallium/drivers/trace/tr_rbug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index 1dd9900be0..0372d92782 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -179,7 +179,7 @@ static int
 trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header, uint32_t serial)
 {
    struct trace_screen *tr_scr = tr_rbug->tr_scr;
-   struct trace_texture *tr_tex;
+   struct trace_texture *tr_tex = NULL;
    struct rbug_proto_texture_info *gpti = (struct rbug_proto_texture_info *)header;
    struct tr_list *ptr;
    struct pipe_texture *t;
@@ -220,7 +220,7 @@ trace_rbug_texture_read(struct trace_rbug *tr_rbug, struct rbug_header *header,
    struct rbug_proto_texture_read *gptr = (struct rbug_proto_texture_read *)header;
 
    struct trace_screen *tr_scr = tr_rbug->tr_scr;
-   struct trace_texture *tr_tex;
+   struct trace_texture *tr_tex = NULL;
    struct tr_list *ptr;
 
    struct pipe_screen *screen = tr_scr->screen;
-- 
cgit v1.2.3


From 315ca95666b3744f8c72c35135aea5d96de4cbb2 Mon Sep 17 00:00:00 2001
From: Tomas Carnecky <tom@dbservice.com>
Date: Wed, 23 Dec 2009 15:40:20 +0000
Subject: [i965] Rename {pf -> util}_format_is_depth_or_stencil()

The function was moved and renamed in 0bed834b.

Signed-off-by: Tomas Carnecky <tom@dbservice.com>
---
 src/gallium/drivers/i965/brw_wm_sampler_state.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index 4e99ac703a..d9f17ac305 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -30,6 +30,7 @@
   */
                    
 #include "util/u_math.h"
+#include "util/u_format.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
@@ -130,7 +131,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
       const struct brw_sampler *sampler = brw->curr.sampler[i];
       const float *bc;
 
-      if (pf_is_depth_or_stencil(tex->base.format)) {
+      if (util_is_depth_or_stencil(tex->base.format)) {
 	 float bordercolor[4] = {
 	    sampler->border_color[0],
 	    sampler->border_color[0],
-- 
cgit v1.2.3


From cc7a2d8e6422f632b28e740afc4ab29bd68ac0f9 Mon Sep 17 00:00:00 2001
From: Tomas Carnecky <tom@dbservice.com>
Date: Wed, 23 Dec 2009 16:57:17 +0100
Subject: i965g: Rename {pf->util_format}_is_depth_or_stencil()

---
 src/gallium/drivers/i965/brw_wm_sampler_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_sampler_state.c b/src/gallium/drivers/i965/brw_wm_sampler_state.c
index d9f17ac305..a8bc31c9ce 100644
--- a/src/gallium/drivers/i965/brw_wm_sampler_state.c
+++ b/src/gallium/drivers/i965/brw_wm_sampler_state.c
@@ -131,7 +131,7 @@ brw_wm_sampler_update_default_colors(struct brw_context *brw)
       const struct brw_sampler *sampler = brw->curr.sampler[i];
       const float *bc;
 
-      if (util_is_depth_or_stencil(tex->base.format)) {
+      if (util_format_is_depth_or_stencil(tex->base.format)) {
 	 float bordercolor[4] = {
 	    sampler->border_color[0],
 	    sampler->border_color[0],
-- 
cgit v1.2.3


From b5a408bae518ededbb871d113dab89f3e15bfb45 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Wed, 23 Dec 2009 15:21:56 +0000
Subject: llvmpipe: Install unit tests on build/xxx/bin

---
 scons/gallium.py                        |  7 +++++++
 src/gallium/drivers/llvmpipe/SConscript | 24 +++++++++++-------------
 2 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/scons/gallium.py b/scons/gallium.py
index 5f149f9528..b7a14af4b1 100644
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -156,6 +156,12 @@ def symlink(target, source, env):
         os.remove(target)
     os.symlink(os.path.basename(source), target)
 
+def install_program(env, source):
+    source = str(source[0])
+    target_dir =  os.path.join(env.Dir('#.').srcnode().abspath, env['build'], 'bin')
+    target_name = str(source)
+    env.InstallAs(os.path.join(target_dir, target_name), source)
+
 def install_shared_library(env, source, version = ()):
     source = str(source[0])
     version = tuple(map(str, version))
@@ -169,6 +175,7 @@ def install_shared_library(env, source, version = ()):
         last = env.Command(os.path.join(target_dir, target_name), last, action) 
 
 def createInstallMethods(env):
+    env.AddMethod(install_program, 'InstallProgram')
     env.AddMethod(install_shared_library, 'InstallSharedLibrary')
 
 
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3bd2e70013..de6156795d 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -76,19 +76,17 @@ env = env.Clone()
 
 env.Prepend(LIBS = [llvmpipe] + auxiliaries)
 
-env.Program(
-    target = 'lp_test_format',
-    source = ['lp_test_format.c', 'lp_test_main.c'],
-)
-
-env.Program(
-    target = 'lp_test_blend',
-    source = ['lp_test_blend.c', 'lp_test_main.c'],
-)
+tests = [
+    'format',
+    'blend',
+    'conv',
+]
 
-env.Program(
-    target = 'lp_test_conv',
-    source = ['lp_test_conv.c', 'lp_test_main.c'],
-)
+for test in tests:
+    target = env.Program(
+        target = 'lp_test_' + test,
+        source = ['lp_test_' + test + '.c', 'lp_test_main.c'],
+    )
+    env.InstallProgram(target)
 
 Export('llvmpipe')
-- 
cgit v1.2.3


From e3be32ccf900f22c1f75c9f2d8842fe9630da7d9 Mon Sep 17 00:00:00 2001
From: Roland Scheidegger <sroland@vmware.com>
Date: Wed, 23 Dec 2009 21:48:36 +0100
Subject: gallium: propagate draw retval changes into cell driver

---
 src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 644496db40..34fe1b1b64 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -85,7 +85,7 @@ cell_unmap_constant_buffers(struct cell_context *sp)
  *
  * XXX should the element buffer be specified/bound with a separate function?
  */
-static boolean
+static void
 cell_draw_range_elements(struct pipe_context *pipe,
                          struct pipe_buffer *indexBuffer,
                          unsigned indexSize,
@@ -145,29 +145,27 @@ cell_draw_range_elements(struct pipe_context *pipe,
 
    /* Note: leave drawing surfaces mapped */
    cell_unmap_constant_buffers(sp);
-
-   return TRUE;
 }
 
 
-static boolean
+static void
 cell_draw_elements(struct pipe_context *pipe,
                    struct pipe_buffer *indexBuffer,
                    unsigned indexSize,
                    unsigned mode, unsigned start, unsigned count)
 {
-   return cell_draw_range_elements( pipe, indexBuffer,
-                                    indexSize,
-                                    0, 0xffffffff,
-                                    mode, start, count );
+   cell_draw_range_elements( pipe, indexBuffer,
+                             indexSize,
+                             0, 0xffffffff,
+                             mode, start, count );
 }
 
 
-static boolean
+static void
 cell_draw_arrays(struct pipe_context *pipe, unsigned mode,
                      unsigned start, unsigned count)
 {
-   return cell_draw_elements(pipe, NULL, 0, mode, start, count);
+   cell_draw_elements(pipe, NULL, 0, mode, start, count);
 }
 
 
-- 
cgit v1.2.3


From d29f55546dec74ca77dce3a3bf581c251be1d397 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 24 Dec 2009 12:39:42 +0100
Subject: nv50: make edgeflags work

It doesn't seem to be possible to set the edgeflag in the
vertex shader, so we need to fall back to pushing vertices
through the FIFO and use method 0x15e4 if edgeflags are used.

This only works if VP does MOV OUT[X] IN[Y] where X is the
edgeflag output, and Y is saved so we can tell the correct
input later.

The VP still writes the useless values to wasted outputs
as punishment.
---
 src/gallium/drivers/nv50/nv50_program.c | 19 +++++++++++---
 src/gallium/drivers/nv50/nv50_program.h |  1 +
 src/gallium/drivers/nv50/nv50_screen.c  |  3 +++
 src/gallium/drivers/nv50/nv50_vbo.c     | 45 ++++++++++++++++++++++++++++++++-
 4 files changed, 64 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 679c28ce4b..ce3fa5fc88 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -159,6 +159,8 @@ struct nv50_pc {
 	unsigned insn_nr;
 
 	boolean allow32;
+
+	uint8_t edgeflag_out;
 };
 
 static INLINE struct nv50_reg *
@@ -2554,10 +2556,16 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 	mask = dst->WriteMask;
 
         if (dst->File == TGSI_FILE_TEMPORARY)
-                reg = pc->temp;
+		reg = pc->temp;
         else
-        if (dst->File == TGSI_FILE_OUTPUT)
-                reg = pc->result;
+	if (dst->File == TGSI_FILE_OUTPUT) {
+		reg = pc->result;
+
+		if (insn->Instruction.Opcode == TGSI_OPCODE_MOV &&
+		    dst->Index == pc->edgeflag_out &&
+		    insn->Src[0].Register.File == TGSI_FILE_INPUT)
+			pc->p->cfg.edgeflag_in = insn->Src[0].Register.Index;
+	}
 
 	if (reg) {
 		for (c = 0; c < 4; c++) {
@@ -2856,6 +2864,9 @@ nv50_program_tx_prep(struct nv50_pc *pc)
 					if (p->cfg.io_nr > first)
 						p->cfg.io_nr = first;
 					break;
+				case TGSI_SEMANTIC_EDGEFLAG:
+					pc->edgeflag_out = first;
+					break;
 					/*
 				case TGSI_SEMANTIC_CLIP_DISTANCE:
 					p->cfg.clpd = MIN2(p->cfg.clpd, first);
@@ -3104,6 +3115,8 @@ ctor_nv50_pc(struct nv50_pc *pc, struct nv50_program *p)
 	p->cfg.two_side[0].hw = 0x40;
 	p->cfg.two_side[1].hw = 0x40;
 
+	p->cfg.edgeflag_in = pc->edgeflag_out = 0xff;
+
 	switch (p->type) {
 	case PIPE_SHADER_VERTEX:
 		p->cfg.psiz = 0x40;
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 4a90c372ce..461fec1d89 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -58,6 +58,7 @@ struct nv50_program {
 		/* VP only */
 		uint8_t clpd, clpd_nr;
 		uint8_t psiz;
+		uint8_t edgeflag_in;
 	} cfg;
 };
 
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index d443ca3ad0..2435f65ed2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -441,6 +441,9 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
 	so_data  (so, 1);
 
+	so_method(so, screen->tesla, 0x15e4, 1);
+	so_data  (so, 1); /* default edgeflag to TRUE */
+
 	so_emit(chan, so);
 	so_ref (so, &screen->static_init);
 	so_ref (NULL, &so);
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index f7fa0659e8..39324e30f6 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -372,6 +372,10 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 		so_data  (so, fui(v[1]));
 		break;
 	case 1:
+		if (attrib == nv50->vertprog->cfg.edgeflag_in) {
+			so_method(so, tesla, 0x15e4, 1);
+			so_data  (so, v[0] ? 1 : 0);
+		}
 		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
 		so_data  (so, fui(v[0]));
 		break;
@@ -401,6 +405,9 @@ nv50_vbo_validate(struct nv50_context *nv50)
 		    !(nv50->vtxbuf[i].buffer->usage & PIPE_BUFFER_USAGE_VERTEX))
 			nv50->vbo_fifo = 0xffff;
 
+	if (nv50->vertprog->cfg.edgeflag_in < 16)
+		nv50->vbo_fifo = 0xffff; /* vertprog can't set edgeflag */
+
 	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
 
 	vtxattr = NULL;
@@ -479,6 +486,9 @@ struct nv50_vbo_emitctx
 	unsigned nr_ve;
 	unsigned vtx_dwords;
 	unsigned vtx_max;
+
+	float edgeflag;
+	unsigned ve_edgeflag;
 };
 
 static INLINE void
@@ -622,6 +632,9 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 	if (nv50_map_vbufs(nv50) == FALSE)
 		return FALSE;
 
+	emit->ve_edgeflag = nv50->vertprog->cfg.edgeflag_in;
+
+	emit->edgeflag = 0.5f;
 	emit->nr_ve = 0;
 	emit->vtx_dwords = 0;
 
@@ -644,7 +657,8 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 		desc = util_format_description(ve->src_format);
 		assert(desc);
 
-		size = util_format_get_component_bits(ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
+		size = util_format_get_component_bits(
+			ve->src_format, UTIL_FORMAT_COLORSPACE_RGB, 0);
 
 		assert(ve->nr_components > 0 && ve->nr_components <= 4);
 
@@ -686,10 +700,31 @@ emit_prepare(struct nv50_context *nv50, struct nv50_vbo_emitctx *emit,
 	}
 
 	emit->vtx_max = 512 / emit->vtx_dwords;
+	if (emit->ve_edgeflag < 16)
+		emit->vtx_max = 1;
 
 	return TRUE;
 }
 
+static INLINE void
+set_edgeflag(struct nouveau_channel *chan,
+	     struct nouveau_grobj *tesla,
+	     struct nv50_vbo_emitctx *emit, uint32_t index)
+{
+	unsigned i = emit->ve_edgeflag;
+
+	if (i < 16) {
+		float f = *((float *)(emit->map[i] + index * emit->stride[i]));
+
+		if (emit->edgeflag != f) {
+			emit->edgeflag = f;
+
+			BEGIN_RING(chan, tesla, 0x15e4, 1);
+			OUT_RING  (chan, f ? 1 : 0);
+		}
+	}
+}
+
 static boolean
 nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
 {
@@ -704,6 +739,8 @@ nv50_push_arrays(struct nv50_context *nv50, unsigned start, unsigned count)
 		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
 	        dw = nr * emit.vtx_dwords;
 
+		set_edgeflag(chan, tesla, &emit, 0); /* nr will be 1 */
+
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
 		for (i = 0; i < nr; ++i)
 			emit_vtx_next(chan, &emit);
@@ -729,6 +766,8 @@ nv50_push_elements_u32(struct nv50_context *nv50, uint32_t *map, unsigned count)
 		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
 	        dw = nr * emit.vtx_dwords;
 
+		set_edgeflag(chan, tesla, &emit, *map);
+
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
 		for (i = 0; i < nr; ++i)
 			emit_vtx(chan, &emit, *map++);
@@ -754,6 +793,8 @@ nv50_push_elements_u16(struct nv50_context *nv50, uint16_t *map, unsigned count)
 		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
 	        dw = nr * emit.vtx_dwords;
 
+		set_edgeflag(chan, tesla, &emit, *map);
+
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
 		for (i = 0; i < nr; ++i)
 			emit_vtx(chan, &emit, *map++);
@@ -779,6 +820,8 @@ nv50_push_elements_u08(struct nv50_context *nv50, uint8_t *map, unsigned count)
 		unsigned i, dw, nr = MIN2(count, emit.vtx_max);
 	        dw = nr * emit.vtx_dwords;
 
+		set_edgeflag(chan, tesla, &emit, *map);
+
 		BEGIN_RING(chan, tesla, NV50TCL_VERTEX_DATA | 0x40000000, dw);
 		for (i = 0; i < nr; ++i)
 			emit_vtx(chan, &emit, *map++);
-- 
cgit v1.2.3


From b7b2226a75f1955da9bd4a28754b7eaebb01fed5 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 24 Dec 2009 13:35:55 +0100
Subject: nv50: support TGSI_OPCODE_CONT

---
 src/gallium/drivers/nv50/nv50_program.c | 5 +++++
 src/gallium/drivers/nv50/nv50_screen.c  | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index ce3fa5fc88..a101ac095c 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2241,6 +2241,11 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			set_pred(pc, 0x6, 1, pc->p->exec_tail); /* @NSF */
 		}
 		break;
+	case TGSI_OPCODE_CONT:
+		assert(pc->loop_lvl > 0);
+		emit_branch(pc, -1, 0)->param.index =
+			pc->loop_pos[pc->loop_lvl - 1];
+		break;
 	case TGSI_OPCODE_COS:
 		if (mask & 8) {
 			emit_precossin(pc, temp, src[0][3]);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 2435f65ed2..5a1efd3998 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -128,7 +128,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
 	case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
 		return 1;
 	case PIPE_CAP_TGSI_CONT_SUPPORTED:
-		return 0;
+		return 1;
 	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
 		return 1;
 	case NOUVEAU_CAP_HW_VTXBUF:
-- 
cgit v1.2.3


From 9546c3dbd2bdf85654d7ef0e90837f641bd801e4 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Tue, 22 Dec 2009 10:14:59 +0000
Subject: i965g: calculate depth min/max

Previously hard-wired to 0..1
---
 src/gallium/drivers/i965/brw_pipe_fb.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_fb.c b/src/gallium/drivers/i965/brw_pipe_fb.c
index 6b03094f50..5d4e5025f9 100644
--- a/src/gallium/drivers/i965/brw_pipe_fb.c
+++ b/src/gallium/drivers/i965/brw_pipe_fb.c
@@ -3,6 +3,7 @@
 #include "pipe/p_state.h"
 
 #include "brw_context.h"
+#include "brw_debug.h"
 
 /**
  * called from intelDrawBuffer()
@@ -51,8 +52,14 @@ static void brw_set_viewport_state( struct pipe_context *pipe,
    struct brw_context *brw = brw_context(pipe);
 
    brw->curr.viewport = *viewport;
-   brw->curr.ccv.min_depth = 0.0;         /* XXX: near */
-   brw->curr.ccv.max_depth = 1.0;         /* XXX: far */
+   brw->curr.ccv.min_depth = viewport->scale[2] * -1.0 + viewport->translate[2];
+   brw->curr.ccv.max_depth = viewport->scale[2] *  1.0 + viewport->translate[2];
+
+   if (0)
+      debug_printf("%s depth range %f .. %f\n",
+                   __FUNCTION__,
+                   brw->curr.ccv.min_depth,
+                   brw->curr.ccv.max_depth);
 
    brw->state.dirty.mesa |= PIPE_NEW_VIEWPORT;
 }
-- 
cgit v1.2.3


From 6c30e17f9eb572f1bb9b80652a8c6c0d838d0498 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 24 Dec 2009 12:45:42 +0000
Subject: i965g: strict aliasing changes

---
 src/gallium/drivers/i965/brw_pipe_clear.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_pipe_clear.c b/src/gallium/drivers/i965/brw_pipe_clear.c
index 211be88178..452e1e89f9 100644
--- a/src/gallium/drivers/i965/brw_pipe_clear.c
+++ b/src/gallium/drivers/i965/brw_pipe_clear.c
@@ -114,18 +114,18 @@ static void color_clear(struct brw_context *brw,
                         const float *rgba )
 {
    enum pipe_error ret;
-   unsigned value;
+   union util_color value;
 
    util_pack_color( rgba, bsurface->base.format, &value );
 
    if (bsurface->cpp == 2)
-      value |= value << 16;
+      value.ui |= value.ui << 16;
 
-   ret = try_clear( brw, bsurface, value );
+   ret = try_clear( brw, bsurface, value.ui );
 
    if (ret != 0) {
       brw_context_flush( brw );
-      ret = try_clear( brw, bsurface, value );
+      ret = try_clear( brw, bsurface, value.ui );
       assert( ret == 0 );
    }
 }
-- 
cgit v1.2.3


From 5f6dcf65e7023edda1783eccef03d213f3cb26fb Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 24 Dec 2009 12:52:43 +0000
Subject: i965g: get trivial/tri working again after edgeflag changes

---
 src/gallium/drivers/i965/brw_clip.c        | 12 ++++-----
 src/gallium/drivers/i965/brw_context.h     |  9 +++++--
 src/gallium/drivers/i965/brw_pipe_shader.c |  7 ++++++
 src/gallium/drivers/i965/brw_vs_emit.c     | 39 ++++++++++--------------------
 4 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c
index 58d9e56df2..d67a1a6263 100644
--- a/src/gallium/drivers/i965/brw_clip.c
+++ b/src/gallium/drivers/i965/brw_clip.c
@@ -83,19 +83,19 @@ compile_clip_prog( struct brw_context *brw,
 
    c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE;
 
-   if (c.key.output_color0)
+   if (c.key.output_color0 != BRW_OUTPUT_NOT_PRESENT)
       c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE;
 
-   if (c.key.output_color1)
+   if (c.key.output_color1 != BRW_OUTPUT_NOT_PRESENT)
       c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE;
 
-   if (c.key.output_bfc0)
+   if (c.key.output_bfc0 != BRW_OUTPUT_NOT_PRESENT)
       c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE;
 
-   if (c.key.output_bfc1)
+   if (c.key.output_bfc1 != BRW_OUTPUT_NOT_PRESENT)
       c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE;
 
-   if (c.key.output_edgeflag)
+   if (c.key.output_edgeflag != BRW_OUTPUT_NOT_PRESENT)
       c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE;
    
    if (BRW_IS_IGDNG(brw))
@@ -182,7 +182,6 @@ upload_clip_prog(struct brw_context *brw)
     */
    /* CACHE_NEW_VS_PROG */
    key.nr_attrs        = brw->vs.prog_data->nr_outputs;
-   key.output_edgeflag = brw->vs.prog_data->output_edgeflag;
 
    /* PIPE_NEW_VS */
    key.output_hpos     = vs->output_hpos;
@@ -190,6 +189,7 @@ upload_clip_prog(struct brw_context *brw)
    key.output_color1   = vs->output_color1;
    key.output_bfc0     = vs->output_bfc0;
    key.output_bfc1     = vs->output_bfc1;
+   key.output_edgeflag = vs->output_edgeflag;
 
    /* PIPE_NEW_CLIP */
    key.nr_userclip = brw->curr.ucp.nr;
diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h
index 56e7807400..8c006bb95b 100644
--- a/src/gallium/drivers/i965/brw_context.h
+++ b/src/gallium/drivers/i965/brw_context.h
@@ -120,6 +120,13 @@
 
 #define BRW_MAX_CURBE                    (32*16)
 
+
+/* Need a value to say a particular vertex shader output isn't
+ * present.  Limits us to 63 outputs currently.
+ */
+#define BRW_OUTPUT_NOT_PRESENT           ((1<<6)-1)
+
+
 struct brw_context;
 
 struct brw_depth_stencil_state {
@@ -335,8 +342,6 @@ struct brw_vs_prog_data {
 
    GLuint nr_params;       /**< number of TGSI_FILE_CONSTANT's */
 
-   GLuint output_edgeflag;
-
    GLboolean writes_psiz;
 
    /* Used for calculating urb partitions:
diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c
index 20f20571f6..bb32d90e33 100644
--- a/src/gallium/drivers/i965/brw_pipe_shader.c
+++ b/src/gallium/drivers/i965/brw_pipe_shader.c
@@ -197,6 +197,13 @@ static void *brw_create_vs_state( struct pipe_context *pipe,
    vs->id = brw->program_id++;
    vs->has_flow_control = has_flow_control(&vs->info);
 
+   vs->output_hpos = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_color0 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_color1 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_bfc0 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_bfc1 = BRW_OUTPUT_NOT_PRESENT;
+   vs->output_edgeflag = BRW_OUTPUT_NOT_PRESENT;
+
    for (i = 0; i < vs->info.num_outputs; i++) {
       int index = vs->info.output_semantic_index[i];
       switch (vs->info.output_semantic_name[i]) {
diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c
index 714def5046..8a16205d2f 100644
--- a/src/gallium/drivers/i965/brw_vs_emit.c
+++ b/src/gallium/drivers/i965/brw_vs_emit.c
@@ -79,18 +79,12 @@ static void release_tmps( struct brw_vs_compile *c )
 static boolean is_position_output( struct brw_vs_compile *c,
                                    unsigned vs_output )
 {
-   struct brw_vertex_shader *vs = c->vp;
-
-   if (vs_output == c->prog_data.output_edgeflag) {
-      return FALSE;
-   }
-   else {
-      unsigned semantic = vs->info.output_semantic_name[vs_output];
-      unsigned index = vs->info.output_semantic_index[vs_output];
+   const struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
       
-      return (semantic == TGSI_SEMANTIC_POSITION &&
-              index == 0);
-   }
+   return (semantic == TGSI_SEMANTIC_POSITION &&
+           index == 0);
 }
 
 
@@ -98,23 +92,16 @@ static boolean find_output_slot( struct brw_vs_compile *c,
                                   unsigned vs_output,
                                   unsigned *fs_input_slot )
 {
-   struct brw_vertex_shader *vs = c->vp;
+   const struct brw_vertex_shader *vs = c->vp;
+   unsigned semantic = vs->info.output_semantic_name[vs_output];
+   unsigned index = vs->info.output_semantic_index[vs_output];
+   unsigned i;
 
-   if (vs_output == c->prog_data.output_edgeflag) {
-      *fs_input_slot = c->key.fs_signature.nr_inputs;
-      return TRUE;
-   }
-   else {
-      unsigned semantic = vs->info.output_semantic_name[vs_output];
-      unsigned index = vs->info.output_semantic_index[vs_output];
-      unsigned i;
-
-      for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
-         if (c->key.fs_signature.input[i].semantic == semantic &&
+   for (i = 0; i < c->key.fs_signature.nr_inputs; i++) {
+      if (c->key.fs_signature.input[i].semantic == semantic &&
           c->key.fs_signature.input[i].semantic_index == index) {
-            *fs_input_slot = i;
-            return TRUE;
-         }
+         *fs_input_slot = i;
+         return TRUE;
       }
    }
 
-- 
cgit v1.2.3


From 89d8577fb3036547ef0b47498cc8dc5c77f886e0 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Mon, 14 Dec 2009 17:11:46 -0500
Subject: gallium: add geometry shader support to gallium

---
 src/gallium/auxiliary/cso_cache/cso_context.c      |  40 ++-
 src/gallium/auxiliary/cso_cache/cso_context.h      |   7 +
 src/gallium/auxiliary/draw/Makefile                |   1 +
 src/gallium/auxiliary/draw/SConscript              |   3 +-
 src/gallium/auxiliary/draw/draw_context.c          |  77 ++++-
 src/gallium/auxiliary/draw/draw_context.h          |  19 +-
 src/gallium/auxiliary/draw/draw_gs.c               | 338 +++++++++++++++++++++
 src/gallium/auxiliary/draw/draw_gs.h               |  76 +++++
 src/gallium/auxiliary/draw/draw_pipe_aaline.c      |  12 +-
 src/gallium/auxiliary/draw/draw_pipe_aapoint.c     |  12 +-
 src/gallium/auxiliary/draw/draw_pipe_clip.c        |   4 +-
 src/gallium/auxiliary/draw/draw_pipe_cull.c        |   2 +-
 src/gallium/auxiliary/draw/draw_pipe_offset.c      |   2 +-
 src/gallium/auxiliary/draw/draw_pipe_stipple.c     |   5 +-
 src/gallium/auxiliary/draw/draw_pipe_wide_line.c   |   2 +-
 src/gallium/auxiliary/draw/draw_pipe_wide_point.c  |  14 +-
 src/gallium/auxiliary/draw/draw_private.h          |  32 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |  50 ++-
 src/gallium/auxiliary/draw/draw_pt_post_vs.c       |   4 +-
 src/gallium/auxiliary/draw/draw_pt_util.c          |  16 +
 .../auxiliary/draw/draw_pt_varray_tmp_linear.h     |   4 +
 src/gallium/auxiliary/draw/draw_vs_varient.c       |   5 +-
 src/gallium/auxiliary/tgsi/tgsi_dump.c             |   4 +-
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |  45 ++-
 src/gallium/auxiliary/tgsi/tgsi_exec.h             |  15 +-
 src/gallium/auxiliary/tgsi/tgsi_text.c             |   4 +-
 src/gallium/drivers/cell/ppu/cell_draw_arrays.c    |   2 +-
 src/gallium/drivers/cell/ppu/cell_state_derived.c  |   8 +-
 src/gallium/drivers/cell/ppu/cell_state_emit.c     |   2 +-
 src/gallium/drivers/i915/i915_context.c            |   2 +-
 src/gallium/drivers/i915/i915_state_derived.c      |  10 +-
 src/gallium/drivers/llvmpipe/lp_state_derived.c    |   2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c         |   3 +-
 src/gallium/drivers/nv04/nv04_vbo.c                |   2 +-
 src/gallium/drivers/nv10/nv10_vbo.c                |   1 +
 src/gallium/drivers/nv20/nv20_state_emit.c         |  16 +-
 src/gallium/drivers/nv20/nv20_vbo.c                |   2 +-
 src/gallium/drivers/nv40/nv40_draw.c               |   5 +-
 src/gallium/drivers/softpipe/sp_context.c          |   4 +
 src/gallium/drivers/softpipe/sp_context.h          |   1 +
 src/gallium/drivers/softpipe/sp_draw_arrays.c      |  23 +-
 src/gallium/drivers/softpipe/sp_screen.c           |   2 +
 src/gallium/drivers/softpipe/sp_setup.c            |   2 +-
 src/gallium/drivers/softpipe/sp_state.h            |  10 +
 src/gallium/drivers/softpipe/sp_state_derived.c    |  10 +-
 src/gallium/drivers/softpipe/sp_state_fs.c         |  59 ++++
 src/gallium/drivers/svga/svga_swtnl_draw.c         |   2 +-
 src/gallium/include/pipe/p_context.h               |   6 +
 src/gallium/include/pipe/p_defines.h               |  30 +-
 src/gallium/include/pipe/p_shader_tokens.h         |  22 +-
 src/gallium/state_trackers/python/p_context.i      |  19 ++
 src/gallium/state_trackers/python/samples/gs.py    | 254 ++++++++++++++++
 src/gallium/state_trackers/python/st_device.h      |   1 +
 src/mesa/state_tracker/st_draw_feedback.c          |   3 +-
 54 files changed, 1159 insertions(+), 137 deletions(-)
 create mode 100644 src/gallium/auxiliary/draw/draw_gs.c
 create mode 100644 src/gallium/auxiliary/draw/draw_gs.h
 create mode 100644 src/gallium/state_trackers/python/samples/gs.py

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 80bd0c91db..2b16332e14 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -42,6 +42,7 @@
 #include "cso_cache/cso_context.h"
 #include "cso_cache/cso_cache.h"
 #include "cso_cache/cso_hash.h"
+#include "cso_context.h"
 
 struct cso_context {
    struct pipe_context *pipe;
@@ -85,8 +86,8 @@ struct cso_context {
    void *blend, *blend_saved;
    void *depth_stencil, *depth_stencil_saved;
    void *rasterizer, *rasterizer_saved;
-   void *fragment_shader, *fragment_shader_saved;
-   void *vertex_shader, *vertex_shader_saved;
+   void *fragment_shader, *fragment_shader_saved, *geometry_shader;
+   void *vertex_shader, *vertex_shader_saved, *geometry_shader_saved;
 
    struct pipe_framebuffer_state fb, fb_saved;
    struct pipe_viewport_state vp, vp_saved;
@@ -1027,3 +1028,38 @@ enum pipe_error cso_set_blend_color(struct cso_context *ctx,
    }
    return PIPE_OK;
 }
+
+enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx,
+                                               void *handle)
+{
+   if (ctx->geometry_shader != handle) {
+      ctx->geometry_shader = handle;
+      ctx->pipe->bind_gs_state(ctx->pipe, handle);
+   }
+   return PIPE_OK;
+}
+
+void cso_delete_geometry_shader(struct cso_context *ctx, void *handle)
+{
+    if (handle == ctx->geometry_shader) {
+      /* unbind before deleting */
+      ctx->pipe->bind_gs_state(ctx->pipe, NULL);
+      ctx->geometry_shader = NULL;
+   }
+   ctx->pipe->delete_gs_state(ctx->pipe, handle);
+}
+
+void cso_save_geometry_shader(struct cso_context *ctx)
+{
+   assert(!ctx->geometry_shader_saved);
+   ctx->geometry_shader_saved = ctx->geometry_shader;
+}
+
+void cso_restore_geometry_shader(struct cso_context *ctx)
+{
+   if (ctx->geometry_shader_saved != ctx->geometry_shader) {
+      ctx->pipe->bind_gs_state(ctx->pipe, ctx->geometry_shader_saved);
+      ctx->geometry_shader = ctx->geometry_shader_saved;
+   }
+   ctx->geometry_shader_saved = NULL;
+}
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h
index e5b92177cf..b9e313e32d 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -146,6 +146,13 @@ void cso_save_vertex_shader(struct cso_context *cso);
 void cso_restore_vertex_shader(struct cso_context *cso);
 
 
+enum pipe_error cso_set_geometry_shader_handle(struct cso_context *ctx,
+                                               void *handle);
+void cso_delete_geometry_shader(struct cso_context *ctx, void *handle);
+void cso_save_geometry_shader(struct cso_context *cso);
+void cso_restore_geometry_shader(struct cso_context *cso);
+
+
 
 enum pipe_error cso_set_framebuffer(struct cso_context *cso,
                                     const struct pipe_framebuffer_state *fb);
diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile
index 5041dcc072..248167465f 100644
--- a/src/gallium/auxiliary/draw/Makefile
+++ b/src/gallium/auxiliary/draw/Makefile
@@ -5,6 +5,7 @@ LIBNAME = draw
 
 C_SOURCES = \
 	draw_context.c \
+        draw_gs.c \
 	draw_pipe.c \
 	draw_pipe_aaline.c \
 	draw_pipe_aapoint.c \
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
index 5f05aa324a..a022c145e9 100644
--- a/src/gallium/auxiliary/draw/SConscript
+++ b/src/gallium/auxiliary/draw/SConscript
@@ -40,7 +40,8 @@ draw = env.ConvenienceLibrary(
 		'draw_vs_llvm.c',
 		'draw_vs_ppc.c',
 		'draw_vs_sse.c',
-		'draw_vs_varient.c'
+		'draw_vs_varient.c',
+                'draw_gs.c'
 	])
 
 auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index cc5f7f0105..667aa46b20 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -36,6 +36,7 @@
 #include "draw_context.h"
 #include "draw_vbuf.h"
 #include "draw_vs.h"
+#include "draw_gs.h"
 #include "draw_pt.h"
 #include "draw_pipe.h"
 
@@ -67,6 +68,9 @@ struct draw_context *draw_create( void )
    if (!draw_vs_init( draw ))
       goto fail;
 
+   if (!draw_gs_init( draw ))
+      goto fail;
+
    return draw;
 
 fail:
@@ -231,11 +235,19 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw,
 
 void
 draw_set_mapped_constant_buffer(struct draw_context *draw,
-                                const void *buffer, 
+                                unsigned shader_type,
+                                const void *buffer,
                                 unsigned size )
 {
-   draw->pt.user.constants = buffer;
-   draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
+   debug_assert(shader_type == PIPE_SHADER_VERTEX ||
+                shader_type == PIPE_SHADER_GEOMETRY);
+   if (shader_type == PIPE_SHADER_VERTEX) {
+      draw->pt.user.vs_constants = buffer;
+      draw_vs_set_constants( draw, (const float (*)[4])buffer, size );
+   } else if (shader_type == PIPE_SHADER_GEOMETRY) {
+      draw->pt.user.gs_constants = buffer;
+      draw_gs_set_constants( draw, (const float (*)[4])buffer, size );
+   }
 }
 
 
@@ -298,7 +310,7 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
  * a post-transformed vertex.
  *
  * With this function, drivers that use the draw module should have no reason
- * to track the current vertex shader.
+ * to track the current vertex/geometry shader.
  *
  * Note that the draw module may sometimes generate vertices with extra
  * attributes (such as texcoords for AA lines).  The driver can call this
@@ -309,43 +321,59 @@ draw_set_force_passthrough( struct draw_context *draw, boolean enable )
  * work for the drivers.
  */
 int
-draw_find_vs_output(const struct draw_context *draw,
-                    uint semantic_name, uint semantic_index)
+draw_find_shader_output(const struct draw_context *draw,
+                        uint semantic_name, uint semantic_index)
 {
    const struct draw_vertex_shader *vs = draw->vs.vertex_shader;
+   const struct draw_geometry_shader *gs = draw->gs.geometry_shader;
    uint i;
-   for (i = 0; i < vs->info.num_outputs; i++) {
-      if (vs->info.output_semantic_name[i] == semantic_name &&
-          vs->info.output_semantic_index[i] == semantic_index)
+   const struct tgsi_shader_info *info = &vs->info;
+
+   if (gs)
+      info = &gs->info;
+
+   for (i = 0; i < info->num_outputs; i++) {
+      if (info->output_semantic_name[i] == semantic_name &&
+          info->output_semantic_index[i] == semantic_index)
          return i;
    }
 
    /* XXX there may be more than one extra vertex attrib.
     * For example, simulated gl_FragCoord and gl_PointCoord.
     */
-   if (draw->extra_vp_outputs.semantic_name == semantic_name &&
-       draw->extra_vp_outputs.semantic_index == semantic_index) {
-      return draw->extra_vp_outputs.slot;
+   if (draw->extra_shader_outputs.semantic_name == semantic_name &&
+       draw->extra_shader_outputs.semantic_index == semantic_index) {
+      return draw->extra_shader_outputs.slot;
    }
+
    return 0;
 }
 
 
 /**
- * Return number of vertex shader outputs.
+ * Return the number of shader outputs.
+ *
+ * If a geometry shader is present, its output count is returned;
+ * otherwise the vertex shader's output count is used.
  */
 uint
-draw_num_vs_outputs(const struct draw_context *draw)
+draw_num_shader_outputs(const struct draw_context *draw)
 {
    uint count = draw->vs.vertex_shader->info.num_outputs;
-   if (draw->extra_vp_outputs.slot > 0)
+
+   /* if geometry shader is present, its outputs go to the
+    * driver, not the vertex shader's */
+   if (draw->gs.geometry_shader)
+      count = draw->gs.geometry_shader->info.num_outputs;
+
+   if (draw->extra_shader_outputs.slot > 0)
       count++;
    return count;
 }
 
 
 /**
- * Provide TGSI sampler objects for vertex shaders that use texture fetches.
+ * Provide TGSI sampler objects for vertex/geometry shaders that use texture fetches.
  * This might only be used by software drivers for the time being.
  */
 void
@@ -355,6 +383,8 @@ draw_texture_samplers(struct draw_context *draw,
 {
    draw->vs.num_samplers = num_samplers;
    draw->vs.samplers = samplers;
+   draw->gs.num_samplers = num_samplers;
+   draw->gs.samplers = samplers;
 }
 
 
@@ -421,3 +451,18 @@ void draw_do_flush( struct draw_context *draw, unsigned flags )
       draw->flushing = FALSE;
    }
 }
+
+
+int draw_current_shader_outputs(struct draw_context *draw)
+{
+   if (draw->gs.geometry_shader)
+      return draw->gs.num_gs_outputs;
+   return draw->vs.num_vs_outputs;
+}
+
+int draw_current_shader_position_output(struct draw_context *draw)
+{
+   if (draw->gs.geometry_shader)
+      return draw->gs.position_output;
+   return draw->vs.position_output;
+}
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 465b8f10c6..b716209df2 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -45,6 +45,7 @@ struct pipe_context;
 struct draw_context;
 struct draw_stage;
 struct draw_vertex_shader;
+struct draw_geometry_shader;
 struct tgsi_sampler;
 
 
@@ -85,11 +86,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe
 
 
 int
-draw_find_vs_output(const struct draw_context *draw,
-                    uint semantic_name, uint semantic_index);
+draw_find_shader_output(const struct draw_context *draw,
+                        uint semantic_name, uint semantic_index);
 
 uint
-draw_num_vs_outputs(const struct draw_context *draw);
+draw_num_shader_outputs(const struct draw_context *draw);
 
 
 void
@@ -112,6 +113,17 @@ void draw_delete_vertex_shader(struct draw_context *draw,
                                struct draw_vertex_shader *dvs);
 
 
+/*
+ * Geometry shader functions
+ */
+struct draw_geometry_shader *
+draw_create_geometry_shader(struct draw_context *draw,
+                            const struct pipe_shader_state *shader);
+void draw_bind_geometry_shader(struct draw_context *draw,
+                               struct draw_geometry_shader *dvs);
+void draw_delete_geometry_shader(struct draw_context *draw,
+                                 struct draw_geometry_shader *dvs);
+
 
 /*
  * Vertex data functions
@@ -140,6 +152,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw,
                                    unsigned attr, const void *buffer);
 
 void draw_set_mapped_constant_buffer(struct draw_context *draw,
+                                     unsigned shader_type,
                                      const void *buffer,
                                      unsigned size );
 
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
new file mode 100644
index 0000000000..11542286dd
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMWare Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "draw_gs.h"
+
+#include "draw_private.h"
+#include "draw_context.h"
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
+
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#define MAX_PRIM_VERTICES 6
+/* fixme: move it from here */
+#define MAX_PRIMITIVES 64
+
+boolean
+draw_gs_init( struct draw_context *draw )
+{
+   draw->gs.machine = tgsi_exec_machine_create();
+   if (!draw->gs.machine)
+      return FALSE;
+
+   draw->gs.machine->Primitives = align_malloc(
+      MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
+   if (!draw->gs.machine->Primitives)
+      return FALSE;
+   memset(draw->gs.machine->Primitives, 0,
+          MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
+
+   return TRUE;
+}
+
+
+void draw_gs_set_constants( struct draw_context *draw,
+                            const float (*constants)[4],
+                            unsigned size )
+{
+}
+
+
+struct draw_geometry_shader *
+draw_create_geometry_shader(struct draw_context *draw,
+                            const struct pipe_shader_state *state)
+{
+   struct draw_geometry_shader *gs;
+   int i;
+
+   gs = CALLOC_STRUCT(draw_geometry_shader);
+
+   if (!gs)
+      return NULL;
+
+   gs->state = *state;
+   gs->state.tokens = tgsi_dup_tokens(state->tokens);
+   if (!gs->state.tokens) {
+      FREE(gs);
+      return NULL;
+   }
+
+   tgsi_scan_shader(state->tokens, &gs->info);
+
+   /* setup the defaults */
+   gs->input_primitive = PIPE_PRIM_TRIANGLES;
+   gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
+   gs->max_output_vertices = 32;
+
+   for (i = 0; i < gs->info.num_properties; ++i) {
+      if (gs->info.properties[i].name ==
+          TGSI_PROPERTY_GS_INPUT_PRIM)
+         gs->input_primitive = gs->info.properties[i].data[0];
+      else if (gs->info.properties[i].name ==
+               TGSI_PROPERTY_GS_OUTPUT_PRIM)
+         gs->output_primitive = gs->info.properties[i].data[0];
+      else if (gs->info.properties[i].name ==
+               TGSI_PROPERTY_GS_MAX_VERTICES)
+         gs->max_output_vertices = gs->info.properties[i].data[0];
+   }
+
+   gs->machine = draw->gs.machine;
+
+   if (gs)
+   {
+      uint i;
+      for (i = 0; i < gs->info.num_outputs; i++) {
+         if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
+             gs->info.output_semantic_index[i] == 0)
+            gs->position_output = i;
+      }
+   }
+
+   return gs;
+}
+
+void draw_bind_geometry_shader(struct draw_context *draw,
+                               struct draw_geometry_shader *dgs)
+{
+   draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
+
+   if (dgs) {
+      draw->gs.geometry_shader = dgs;
+      draw->gs.num_gs_outputs = dgs->info.num_outputs;
+      draw->gs.position_output = dgs->position_output;
+      draw_geometry_shader_prepare(dgs, draw);
+   }
+   else {
+      draw->gs.geometry_shader = NULL;
+      draw->gs.num_gs_outputs = 0;
+   }
+}
+
+void draw_delete_geometry_shader(struct draw_context *draw,
+                                 struct draw_geometry_shader *dgs)
+{
+   FREE(dgs);
+}
+
+static INLINE int num_vertices_for_prim(int prim)
+{
+   switch(prim) {
+   case PIPE_PRIM_POINTS:
+      return 1;
+   case PIPE_PRIM_LINES:
+      return 2;
+   case PIPE_PRIM_LINE_LOOP:
+      return 2;
+   case PIPE_PRIM_LINE_STRIP:
+      return 2;
+   case PIPE_PRIM_TRIANGLES:
+      return 3;
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      return 3;
+   case PIPE_PRIM_TRIANGLE_FAN:
+      return 3;
+   case PIPE_PRIM_LINES_ADJACENCY:
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      return 4;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      return 6;
+   default:
+      assert(!"Bad geometry shader input");
+      return 0;
+   }
+}
+
+static void draw_fetch_geometry_input(struct draw_geometry_shader *shader,
+                                      int start_primitive,
+                                      int num_primitives,
+                                      const float (*input_ptr)[4],
+                                      unsigned input_vertex_stride,
+                                      unsigned inputs_from_vs)
+{
+   struct tgsi_exec_machine *machine = shader->machine;
+   unsigned slot, vs_slot, k, j;
+   unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
+   int idx = 0;
+
+   for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; slot++) {
+      debug_printf("Slot = %d (semantic = %d)\n", slot,
+                   shader->info.input_semantic_name[slot]);
+      if (shader->info.input_semantic_name[slot] ==
+          TGSI_SEMANTIC_VERTICES) {
+         for (j = 0; j < num_primitives; ++j) {
+            machine->Inputs[idx].xyzw[0].f[j] = (float)num_vertices;
+            machine->Inputs[idx].xyzw[1].f[j] = (float)num_vertices;
+            machine->Inputs[idx].xyzw[2].f[j] = (float)num_vertices;
+            machine->Inputs[idx].xyzw[3].f[j] = (float)num_vertices;
+         }
+         ++idx;
+      } else {
+         for (j = 0; j < num_primitives; ++j) {
+            int vidx = idx;
+            const float (*prim_ptr)[4];
+            debug_printf("    %d) Prim (num_verts = %d)\n", start_primitive + j,
+                         num_vertices);
+            prim_ptr = (const float (*)[4])(
+               (const char *)input_ptr +
+               (j * num_vertices * input_vertex_stride));
+
+            for (k = 0; k < num_vertices; ++k, ++vidx) {
+               const float (*input)[4];
+               input = (const float (*)[4])(
+                  (const char *)prim_ptr + (k * input_vertex_stride));
+               debug_printf("\t%d)(%d) Input vert:\n", vidx, k);
+#if 1
+               assert(!util_is_inf_or_nan(input[vs_slot][0]));
+               assert(!util_is_inf_or_nan(input[vs_slot][1]));
+               assert(!util_is_inf_or_nan(input[vs_slot][2]));
+               assert(!util_is_inf_or_nan(input[vs_slot][3]));
+#endif
+               machine->Inputs[vidx].xyzw[0].f[j] = input[vs_slot][0];
+               machine->Inputs[vidx].xyzw[1].f[j] = input[vs_slot][1];
+               machine->Inputs[vidx].xyzw[2].f[j] = input[vs_slot][2];
+               machine->Inputs[vidx].xyzw[3].f[j] = input[vs_slot][3];
+#if 0
+               debug_printf("\t\t%d %f %f %f %f\n", slot,
+                            machine->Inputs[vidx].xyzw[0].f[j],
+                            machine->Inputs[vidx].xyzw[1].f[j],
+                            machine->Inputs[vidx].xyzw[2].f[j],
+                            machine->Inputs[vidx].xyzw[3].f[j]);
+#endif
+            }
+         }
+         ++vs_slot;
+         idx += num_vertices;
+      }
+   }
+}
+
+static INLINE void
+draw_geometry_fetch_outputs(struct draw_geometry_shader *shader,
+                            int num_primitives,
+                            float (*output)[4],
+                            unsigned vertex_size)
+{
+   struct tgsi_exec_machine *machine = shader->machine;
+   unsigned prim_idx, j, slot;
+
+   /* Unswizzle all output results.
+    */
+   /* FIXME: handle all the primitives produced by the gs, not just
+    * the first one
+    unsigned prim_count =
+    mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];*/
+   for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
+      unsigned num_verts_per_prim = machine->Primitives[0];
+      for (j = 0; j < num_verts_per_prim; j++) {
+         int idx = (prim_idx * num_verts_per_prim + j) *
+                   shader->info.num_outputs;
+#ifdef DEBUG_OUTPUTS
+         debug_printf("%d) Output vert:\n", idx);
+#endif
+         for (slot = 0; slot < shader->info.num_outputs; slot++) {
+            output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[prim_idx];
+            output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[prim_idx];
+            output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[prim_idx];
+            output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[prim_idx];
+#ifdef DEBUG_OUTPUTS
+            debug_printf("\t%d: %f %f %f %f\n", slot,
+                         output[slot][0],
+                         output[slot][1],
+                         output[slot][2],
+                         output[slot][3]);
+#endif
+            debug_assert(!util_is_inf_or_nan(output[slot][0]));
+         }
+         output = (float (*)[4])((char *)output + vertex_size);
+      }
+   }
+}
+
+void draw_geometry_shader_run(struct draw_geometry_shader *shader,
+                              const float (*input)[4],
+                              float (*output)[4],
+                              const float (*constants)[4],
+                              unsigned count,
+                              unsigned input_stride,
+                              unsigned vertex_size)
+{
+   struct tgsi_exec_machine *machine = shader->machine;
+   unsigned int i;
+   unsigned num_vertices = num_vertices_for_prim(shader->input_primitive);
+   unsigned num_primitives = count/num_vertices;
+   unsigned inputs_from_vs = 0;
+
+   machine->Consts = constants;
+
+   for (i = 0; i < shader->info.num_inputs; ++i) {
+      if (shader->info.input_semantic_name[i] != TGSI_SEMANTIC_VERTICES &&
+          shader->info.input_semantic_name[i] != TGSI_SEMANTIC_PRIMID)
+         ++inputs_from_vs;
+   }
+
+   for (i = 0; i < num_primitives; ++i) {
+      unsigned int max_primitives = 1;
+
+      draw_fetch_geometry_input(shader, i, max_primitives, input,
+                                input_stride, inputs_from_vs);
+
+      tgsi_set_exec_mask(machine,
+                         1,
+                         max_primitives > 1,
+                         max_primitives > 2,
+                         max_primitives > 3);
+
+      /* run interpreter */
+      tgsi_exec_machine_run(machine);
+
+      draw_geometry_fetch_outputs(shader, max_primitives,
+                                  output, vertex_size);
+   }
+}
+
+void draw_geometry_shader_delete(struct draw_geometry_shader *shader)
+{
+   FREE((void*) shader->state.tokens);
+   FREE(shader);
+}
+
+void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
+                                  struct draw_context *draw)
+{
+    if (shader->machine->Tokens != shader->state.tokens) {
+       tgsi_exec_machine_bind_shader(shader->machine,
+                                     shader->state.tokens,
+                                     draw->gs.num_samplers,
+                                     draw->gs.samplers);
+    }
+}
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
new file mode 100644
index 0000000000..d6a97d9c4e
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -0,0 +1,76 @@
+/**************************************************************************
+ * 
+ * Copyright 2009 VMWare Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+#ifndef DRAW_GS_H
+#define DRAW_GS_H
+
+#include "draw_context.h"
+#include "draw_private.h"
+
+
+#define MAX_TGSI_PRIMITIVES 4
+
+struct draw_context;
+
+/**
+ * Private version of the compiled geometry shader
+ */
+struct draw_geometry_shader {
+   struct draw_context *draw;
+
+   struct tgsi_exec_machine *machine;
+
+   /* This member will disappear shortly:*/
+   struct pipe_shader_state state;
+
+   struct tgsi_shader_info info;
+   unsigned position_output;
+
+   unsigned max_output_vertices;
+   unsigned input_primitive;
+   unsigned output_primitive;
+
+   /* Extracted from shader:
+    */
+   const float (*immediates)[4];
+};
+
+void draw_geometry_shader_run(struct draw_geometry_shader *shader,
+                              const float (*input)[4],
+                              float (*output)[4],
+                              const float (*constants)[4],
+                              unsigned count,
+                              unsigned input_stride,
+                              unsigned output_stride);
+
+void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
+                                  struct draw_context *draw);
+
+void draw_geometry_shader_delete(struct draw_geometry_shader *shader);
+
+
+#endif
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 14375426ed..4585dcdb48 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -660,13 +660,13 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
    }
 
    /* update vertex attrib info */
-   aaline->tex_slot = draw->vs.num_vs_outputs;
-   aaline->pos_slot = draw->vs.position_output;
+   aaline->tex_slot = draw_current_shader_outputs(draw);
+   aaline->pos_slot = draw_current_shader_position_output(draw);;
 
    /* advertise the extra post-transformed vertex attribute */
-   draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
-   draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib;
-   draw->extra_vp_outputs.slot = aaline->tex_slot;
+   draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+   draw->extra_shader_outputs.semantic_index = aaline->fs->generic_attrib;
+   draw->extra_shader_outputs.slot = aaline->tex_slot;
 
    /* how many samplers? */
    /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
@@ -707,7 +707,7 @@ aaline_flush(struct draw_stage *stage, unsigned flags)
                                        aaline->state.texture);
    draw->suspend_flushing = FALSE;
 
-   draw->extra_vp_outputs.slot = 0;
+   draw->extra_shader_outputs.slot = 0;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
index 75130a8fb0..d86717e518 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -687,14 +687,14 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
    bind_aapoint_fragment_shader(aapoint);
 
    /* update vertex attrib info */
-   aapoint->tex_slot = draw->vs.num_vs_outputs;
+   aapoint->tex_slot = draw_current_shader_outputs(draw);
    assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
 
-   aapoint->pos_slot = draw->vs.position_output;
+   aapoint->pos_slot = draw_current_shader_position_output(draw);
 
-   draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
-   draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib;
-   draw->extra_vp_outputs.slot = aapoint->tex_slot;
+   draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+   draw->extra_shader_outputs.semantic_index = aapoint->fs->generic_attrib;
+   draw->extra_shader_outputs.slot = aapoint->tex_slot;
 
    /* find psize slot in post-transform vertex */
    aapoint->psize_slot = -1;
@@ -731,7 +731,7 @@ aapoint_flush(struct draw_stage *stage, unsigned flags)
    aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs);
    draw->suspend_flushing = FALSE;
 
-   draw->extra_vp_outputs.slot = 0;
+   draw->extra_shader_outputs.slot = 0;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 0670268a19..205cda5eab 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -114,8 +114,8 @@ static void interp( const struct clipper *clip,
 		    const struct vertex_header *out, 
 		    const struct vertex_header *in )
 {
-   const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs;
-   const unsigned pos_attr = clip->stage.draw->vs.position_output;
+   const unsigned nr_attrs = draw_current_shader_outputs(clip->stage.draw);
+   const unsigned pos_attr = draw_current_shader_position_output(clip->stage.draw);
    unsigned j;
 
    /* Vertex header.
diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c
index 0a70483858..11b39db599 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_cull.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c
@@ -55,7 +55,7 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage )
 static void cull_tri( struct draw_stage *stage,
 		      struct prim_header *header )
 {
-   const unsigned pos = stage->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(stage->draw);
 
    /* Window coords: */
    const float *v0 = header->v[0]->data[pos];
diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c
index 40798a5d6e..e829492423 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_offset.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c
@@ -63,7 +63,7 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage )
 static void do_offset_tri( struct draw_stage *stage,
 			   struct prim_header *header )
 {
-   const unsigned pos = stage->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(stage->draw);
    struct offset_stage *offset = offset_stage(stage);   
    float inv_det = 1.0f / header->det;
 
diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
index 6e921bac27..70fbab9ea7 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c
@@ -73,7 +73,8 @@ screen_interp( struct draw_context *draw,
                const struct vertex_header *v1 )
 {
    uint attr;
-   for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) {
+   int num_outputs = draw_current_shader_outputs(draw);
+   for (attr = 0; attr < num_outputs; attr++) {
       const float *val0 = v0->data[attr];
       const float *val1 = v1->data[attr];
       float *newv = dst->data[attr];
@@ -121,7 +122,7 @@ stipple_line(struct draw_stage *stage, struct prim_header *header)
    struct stipple_stage *stipple = stipple_stage(stage);
    struct vertex_header *v0 = header->v[0];
    struct vertex_header *v1 = header->v[1];
-   const unsigned pos = stage->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(stage->draw);
    const float *pos0 = v0->data[pos];
    const float *pos1 = v1->data[pos];
    float start = 0;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
index f32cbef983..3073c87082 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c
@@ -59,7 +59,7 @@ static void wideline_line( struct draw_stage *stage,
                            struct prim_header *header )
 {
    /*const struct wideline_stage *wide = wideline_stage(stage);*/
-   const unsigned pos = stage->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(stage->draw);
    const float half_width = 0.5f * stage->draw->rasterizer->line_width;
 
    struct prim_header tri;
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index 7d76a7dbf3..8dc50c0ab4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -112,7 +112,7 @@ static void set_texcoords(const struct widepoint_stage *wide,
 
    if (wide->point_coord_fs_input >= 0) {
       /* put gl_PointCoord into the extra vertex slot */
-      uint slot = wide->stage.draw->extra_vp_outputs.slot;
+      uint slot = wide->stage.draw->extra_shader_outputs.slot;
       v->data[slot][0] = tc[0];
       v->data[slot][1] = tc[1];
       v->data[slot][2] = 0.0F;
@@ -130,7 +130,7 @@ static void widepoint_point( struct draw_stage *stage,
                              struct prim_header *header )
 {
    const struct widepoint_stage *wide = widepoint_stage(stage);
-   const unsigned pos = stage->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(stage->draw);
    const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite;
    float half_size;
    float left_adj, right_adj, bot_adj, top_adj;
@@ -257,13 +257,13 @@ static void widepoint_first_point( struct draw_stage *stage,
       wide->point_coord_fs_input = find_pntc_input_attrib(draw);
 
       /* setup extra vp output (point coord implemented as a texcoord) */
-      draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
-      draw->extra_vp_outputs.semantic_index = 0;
-      draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs;
+      draw->extra_shader_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
+      draw->extra_shader_outputs.semantic_index = 0;
+      draw->extra_shader_outputs.slot = draw_current_shader_outputs(draw);
    }
    else {
       wide->point_coord_fs_input = -1;
-      draw->extra_vp_outputs.slot = 0;
+      draw->extra_shader_outputs.slot = 0;
    }
 
    wide->psize_slot = -1;
@@ -287,7 +287,7 @@ static void widepoint_flush( struct draw_stage *stage, unsigned flags )
 {
    stage->point = widepoint_first_point;
    stage->next->flush( stage->next, flags );
-   stage->draw->extra_vp_outputs.slot = 0;
+   stage->draw->extra_shader_outputs.slot = 0;
 }
 
 
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 3850cede1e..e49041556b 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -152,8 +152,9 @@ struct draw_context
          /** vertex arrays */
          const void *vbuffer[PIPE_MAX_ATTRIBS];
          
-         /** constant buffer (for vertex shader) */
-         const void *constants;
+         /** constant buffers (for vertex and geometry shaders) */
+         const void *vs_constants;
+         const void *gs_constants;
       } user;
 
       boolean test_fse;         /* enable FSE even though its not correct (eg for softpipe) */
@@ -211,6 +212,18 @@ struct draw_context
       struct translate_cache *emit_cache;
    } vs;
 
+   struct {
+      struct draw_geometry_shader *geometry_shader;
+      uint num_gs_outputs;  /**< convenience, from geometry_shader */
+      uint position_output;
+
+      /** TGSI program interpreter runtime state */
+      struct tgsi_exec_machine *machine;
+
+      uint num_samplers;
+      struct tgsi_sampler **samplers;
+   } gs;
+
    /* Clip derived state:
     */
    float plane[12][4];
@@ -222,7 +235,7 @@ struct draw_context
       uint semantic_name;
       uint semantic_index;
       int slot;
-   } extra_vp_outputs;
+   } extra_shader_outputs;
 
    unsigned reduced_prim;
 
@@ -245,6 +258,19 @@ void draw_vs_set_constants( struct draw_context *,
 
 
+/*******************************************************************************
+ * Geometry shading code:
+ */
+boolean draw_gs_init( struct draw_context *draw );
+void draw_gs_set_constants( struct draw_context *,
+                            const float (*constants)[4],
+                            unsigned size );
+
+/*******************************************************************************
+ * Common shading code:
+ */
+int draw_current_shader_outputs(struct draw_context *draw);
+int draw_current_shader_position_output(struct draw_context *draw);
 
 /*******************************************************************************
  * Vertex processing (was passthrough) code:
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 932113783d..1a9df4cac5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -32,6 +32,7 @@
 #include "draw/draw_vertex.h"
 #include "draw/draw_pt.h"
 #include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
 #include "translate/translate.h"
 
 
@@ -119,7 +120,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
 {
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_context *draw = fpme->draw;
-   struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+   struct draw_vertex_shader *vshader = draw->vs.vertex_shader;
+   struct draw_geometry_shader *gshader = draw->gs.geometry_shader;
    unsigned opt = fpme->opt;
    unsigned alloc_count = align( fetch_count, 4 );
 
@@ -147,13 +149,21 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle,
     */
    if (opt & PT_SHADE)
    {
-      shader->run_linear(shader, 
-			 (const float (*)[4])pipeline_verts->data,
-			 (      float (*)[4])pipeline_verts->data,
-			 (const float (*)[4])draw->pt.user.constants,
-			 fetch_count,
-			 fpme->vertex_size,
-			 fpme->vertex_size);
+      vshader->run_linear(vshader,
+                          (const float (*)[4])pipeline_verts->data,
+                          (      float (*)[4])pipeline_verts->data,
+                          (const float (*)[4])draw->pt.user.vs_constants,
+                          fetch_count,
+                          fpme->vertex_size,
+                          fpme->vertex_size);
+      if (gshader)
+         draw_geometry_shader_run(gshader,
+                                  (const float (*)[4])pipeline_verts->data,
+                                  (      float (*)[4])pipeline_verts->data,
+                                  (const float (*)[4])draw->pt.user.gs_constants,
+                                  fetch_count,
+                                  fpme->vertex_size,
+                                  fpme->vertex_size);
    }
 
    if (draw_pt_post_vs_run( fpme->post_vs,
@@ -196,6 +206,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+   struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
    unsigned opt = fpme->opt;
    unsigned alloc_count = align( count, 4 );
 
@@ -226,10 +237,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle,
       shader->run_linear(shader,
 			 (const float (*)[4])pipeline_verts->data,
 			 (      float (*)[4])pipeline_verts->data,
-			 (const float (*)[4])draw->pt.user.constants,
+			 (const float (*)[4])draw->pt.user.vs_constants,
 			 count,
 			 fpme->vertex_size,
 			 fpme->vertex_size);
+
+      if (geometry_shader)
+         draw_geometry_shader_run(geometry_shader,
+                                  (const float (*)[4])pipeline_verts->data,
+                                  (      float (*)[4])pipeline_verts->data,
+                                  (const float (*)[4])draw->pt.user.gs_constants,
+                                  count,
+                                  fpme->vertex_size,
+                                  fpme->vertex_size);
    }
 
    if (draw_pt_post_vs_run( fpme->post_vs,
@@ -270,6 +290,7 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
    struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle;
    struct draw_context *draw = fpme->draw;
    struct draw_vertex_shader *shader = draw->vs.vertex_shader;
+   struct draw_geometry_shader *geometry_shader = draw->gs.geometry_shader;
    unsigned opt = fpme->opt;
    unsigned alloc_count = align( count, 4 );
 
@@ -296,10 +317,19 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle
       shader->run_linear(shader,
 			 (const float (*)[4])pipeline_verts->data,
 			 (      float (*)[4])pipeline_verts->data,
-			 (const float (*)[4])draw->pt.user.constants,
+			 (const float (*)[4])draw->pt.user.vs_constants,
 			 count,
 			 fpme->vertex_size,
 			 fpme->vertex_size);
+
+      if (geometry_shader)
+         draw_geometry_shader_run(geometry_shader,
+                                  (const float (*)[4])pipeline_verts->data,
+                                  (      float (*)[4])pipeline_verts->data,
+                                  (const float (*)[4])draw->pt.user.gs_constants,
+                                  count,
+                                  fpme->vertex_size,
+                                  fpme->vertex_size);
    }
 
    if (draw_pt_post_vs_run( fpme->post_vs,
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index 9dfb47837e..55151823a1 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -100,7 +100,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs,
    struct vertex_header *out = vertices;
    const float *scale = pvs->draw->viewport.scale;
    const float *trans = pvs->draw->viewport.translate;
-   const unsigned pos = pvs->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(pvs->draw);
    unsigned clipped = 0;
    unsigned j;
 
@@ -190,7 +190,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs,
    struct vertex_header *out = vertices;
    const float *scale = pvs->draw->viewport.scale;
    const float *trans = pvs->draw->viewport.translate;
-   const unsigned pos = pvs->draw->vs.position_output;
+   const unsigned pos = draw_current_shader_position_output(pvs->draw);
    unsigned j;
 
    if (0) debug_printf("%s\n", __FUNCTION__);
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index b61fa29143..17c3b8cec2 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -50,16 +50,32 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
       *first = 2;
       *incr = 1;
       break;
+   case PIPE_PRIM_LINES_ADJACENCY:
+      *first = 4;
+      *incr = 2;
+      break;
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+      *first = 4;
+      *incr = 1;
+      break;
    case PIPE_PRIM_TRIANGLES:
       *first = 3;
       *incr = 3;
       break;
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+      *first = 6;
+      *incr = 3;
+      break;
    case PIPE_PRIM_TRIANGLE_STRIP:
    case PIPE_PRIM_TRIANGLE_FAN:
    case PIPE_PRIM_POLYGON:
       *first = 3;
       *incr = 1;
       break;
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
+      *first = 6;
+      *incr = 1;
+      break;
    case PIPE_PRIM_QUADS:
       *first = 4;
       *incr = 4;
diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
index 010c7a18a7..f0aec5feba 100644
--- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
+++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h
@@ -36,6 +36,10 @@ static void FUNC(struct draw_pt_front_end *frontend,
    case PIPE_PRIM_TRIANGLE_STRIP:
    case PIPE_PRIM_QUADS:
    case PIPE_PRIM_QUAD_STRIP:
+   case PIPE_PRIM_LINES_ADJACENCY:
+   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
+   case PIPE_PRIM_TRIANGLES_ADJACENCY:
+   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
       for (j = 0; j < count;) {
          unsigned remaining = count - j;
          unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr );
diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c
index 7ee567d478..d16692584e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_varient.c
+++ b/src/gallium/auxiliary/draw/draw_vs_varient.c
@@ -147,11 +147,12 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient,
    vsvg->base.vs->run_linear( vsvg->base.vs, 
                               temp_buffer,
                               temp_buffer,
-                              (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+                              (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants,
                               count,
                               temp_vertex_stride, 
                               temp_vertex_stride);
 
+   /* FIXME: geometry shading? */
 
    if (vsvg->base.key.clip) {
       /* not really handling clipping, just do the rhw so we can
@@ -207,7 +208,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient,
    vsvg->base.vs->run_linear( vsvg->base.vs, 
                               temp_buffer,
                               temp_buffer,
-                              (const float (*)[4])vsvg->base.vs->draw->pt.user.constants,
+                              (const float (*)[4])vsvg->base.vs->draw->pt.user.vs_constants,
                               count,
                               temp_vertex_stride, 
                               temp_vertex_stride);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index 5e7e5d2ff9..bb4f564161 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -122,7 +122,9 @@ static const char *semantic_names[] =
    "GENERIC",
    "NORMAL",
    "FACE",
-   "EDGEFLAG"
+   "EDGEFLAG",
+   "VERTICES_IN",
+   "PRIM_ID"
 };
 
 static const char *immediate_type_names[] =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 123117cb0a..e2f28783cc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -372,6 +372,7 @@ tgsi_exec_machine_create( void )
    memset(mach, 0, sizeof(*mach));
 
    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
+   mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
    mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
 
    /* Setup constants. */
@@ -1468,6 +1469,15 @@ store_dest(
       index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
          + reg->Register.Index;
       dst = &mach->Outputs[offset + index].xyzw[chan_index];
+#if 0
+      if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) {
+         fprintf(stderr, "STORING OUT[%d] mask(%d), = (", index, execmask);
+         for (i = 0; i < QUAD_SIZE; i++)
+            if (execmask & (1 << i))
+               fprintf(stderr, "%f, ", chan->f[i]);
+         fprintf(stderr, ")\n");
+      }
+#endif
       break;
 
    case TGSI_FILE_TEMPORARY:
@@ -1638,6 +1648,35 @@ exec_kilp(struct tgsi_exec_machine *mach,
    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
 }
 
+static void
+emit_vertex(struct tgsi_exec_machine *mach)
+{
+   /* FIXME: the exec mask should be tested per channel, i.e. for each i in
+    * [0, QUAD_SIZE) check (mach->ExecMask & (1 << i)); for now any active
+    * channel makes the whole EMIT take effect.
+    */
+   if (!mach->ExecMask)
+      return;
+   /* Advance the output write offset, then count the vertex in the current primitive. */
+   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
+   mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+}
+
+static void
+emit_primitive(struct tgsi_exec_machine *mach)
+{
+   unsigned *cur_prim = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
+
+   /* FIXME: the exec mask should be tested per channel, i.e. for each i in
+    * [0, QUAD_SIZE) check (mach->ExecMask & (1 << i)).
+    */
+   if (!mach->ExecMask)
+      return;
+
+   *cur_prim += 1;   /* move on to the next primitive slot */
+   debug_assert((*cur_prim * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
+   mach->Primitives[*cur_prim] = 0;   /* the new primitive has no vertices yet */
+}
 
 /*
  * Fetch a four texture samples using STR texture coordinates.
@@ -3087,13 +3126,11 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_EMIT:
-      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+      emit_vertex(mach);
       break;
 
    case TGSI_OPCODE_ENDPRIM:
-      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
-      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+      emit_primitive(mach);
       break;
 
    case TGSI_OPCODE_BGNFOR:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index fd94c1bc44..afaf5c39c4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -191,6 +191,14 @@ struct tgsi_exec_labels
  */
 #define TGSI_EXEC_MAX_CONST_BUFFER  4096
 
+/* The maximum number of vertices per primitive */
+#define TGSI_MAX_PRIM_VERTICES 6
+
+/* The maximum number of primitives to be generated */
+#define TGSI_MAX_PRIMITIVES 64
+
+/* The maximum total number of output vectors (max vertices x PIPE_MAX_ATTRIBS) */
+#define TGSI_MAX_TOTAL_VERTICES (TGSI_MAX_PRIM_VERTICES * TGSI_MAX_PRIMITIVES * PIPE_MAX_ATTRIBS)
 
 /** function call/activation record */
 struct tgsi_call_record
@@ -201,7 +209,6 @@ struct tgsi_call_record
    uint ReturnAddr;
 };
 
-
 /**
  * Run-time virtual machine state for executing TGSI shader.
  */
@@ -214,8 +221,8 @@ struct tgsi_exec_machine
 
    float                         Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
 
-   struct tgsi_exec_vector       Inputs[PIPE_MAX_ATTRIBS];
-   struct tgsi_exec_vector       Outputs[PIPE_MAX_ATTRIBS];
+   struct tgsi_exec_vector       Inputs[TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS];
+   struct tgsi_exec_vector       Outputs[TGSI_MAX_TOTAL_VERTICES];
 
    struct tgsi_exec_vector       *Addrs;
    struct tgsi_exec_vector       *Predicates;
@@ -229,6 +236,8 @@ struct tgsi_exec_machine
 
    /* GEOMETRY processor only. */
    unsigned                      *Primitives;
+   unsigned                       NumOutputs;
+   unsigned                       MaxGeometryShaderOutputs;
 
    /* FRAGMENT processor only. */
    const struct tgsi_interp_coef *InterpCoefs;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index f000958bfc..84e863b237 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -791,7 +791,9 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
    "PSIZE",
    "GENERIC",
    "NORMAL",
-   "FACE"
+   "FACE",
+   "VERTICES_IN",
+   "PRIM_ID"
 };
 
 static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
index 5cc1d4ddf8..01bea0f8cc 100644
--- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
+++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c
@@ -59,7 +59,7 @@ cell_map_constant_buffers(struct cell_context *sp)
       }
    }
 
-   draw_set_mapped_constant_buffer(sp->draw,
+   draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
                                    sp->mapped_constants[PIPE_SHADER_VERTEX],
                                    sp->constants[PIPE_SHADER_VERTEX].buffer->size);
 }
diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c
index efc4f78364..b723e794e7 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_derived.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c
@@ -66,7 +66,7 @@ calculate_vertex_layout( struct cell_context *cell )
    vinfo->num_attribs = 0;
 
    /* we always want to emit vertex pos */
-   src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
+   src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_POSITION, 0);
    assert(src >= 0);
    draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src);
 
@@ -82,14 +82,14 @@ calculate_vertex_layout( struct cell_context *cell )
          break;
 
       case TGSI_SEMANTIC_COLOR:
-         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, 
+         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_COLOR, 
                                    fs->info.input_semantic_index[i]);
          assert(src >= 0);
          draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
          break;
 
       case TGSI_SEMANTIC_FOG:
-         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
+         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_FOG, 0);
 #if 1
          if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */
             src = 0;
@@ -100,7 +100,7 @@ calculate_vertex_layout( struct cell_context *cell )
 
       case TGSI_SEMANTIC_GENERIC:
          /* this includes texcoords and varying vars */
-         src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC,
+         src = draw_find_shader_output(cell->draw, TGSI_SEMANTIC_GENERIC,
                               fs->info.input_semantic_index[i]);
          assert(src >= 0);
          draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index ac5fafec1a..5b87286d4c 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -331,7 +331,7 @@ cell_emit_state(struct cell_context *cell)
       const struct draw_context *const draw = cell->draw;
       struct cell_shader_info info;
 
-      info.num_outputs = draw_num_vs_outputs(draw);
+      info.num_outputs = draw_num_shader_outputs(draw);
       info.declarations = (uintptr_t) draw->vs.machine.Declarations;
       info.num_declarations = draw->vs.machine.NumDeclarations;
       info.instructions = (uintptr_t) draw->vs.machine.Instructions;
diff --git a/src/gallium/drivers/i915/i915_context.c b/src/gallium/drivers/i915/i915_context.c
index 94c8aee30f..949f046350 100644
--- a/src/gallium/drivers/i915/i915_context.c
+++ b/src/gallium/drivers/i915/i915_context.c
@@ -84,7 +84,7 @@ i915_draw_range_elements(struct pipe_context *pipe,
    }
 
 
-   draw_set_mapped_constant_buffer(draw,
+   draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
                                    i915->current.constants[PIPE_SHADER_VERTEX],
                                    (i915->current.num_user_constants[PIPE_SHADER_VERTEX] * 
                                       4 * sizeof(float)));
diff --git a/src/gallium/drivers/i915/i915_state_derived.c b/src/gallium/drivers/i915/i915_state_derived.c
index 178d4e8781..03dd5091a6 100644
--- a/src/gallium/drivers/i915/i915_state_derived.c
+++ b/src/gallium/drivers/i915/i915_state_derived.c
@@ -84,7 +84,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
 
    
    /* pos */
-   src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
+   src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_POSITION, 0);
    if (needW) {
       draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src);
       vinfo.hwfmt[0] |= S4_VFMT_XYZW;
@@ -101,21 +101,21 @@ static void calculate_vertex_layout( struct i915_context *i915 )
 
    /* primary color */
    if (colors[0]) {
-      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
+      src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 0);
       draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
       vinfo.hwfmt[0] |= S4_VFMT_COLOR;
    }
 
    /* secondary color */
    if (colors[1]) {
-      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
+      src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_COLOR, 1);
       draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src);
       vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG;
    }
 
    /* fog coord, not fog blend factor */
    if (fog) {
-      src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
+      src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_FOG, 0);
       draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
       vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM;
    }
@@ -125,7 +125,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
       uint hwtc;
       if (texCoords[i]) {
          hwtc = TEXCOORDFMT_4D;
-         src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
+         src = draw_find_shader_output(i915->draw, TGSI_SEMANTIC_GENERIC, i);
          draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
       }
       else {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index e703964aaa..fdc6a389b4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -66,7 +66,7 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
       /* compute vertex layout now */
       const struct lp_fragment_shader *lpfs = llvmpipe->fs;
       struct vertex_info *vinfo_vbuf = &llvmpipe->vertex_info_vbuf;
-      const uint num = draw_num_vs_outputs(llvmpipe->draw);
+      const uint num = draw_current_shader_outputs(llvmpipe->draw);
       uint i;
 
       /* Tell draw_vbuf to simply emit the whole post-xform vertex
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 22683ff8b4..af053f1dc6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -734,7 +734,8 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    }
 
    if(shader == PIPE_SHADER_VERTEX) {
-      draw_set_mapped_constant_buffer(llvmpipe->draw, data, size);
+      draw_set_mapped_constant_buffer(llvmpipe->draw, PIPE_SHADER_VERTEX,
+                                      data, size);
    }
 
    llvmpipe->dirty |= LP_NEW_CONSTANTS;
diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c
index e3167814f2..099ab10043 100644
--- a/src/gallium/drivers/nv04/nv04_vbo.c
+++ b/src/gallium/drivers/nv04/nv04_vbo.c
@@ -45,7 +45,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe,
 		draw_set_mapped_element_buffer(draw, 0, NULL);
 	}
 
-	draw_set_mapped_constant_buffer(draw,
+	draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
 					nv04->constbuf[PIPE_SHADER_VERTEX],
 					nv04->constbuf_nr[PIPE_SHADER_VERTEX]);
 
diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c
index 441a4f75f3..0d26141248 100644
--- a/src/gallium/drivers/nv10/nv10_vbo.c
+++ b/src/gallium/drivers/nv10/nv10_vbo.c
@@ -45,6 +45,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe,
 	}
 
 	draw_set_mapped_constant_buffer(draw,
+                                        PIPE_SHADER_VERTEX,
 					nv10->constbuf[PIPE_SHADER_VERTEX],
 					nv10->constbuf_nr[PIPE_SHADER_VERTEX]);
 
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 0122b1c2cd..63cba1f412 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -228,7 +228,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 	}
 
 	/* always do position */ {
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_POSITION, 0);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
 		vinfo->hwfmt[0] |= (1 << 0);
 	}
@@ -237,19 +237,19 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 	for (i = 4; i < 6; i++) {
 		if (!generics[i])
 			continue;
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
 		vinfo->hwfmt[0] |= (1 << (i - 3));
 	}
 
 	if (colors[0]) {
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 0);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
 		vinfo->hwfmt[0] |= (1 << 3);
 	}
 
 	if (colors[1]) {
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_COLOR, 1);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
 		vinfo->hwfmt[0] |= (1 << 4);
 	}
@@ -258,7 +258,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 	for (i = 6; i < 10; i++) {
 		if (!generics[i])
 			continue;
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
 		vinfo->hwfmt[0] |= (1 << (i - 1));
 	}
@@ -267,7 +267,7 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 	for (i = 0; i < 4; i++) {
 		if (!generics[i])
 			continue;
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
 		vinfo->hwfmt[0] |= (1 << (i + 9));
 	}
@@ -276,13 +276,13 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 	for (i = 10; i < 12; i++) {
 		if (!generics[i])
 			continue;
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_GENERIC, i);
 		draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
 		vinfo->hwfmt[0] |= (1 << (i + 3));
 	}
 
 	if (fog) {
-		src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0);
+		src = draw_find_shader_output(dc, TGSI_SEMANTIC_FOG, 0);
 		draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
 		vinfo->hwfmt[0] |= (1 << 15);
 	}
diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c
index 84d7db6c5e..4bf461eba9 100644
--- a/src/gallium/drivers/nv20/nv20_vbo.c
+++ b/src/gallium/drivers/nv20/nv20_vbo.c
@@ -45,7 +45,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe,
 		draw_set_mapped_element_buffer(draw, 0, NULL);
 	}
 
-	draw_set_mapped_constant_buffer(draw,
+	draw_set_mapped_constant_buffer(draw, PIPE_SHADER_VERTEX,
 					nv20->constbuf[PIPE_SHADER_VERTEX],
 					nv20->constbuf_nr[PIPE_SHADER_VERTEX]);
 
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index b2f19ecb69..3875bc3545 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -261,7 +261,8 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe,
 		map = pipe_buffer_map(pscreen,
 				      nv40->constbuf[PIPE_SHADER_VERTEX],
 				      PIPE_BUFFER_USAGE_CPU_READ);
-		draw_set_mapped_constant_buffer(nv40->draw, map, nr);
+		draw_set_mapped_constant_buffer(nv40->draw, PIPE_SHADER_VERTEX,
+                                                map, nr);
 	}
 
 	draw_arrays(nv40->draw, mode, start, count);
@@ -285,7 +286,7 @@ static INLINE void
 emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit,
 	    unsigned semantic, unsigned index)
 {
-	unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index);
+	unsigned draw_out = draw_find_shader_output(nv40->draw, semantic, index);
 	unsigned a = nv40->swtnl.nr_attribs++;
 
 	nv40->swtnl.hw[a] = hw;
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 2a33587b5a..0c890cb940 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -222,6 +222,10 @@ softpipe_create( struct pipe_screen *screen )
    softpipe->pipe.bind_vs_state   = softpipe_bind_vs_state;
    softpipe->pipe.delete_vs_state = softpipe_delete_vs_state;
 
+   softpipe->pipe.create_gs_state = softpipe_create_gs_state;
+   softpipe->pipe.bind_gs_state   = softpipe_bind_gs_state;
+   softpipe->pipe.delete_gs_state = softpipe_delete_gs_state;
+
    softpipe->pipe.set_blend_color = softpipe_set_blend_color;
    softpipe->pipe.set_clip_state = softpipe_set_clip_state;
    softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 8ce20c5744..159547e2a6 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -58,6 +58,7 @@ struct softpipe_context {
    struct pipe_rasterizer_state *rasterizer;
    struct sp_fragment_shader *fs;
    struct sp_vertex_shader *vs;
+   struct sp_geometry_shader *gs;
 
    /** Other rendering state */
    struct pipe_blend_color blend_color;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 518ef8806e..9ea5d6fb9f 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -48,7 +48,7 @@ static void
 softpipe_map_constant_buffers(struct softpipe_context *sp)
 {
    struct pipe_winsys *ws = sp->pipe.winsys;
-   uint i, size;
+   uint i, vssize, gssize;
 
    for (i = 0; i < PIPE_SHADER_TYPES; i++) {
       if (sp->constants[i].buffer && sp->constants[i].buffer->size)
@@ -57,13 +57,21 @@ softpipe_map_constant_buffers(struct softpipe_context *sp)
    }
 
    if (sp->constants[PIPE_SHADER_VERTEX].buffer)
-      size = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
+      vssize = sp->constants[PIPE_SHADER_VERTEX].buffer->size;
    else
-      size = 0;
+      vssize = 0;
 
-   draw_set_mapped_constant_buffer(sp->draw,
+   if (sp->constants[PIPE_SHADER_GEOMETRY].buffer)
+      gssize = sp->constants[PIPE_SHADER_GEOMETRY].buffer->size;
+   else
+      gssize = 0;
+
+   draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX,
                                    sp->mapped_constants[PIPE_SHADER_VERTEX],
-                                   size);
+                                   vssize);
+   draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY,
+                                   sp->mapped_constants[PIPE_SHADER_GEOMETRY],
+                                   gssize);
 }
 
 
@@ -78,9 +86,10 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp)
     */
    draw_flush(sp->draw);
 
-   draw_set_mapped_constant_buffer(sp->draw, NULL, 0);
+   draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_VERTEX, NULL, 0);
+   draw_set_mapped_constant_buffer(sp->draw, PIPE_SHADER_GEOMETRY, NULL, 0);
 
-   for (i = 0; i < 2; i++) {
+   for (i = 0; i < PIPE_SHADER_TYPES; i++) {
       if (sp->constants[i].buffer && sp->constants[i].buffer->size)
          ws->buffer_unmap(ws, sp->constants[i].buffer);
       sp->mapped_constants[i] = NULL;
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index bd3532de4f..a32312d29b 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -89,6 +89,8 @@ softpipe_get_param(struct pipe_screen *screen, int param)
       return 13; /* max 4Kx4K */
    case PIPE_CAP_TGSI_CONT_SUPPORTED:
       return 1;
+   case PIPE_CAP_GEOMETRY_SHADER4:
+      return 1;
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
       return 1;
    default:
diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c
index 615581b95f..3da75364c5 100644
--- a/src/gallium/drivers/softpipe/sp_setup.c
+++ b/src/gallium/drivers/softpipe/sp_setup.c
@@ -1268,7 +1268,7 @@ void sp_setup_prepare( struct setup_context *setup )
    }
 
    /* Note: nr_attrs is only used for debugging (vertex printing) */
-   setup->nr_vertex_attrs = draw_num_vs_outputs(sp->draw);
+   setup->nr_vertex_attrs = draw_num_shader_outputs(sp->draw);
 
    sp->quad.first->begin( sp->quad.first );
 
diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h
index 26d5c3fbb2..5a32d211d6 100644
--- a/src/gallium/drivers/softpipe/sp_state.h
+++ b/src/gallium/drivers/softpipe/sp_state.h
@@ -50,6 +50,7 @@
 #define SP_NEW_VERTEX        0x1000
 #define SP_NEW_VS            0x2000
 #define SP_NEW_QUERY         0x4000
+#define SP_NEW_GS            0x8000
 
 
 struct tgsi_sampler;
@@ -90,6 +91,11 @@ struct sp_vertex_shader {
    int max_sampler;             /* -1 if no samplers */
 };
 
+/** Subclass of pipe_shader_state */
+struct sp_geometry_shader {
+   struct pipe_shader_state shader;          /* shader.tokens holds softpipe's own copy of the tokens */
+   struct draw_geometry_shader *draw_data;   /* object returned by draw_create_geometry_shader() */
+};
 
 
 void *
@@ -143,6 +149,10 @@ void *softpipe_create_vs_state(struct pipe_context *,
                                const struct pipe_shader_state *);
 void softpipe_bind_vs_state(struct pipe_context *, void *);
 void softpipe_delete_vs_state(struct pipe_context *, void *);
+void *softpipe_create_gs_state(struct pipe_context *,
+                               const struct pipe_shader_state *);
+void softpipe_bind_gs_state(struct pipe_context *, void *);
+void softpipe_delete_gs_state(struct pipe_context *, void *);
 
 void softpipe_set_polygon_stipple( struct pipe_context *,
 				  const struct pipe_poly_stipple * );
diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c
index c24a737d07..f6856a5f69 100644
--- a/src/gallium/drivers/softpipe/sp_state_derived.c
+++ b/src/gallium/drivers/softpipe/sp_state_derived.c
@@ -67,7 +67,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
       /* compute vertex layout now */
       const struct sp_fragment_shader *spfs = softpipe->fs;
       struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf;
-      const uint num = draw_num_vs_outputs(softpipe->draw);
+      const uint num = draw_current_shader_outputs(softpipe->draw);
       uint i;
 
       /* Tell draw_vbuf to simply emit the whole post-xform vertex
@@ -117,13 +117,13 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe)
          }
 
          /* this includes texcoords and varying vars */
-         src = draw_find_vs_output(softpipe->draw,
-                                   spfs->info.input_semantic_name[i],
-                                   spfs->info.input_semantic_index[i]);
+         src = draw_find_shader_output(softpipe->draw,
+                                       spfs->info.input_semantic_name[i],
+                                       spfs->info.input_semantic_index[i]);
          draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
-      softpipe->psize_slot = draw_find_vs_output(softpipe->draw,
+      softpipe->psize_slot = draw_find_shader_output(softpipe->draw,
                                                  TGSI_SEMANTIC_PSIZE, 0);
       if (softpipe->psize_slot > 0) {
          draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index b41f7e8ab7..22f82b1a42 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -165,3 +165,62 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
 
    softpipe->dirty |= SP_NEW_CONSTANTS;
 }
+
+/**
+ * Create the softpipe state object for a geometry shader.
+ * Keeps a private copy of the template's tokens and asks the draw module
+ * for its geometry shader object.
+ *
+ * \return the new sp_geometry_shader, or NULL if any step fails
+ */
+void *
+softpipe_create_gs_state(struct pipe_context *pipe,
+                         const struct pipe_shader_state *templ)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_geometry_shader *gs = CALLOC_STRUCT(sp_geometry_shader);
+
+   if (!gs)
+      return NULL;
+
+   /* The tokens passed in will go away, so keep our own copy. */
+   gs->shader.tokens = tgsi_dup_tokens(templ->tokens);
+   if (gs->shader.tokens) {
+      gs->draw_data = draw_create_geometry_shader(softpipe->draw, templ);
+      if (gs->draw_data)
+         return gs;
+   }
+
+   /* Failure: release whatever was set up so far. */
+   FREE( (void *)gs->shader.tokens );
+   FREE( gs->draw_data );
+   FREE( gs );
+   return NULL;
+}
+
+
+/** Make gs (may be NULL) the current geometry shader and flag the state dirty. */
+void
+softpipe_bind_gs_state(struct pipe_context *pipe, void *gs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_geometry_shader *shader = (struct sp_geometry_shader *)gs;
+   struct draw_geometry_shader *draw_gs = shader ? shader->draw_data : NULL;
+
+   softpipe->gs = shader;
+   draw_bind_geometry_shader(softpipe->draw, draw_gs);
+   softpipe->dirty |= SP_NEW_GS;
+}
+
+
+/** Destroy GS state: draw's shader, our copy of the tokens, then the wrapper. */
+void
+softpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
+{
+   struct softpipe_context *softpipe = softpipe_context(pipe);
+   struct sp_geometry_shader *state = (struct sp_geometry_shader *)gs;
+
+   draw_delete_geometry_shader(softpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );   /* copy made by tgsi_dup_tokens() at create time */
+   FREE(state);
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
index 8b14c913f7..7655121bec 100644
--- a/src/gallium/drivers/svga/svga_swtnl_draw.c
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -90,7 +90,7 @@ svga_swtnl_draw_range_elements(struct svga_context *svga,
                             PIPE_BUFFER_USAGE_CPU_READ);
       assert(map);
       draw_set_mapped_constant_buffer(
-         draw, 
+         draw, PIPE_SHADER_VERTEX,
          map,
          svga->curr.cb[PIPE_SHADER_VERTEX]->size);
    }
diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
index 11bcdc0a24..6c06fb9027 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -142,6 +142,12 @@ struct pipe_context {
                              const struct pipe_shader_state *);
    void   (*bind_vs_state)(struct pipe_context *, void *);
    void   (*delete_vs_state)(struct pipe_context *, void *);
+
+   void * (*create_gs_state)(struct pipe_context *,
+                             const struct pipe_shader_state *);
+   void   (*bind_gs_state)(struct pipe_context *, void *);
+   void   (*delete_gs_state)(struct pipe_context *, void *);
+
    /*@}*/
 
    /**
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index fe1390d765..90fc3331d1 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -321,23 +321,28 @@ enum pipe_transfer_usage {
  */
 #define PIPE_SHADER_VERTEX   0
 #define PIPE_SHADER_FRAGMENT 1
-#define PIPE_SHADER_TYPES    2
+#define PIPE_SHADER_GEOMETRY 2
+#define PIPE_SHADER_TYPES    3
 
 
 /**
  * Primitive types:
  */
-#define PIPE_PRIM_POINTS          0
-#define PIPE_PRIM_LINES           1
-#define PIPE_PRIM_LINE_LOOP       2
-#define PIPE_PRIM_LINE_STRIP      3
-#define PIPE_PRIM_TRIANGLES       4
-#define PIPE_PRIM_TRIANGLE_STRIP  5
-#define PIPE_PRIM_TRIANGLE_FAN    6
-#define PIPE_PRIM_QUADS           7
-#define PIPE_PRIM_QUAD_STRIP      8
-#define PIPE_PRIM_POLYGON         9
-#define PIPE_PRIM_MAX             10
+#define PIPE_PRIM_POINTS               0
+#define PIPE_PRIM_LINES                1
+#define PIPE_PRIM_LINE_LOOP            2
+#define PIPE_PRIM_LINE_STRIP           3
+#define PIPE_PRIM_TRIANGLES            4
+#define PIPE_PRIM_TRIANGLE_STRIP       5
+#define PIPE_PRIM_TRIANGLE_FAN         6
+#define PIPE_PRIM_QUADS                7
+#define PIPE_PRIM_QUAD_STRIP           8
+#define PIPE_PRIM_POLYGON              9
+#define PIPE_PRIM_LINES_ADJACENCY          10
+#define PIPE_PRIM_LINE_STRIP_ADJACENCY    11
+#define PIPE_PRIM_TRIANGLES_ADJACENCY      12
+#define PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY 13
+#define PIPE_PRIM_MAX                      14
 
 
 /**
@@ -393,6 +398,7 @@ enum pipe_transfer_usage {
 #define PIPE_CAP_MAX_PREDICATE_REGISTERS 30
 #define PIPE_CAP_MAX_COMBINED_SAMPLERS   31  /*< Maximum texture image units accessible from vertex
                                                  and fragment shaders combined */
+#define PIPE_CAP_GEOMETRY_SHADER4        32
 
 
 /**
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 5da85bbbc2..3e7335b455 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -121,16 +121,18 @@ struct tgsi_declaration_range
    unsigned Last    : 16; /**< UINT */
 };
 
-#define TGSI_SEMANTIC_POSITION 0
-#define TGSI_SEMANTIC_COLOR    1
-#define TGSI_SEMANTIC_BCOLOR   2 /**< back-face color */
-#define TGSI_SEMANTIC_FOG      3
-#define TGSI_SEMANTIC_PSIZE    4
-#define TGSI_SEMANTIC_GENERIC  5
-#define TGSI_SEMANTIC_NORMAL   6
-#define TGSI_SEMANTIC_FACE     7
-#define TGSI_SEMANTIC_EDGEFLAG 8
-#define TGSI_SEMANTIC_COUNT    9 /**< number of semantic values */
+#define TGSI_SEMANTIC_POSITION  0
+#define TGSI_SEMANTIC_COLOR     1
+#define TGSI_SEMANTIC_BCOLOR    2 /**< back-face color */
+#define TGSI_SEMANTIC_FOG       3
+#define TGSI_SEMANTIC_PSIZE     4
+#define TGSI_SEMANTIC_GENERIC   5
+#define TGSI_SEMANTIC_NORMAL    6
+#define TGSI_SEMANTIC_FACE      7
+#define TGSI_SEMANTIC_EDGEFLAG  8
+#define TGSI_SEMANTIC_VERTICES  9
+#define TGSI_SEMANTIC_PRIMID   10
+#define TGSI_SEMANTIC_COUNT    11 /**< number of semantic values */
 
 struct tgsi_declaration_semantic
 {
diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i
index 9728207d9c..3c35e6f745 100644
--- a/src/gallium/state_trackers/python/p_context.i
+++ b/src/gallium/state_trackers/python/p_context.i
@@ -103,6 +103,25 @@ struct st_context {
       $self->vs = vs;
    }
 
+   void set_geometry_shader( const struct pipe_shader_state *state ) {
+      void *gs;
+
+      if(!state) {
+         cso_set_geometry_shader_handle($self->cso, NULL);
+         return;
+      }
+
+      gs = $self->pipe->create_gs_state($self->pipe, state);
+      if(!gs)
+         return;
+
+      if(cso_set_geometry_shader_handle($self->cso, gs) != PIPE_OK)
+         return;
+
+      cso_delete_geometry_shader($self->cso, $self->gs);
+      $self->gs = gs;
+   }
+
    /*
     * Parameter-like state (or properties)
     */
diff --git a/src/gallium/state_trackers/python/samples/gs.py b/src/gallium/state_trackers/python/samples/gs.py
new file mode 100644
index 0000000000..f4f083e547
--- /dev/null
+++ b/src/gallium/state_trackers/python/samples/gs.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+##########################################################################
+#
+# Copyright 2009 VMware
+# All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+##########################################################################
+
+
+from gallium import *
+
+
+def make_image(surface):
+    data = surface.get_tile_rgba8(0, 0, surface.width, surface.height)
+
+    import Image
+    outimage = Image.fromstring('RGBA', (surface.width, surface.height), data, "raw", 'RGBA', 0, 1)
+    return outimage
+
+def save_image(filename, surface):
+    outimage = make_image(surface)
+    outimage.save(filename, "PNG")
+
+def show_image(surface):
+    outimage = make_image(surface)
+
+    import Tkinter as tk
+    from PIL import Image, ImageTk
+    root = tk.Tk()
+
+    root.title('background image')
+
+    image1 = ImageTk.PhotoImage(outimage)
+    w = image1.width()
+    h = image1.height()
+    x = 100
+    y = 100
+    root.geometry("%dx%d+%d+%d" % (w, h, x, y))
+    panel1 = tk.Label(root, image=image1)
+    panel1.pack(side='top', fill='both', expand='yes')
+    panel1.image = image1
+    root.mainloop()
+
+
+def test(dev):
+    ctx = dev.context_create()
+
+    width = 255
+    height = 255
+    minz = 0.0
+    maxz = 1.0
+
+    # disabled blending/masking
+    blend = Blend()
+    blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE
+    blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE
+    blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO
+    blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO
+    blend.colormask = PIPE_MASK_RGBA
+    ctx.set_blend(blend)
+
+    # depth/stencil/alpha
+    depth_stencil_alpha = DepthStencilAlpha()
+    depth_stencil_alpha.depth.enabled = 1
+    depth_stencil_alpha.depth.writemask = 1
+    depth_stencil_alpha.depth.func = PIPE_FUNC_LESS
+    ctx.set_depth_stencil_alpha(depth_stencil_alpha)
+
+    # rasterizer
+    rasterizer = Rasterizer()
+    rasterizer.front_winding = PIPE_WINDING_CW
+    rasterizer.cull_mode = PIPE_WINDING_NONE
+    rasterizer.scissor = 1
+    ctx.set_rasterizer(rasterizer)
+
+    # viewport
+    viewport = Viewport()
+    scale = FloatArray(4)
+    scale[0] = width / 2.0
+    scale[1] = -height / 2.0
+    scale[2] = (maxz - minz) / 2.0
+    scale[3] = 1.0
+    viewport.scale = scale
+    translate = FloatArray(4)
+    translate[0] = width / 2.0
+    translate[1] = height / 2.0
+    translate[2] = (maxz - minz) / 2.0
+    translate[3] = 0.0
+    viewport.translate = translate
+    ctx.set_viewport(viewport)
+
+    # samplers
+    sampler = Sampler()
+    sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+    sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+    sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE
+    sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE
+    sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST
+    sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST
+    sampler.normalized_coords = 1
+    ctx.set_sampler(0, sampler)
+
+    # scissor
+    scissor = Scissor()
+    scissor.minx = 0
+    scissor.miny = 0
+    scissor.maxx = width
+    scissor.maxy = height
+    ctx.set_scissor(scissor)
+
+    clip = Clip()
+    clip.nr = 0
+    ctx.set_clip(clip)
+
+    # framebuffer
+    cbuf = dev.texture_create(
+        PIPE_FORMAT_X8R8G8B8_UNORM,
+        width, height,
+        tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET,
+    ).get_surface()
+    zbuf = dev.texture_create(
+        PIPE_FORMAT_Z16_UNORM,
+        width, height,
+        tex_usage=PIPE_TEXTURE_USAGE_DEPTH_STENCIL,
+    ).get_surface()
+    fb = Framebuffer()
+    fb.width = width
+    fb.height = height
+    fb.nr_cbufs = 1
+    fb.set_cbuf(0, cbuf)
+    fb.set_zsbuf(zbuf)
+    ctx.set_framebuffer(fb)
+    rgba = FloatArray(4);
+    rgba[0] = 0.0
+    rgba[1] = 0.0
+    rgba[2] = 0.0
+    rgba[3] = 0.0
+    ctx.clear(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTHSTENCIL, rgba, 1.0, 0xff)
+
+    # vertex shader
+    vs = Shader('''
+        VERT
+        DCL IN[0], POSITION, CONSTANT
+        DCL IN[1], COLOR, CONSTANT
+        DCL OUT[0], POSITION, CONSTANT
+        DCL OUT[1], COLOR, CONSTANT
+        0:MOV OUT[0], IN[0]
+        1:MOV OUT[1], IN[1]
+        2:END
+    ''')
+    ctx.set_vertex_shader(vs)
+
+    gs = Shader('''
+        GEOM
+        PROPERTY GS_INPUT_PRIMITIVE TRIANGLES
+        PROPERTY GS_OUTPUT_PRIMITIVE TRIANGLE_STRIP
+        DCL IN[][0], POSITION, CONSTANT
+        DCL IN[][1], COLOR, CONSTANT
+        DCL OUT[0], POSITION, CONSTANT
+        DCL OUT[1], COLOR, CONSTANT
+        0:MOV OUT[0], IN[0][0]
+        1:MOV OUT[1], IN[0][1]
+        2:EMIT_VERTEX
+        3:MOV OUT[0], IN[1][0]
+        4:MOV OUT[1], IN[1][1]
+        5:EMIT_VERTEX
+        6:MOV OUT[0], IN[2][0]
+        7:MOV OUT[1], IN[2][1]
+        8:EMIT_VERTEX
+        9:END_PRIMITIVE
+        10:END
+    ''')
+    ctx.set_geometry_shader(gs)
+
+    # fragment shader
+    fs = Shader('''
+        FRAG
+        DCL IN[0], COLOR, LINEAR
+        DCL OUT[0], COLOR, CONSTANT
+        0:MOV OUT[0], IN[0]
+        1:END
+    ''')
+    ctx.set_fragment_shader(fs)
+
+    nverts = 3
+    nattrs = 2
+    verts = FloatArray(nverts * nattrs * 4)
+
+    verts[ 0] =   0.0 # x1
+    verts[ 1] =   0.8 # y1
+    verts[ 2] =   0.2 # z1
+    verts[ 3] =   1.0 # w1
+    verts[ 4] =   1.0 # r1
+    verts[ 5] =   0.0 # g1
+    verts[ 6] =   0.0 # b1
+    verts[ 7] =   1.0 # a1
+    verts[ 8] =  -0.8 # x2
+    verts[ 9] =  -0.8 # y2
+    verts[10] =   0.5 # z2
+    verts[11] =   1.0 # w2
+    verts[12] =   0.0 # r2
+    verts[13] =   1.0 # g2
+    verts[14] =   0.0 # b2
+    verts[15] =   1.0 # a2
+    verts[16] =   0.8 # x3
+    verts[17] =  -0.8 # y3
+    verts[18] =   0.8 # z3
+    verts[19] =   1.0 # w3
+    verts[20] =   0.0 # r3
+    verts[21] =   0.0 # g3
+    verts[22] =   1.0 # b3
+    verts[23] =   1.0 # a3
+
+    ctx.draw_vertices(PIPE_PRIM_TRIANGLES,
+                      nverts,
+                      nattrs,
+                      verts)
+
+    ctx.flush()
+
+    show_image(cbuf)
+    #show_image(zbuf)
+    #save_image('cbuf.png', cbuf)
+    #save_image('zbuf.png', zbuf)
+
+
+
+def main():
+    dev = Device()
+    test(dev)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h
index a246b6a1f2..2a7a323985 100644
--- a/src/gallium/state_trackers/python/st_device.h
+++ b/src/gallium/state_trackers/python/st_device.h
@@ -57,6 +57,7 @@ struct st_context {
    
    void *vs;
    void *fs;
+   void *gs;
 
    struct pipe_texture *default_texture;
    struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS];
diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c
index d793f820bc..cfc0caac98 100644
--- a/src/mesa/state_tracker/st_draw_feedback.c
+++ b/src/mesa/state_tracker/st_draw_feedback.c
@@ -241,7 +241,8 @@ st_feedback_draw_vbo(GLcontext *ctx,
    mapped_constants = pipe_buffer_map(pipe->screen,
                                       st->state.constants[PIPE_SHADER_VERTEX].buffer,
                                       PIPE_BUFFER_USAGE_CPU_READ);
-   draw_set_mapped_constant_buffer(st->draw, mapped_constants,
+   draw_set_mapped_constant_buffer(st->draw, PIPE_SHADER_VERTEX,
+                                   mapped_constants,
                                    st->state.constants[PIPE_SHADER_VERTEX].buffer->size);
 
 
-- 
cgit v1.2.3


From b0575151b66c9f5387c0433f41db9c19a848d45e Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Sat, 19 Dec 2009 13:54:53 -0500
Subject: gs: fix drivers so they work with geometry shaders

---
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 4 ++--
 src/gallium/drivers/r300/r300_state_derived.c   | 6 +++---
 src/gallium/drivers/svga/svga_swtnl_state.c     | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index fdc6a389b4..acfd7be5f7 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -116,13 +116,13 @@ llvmpipe_get_vertex_info(struct llvmpipe_context *llvmpipe)
          }
 
          /* this includes texcoords and varying vars */
-         src = draw_find_vs_output(llvmpipe->draw,
+         src = draw_find_shader_output(llvmpipe->draw,
                                    lpfs->info.input_semantic_name[i],
                                    lpfs->info.input_semantic_index[i]);
          draw_emit_vertex_attr(vinfo, EMIT_4F, interp, src);
       }
 
-      llvmpipe->psize_slot = draw_find_vs_output(llvmpipe->draw,
+      llvmpipe->psize_slot = draw_find_shader_output(llvmpipe->draw,
                                                  TGSI_SEMANTIC_PSIZE, 0);
       if (llvmpipe->psize_slot > 0) {
          draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 29bc701a86..727ae7ade6 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -71,9 +71,9 @@ static void r300_draw_emit_attrib(struct r300_context* r300,
     struct tgsi_shader_info* info = &r300->vs->info;
     int output;
 
-    output = draw_find_vs_output(r300->draw,
-                                 info->output_semantic_name[index],
-                                 info->output_semantic_index[index]);
+    output = draw_find_shader_output(r300->draw,
+                                     info->output_semantic_name[index],
+                                     info->output_semantic_index[index]);
     draw_emit_vertex_attr(&r300->vertex_info->vinfo, emit, interp, output);
 }
 
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
index 25b8c2af3a..94b6ccc62d 100644
--- a/src/gallium/drivers/svga/svga_swtnl_state.c
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -156,7 +156,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
    memset(vdecl, 0, sizeof(vdecl));
 
    /* always add position */
-   src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0);
+   src = draw_find_shader_output(draw, TGSI_SEMANTIC_POSITION, 0);
    draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
    vinfo->attrib[0].emit = EMIT_4F;
    vdecl[0].array.offset = offset;
@@ -169,7 +169,7 @@ int svga_swtnl_update_vdecl( struct svga_context *svga )
    for (i = 0; i < fs->base.info.num_inputs; i++) {
       unsigned name = fs->base.info.input_semantic_name[i];
       unsigned index = fs->base.info.input_semantic_index[i];
-      src = draw_find_vs_output(draw, name, index);
+      src = draw_find_shader_output(draw, name, index);
       vdecl[nr_decls].array.offset = offset;
       vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
 
-- 
cgit v1.2.3


From 42eefb8235437655632b36490f49a3a8fdc69401 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Wed, 23 Dec 2009 12:34:34 -0500
Subject: softpipe/gs: don't crash with null shader

---
 src/gallium/drivers/softpipe/sp_state_fs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index 22f82b1a42..b904bde25c 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -221,6 +221,7 @@ softpipe_delete_gs_state(struct pipe_context *pipe, void *gs)
    struct sp_geometry_shader *state =
       (struct sp_geometry_shader *)gs;
 
-   draw_delete_geometry_shader(softpipe->draw, state->draw_data);
+   draw_delete_geometry_shader(softpipe->draw,
+                               (state) ? state->draw_data : 0);
    FREE(state);
 }
-- 
cgit v1.2.3


From ac96ee1c16419583aa43c9f4100aaca774b9439a Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Wed, 23 Dec 2009 13:30:12 -0500
Subject: softpipe: make it possible to dump geometry shaders from softpipe

---
 src/gallium/drivers/softpipe/sp_context.c  | 1 +
 src/gallium/drivers/softpipe/sp_context.h  | 1 +
 src/gallium/drivers/softpipe/sp_state_fs.c | 4 ++++
 3 files changed, 6 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 0c890cb940..82173a3c2a 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -191,6 +191,7 @@ softpipe_create( struct pipe_screen *screen )
 #endif
 
    softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE );
+   softpipe->dump_gs = debug_get_bool_option( "SOFTPIPE_DUMP_GS", FALSE );
 
    softpipe->pipe.winsys = screen->winsys;
    softpipe->pipe.screen = screen;
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 159547e2a6..6a89bd4b06 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -148,6 +148,7 @@ struct softpipe_context {
 
    unsigned use_sse : 1;
    unsigned dump_fs : 1;
+   unsigned dump_gs : 1;
    unsigned no_rast : 1;
 };
 
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index b904bde25c..acec62dc2a 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -177,6 +177,10 @@ softpipe_create_gs_state(struct pipe_context *pipe,
    if (state == NULL )
       goto fail;
 
+   /* debug */
+   if (softpipe->dump_gs)
+      tgsi_dump(templ->tokens, 0);
+
    /* copy shader tokens, the ones passed in will go away.
     */
    state->shader.tokens = tgsi_dup_tokens(templ->tokens);
-- 
cgit v1.2.3


From 519a6d308d201a5ccedecf87344fd7542e90bb0f Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Thu, 24 Dec 2009 09:30:00 -0500
Subject: gallium: geometry shaders can always be enabled and we don't need a
 cap for that

Using the draw module allows us to enable geometry shading even on hardware
that doesn't support it.
---
 src/gallium/drivers/softpipe/sp_screen.c | 2 --
 src/gallium/include/pipe/p_defines.h     | 1 -
 2 files changed, 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index a32312d29b..bd3532de4f 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -89,8 +89,6 @@ softpipe_get_param(struct pipe_screen *screen, int param)
       return 13; /* max 4Kx4K */
    case PIPE_CAP_TGSI_CONT_SUPPORTED:
       return 1;
-   case PIPE_CAP_GEOMETRY_SHADER4:
-      return 1;
    case PIPE_CAP_BLEND_EQUATION_SEPARATE:
       return 1;
    default:
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index 90fc3331d1..2cda408fec 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -398,7 +398,6 @@ enum pipe_transfer_usage {
 #define PIPE_CAP_MAX_PREDICATE_REGISTERS 30
 #define PIPE_CAP_MAX_COMBINED_SAMPLERS   31  /*< Maximum texture image units accessible from vertex
                                                  and fragment shaders combined */
-#define PIPE_CAP_GEOMETRY_SHADER4        32
 
 
 /**
-- 
cgit v1.2.3


From 5676082d28d1d0cbb70b6526d1a7c650a3d28336 Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@linux.ie>
Date: Sat, 26 Dec 2009 07:28:21 +1000
Subject: r300g: fixup for GS additions

draw_set_mapped_constant_buffer changed API
---
 src/gallium/drivers/r300/r300_render.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 2d70ec2ac9..82089f9161 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -335,8 +335,9 @@ boolean r300_swtcl_draw_arrays(struct pipe_context* pipe,
     draw_set_mapped_element_buffer(r300->draw, 0, NULL);
 
     draw_set_mapped_constant_buffer(r300->draw,
-            r300->shader_constants[PIPE_SHADER_VERTEX].constants,
-            r300->shader_constants[PIPE_SHADER_VERTEX].count *
+				    PIPE_SHADER_VERTEX,
+				    r300->shader_constants[PIPE_SHADER_VERTEX].constants,
+				    r300->shader_constants[PIPE_SHADER_VERTEX].count *
                 (sizeof(float) * 4));
 
     draw_arrays(r300->draw, mode, start, count);
@@ -383,6 +384,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
                                          minIndex, maxIndex, indices);
 
     draw_set_mapped_constant_buffer(r300->draw,
+				    PIPE_SHADER_VERTEX,
             r300->shader_constants[PIPE_SHADER_VERTEX].constants,
             r300->shader_constants[PIPE_SHADER_VERTEX].count *
                 (sizeof(float) * 4));
-- 
cgit v1.2.3


From 8353c87ccef69fdfaf778e177c1a6651214db135 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 24 Dec 2009 15:53:45 +0000
Subject: svga: Allow dumping an individual command.

---
 src/gallium/drivers/svga/svgadump/svga_dump.c  | 606 +++++++++++++------------
 src/gallium/drivers/svga/svgadump/svga_dump.h  |   3 +
 src/gallium/drivers/svga/svgadump/svga_dump.py |  94 ++--
 3 files changed, 363 insertions(+), 340 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
index e6d4a74e86..d59fb89a58 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.c
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -1443,6 +1443,312 @@ dump_SVGA3dCmdBlitSurfaceToScreen(const SVGA3dCmdBlitSurfaceToScreen *cmd)
 }
 
 
+void            
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+   const uint8_t *body = (const uint8_t *)data;
+   const uint8_t *next = body + size;
+  
+   switch(cmd_id) {
+   case SVGA_3D_CMD_SURFACE_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+      {
+         const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+         dump_SVGA3dCmdDefineSurface(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dSize) <= next) {
+            dump_SVGA3dSize((const SVGA3dSize *)body);
+            body += sizeof(SVGA3dSize);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+      {
+         const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+         dump_SVGA3dCmdDestroySurface(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_COPY:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+      {
+         const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+         dump_SVGA3dCmdSurfaceCopy(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyBox) <= next) {
+            dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+            body += sizeof(SVGA3dCopyBox);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+      {
+         const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+         dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SURFACE_DMA:
+      _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+      {
+         const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+         dump_SVGA3dCmdSurfaceDMA(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyBox) <= next) {
+            dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+            body += sizeof(SVGA3dCopyBox);
+         }
+         while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+            dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+            body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_CONTEXT_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+      {
+         const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+         dump_SVGA3dCmdDefineContext(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_CONTEXT_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+      {
+         const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+         dump_SVGA3dCmdDestroyContext(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETTRANSFORM:
+      _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+      {
+         const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+         dump_SVGA3dCmdSetTransform(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETZRANGE:
+      _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+      {
+         const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+         dump_SVGA3dCmdSetZRange(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETRENDERSTATE:
+      _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+      {
+         const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+         dump_SVGA3dCmdSetRenderState(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dRenderState) <= next) {
+            dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+            body += sizeof(SVGA3dRenderState);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETRENDERTARGET:
+      _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+      {
+         const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+         dump_SVGA3dCmdSetRenderTarget(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETTEXTURESTATE:
+      _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+      {
+         const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+         dump_SVGA3dCmdSetTextureState(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dTextureState) <= next) {
+            dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+            body += sizeof(SVGA3dTextureState);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETMATERIAL:
+      _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+      {
+         const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+         dump_SVGA3dCmdSetMaterial(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETLIGHTDATA:
+      _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+      {
+         const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+         dump_SVGA3dCmdSetLightData(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETLIGHTENABLED:
+      _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+      {
+         const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+         dump_SVGA3dCmdSetLightEnabled(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETVIEWPORT:
+      _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+      {
+         const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+         dump_SVGA3dCmdSetViewport(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SETCLIPPLANE:
+      _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+      {
+         const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+         dump_SVGA3dCmdSetClipPlane(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_CLEAR:
+      _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+      {
+         const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+         dump_SVGA3dCmdClear(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dRect) <= next) {
+            dump_SVGA3dRect((const SVGA3dRect *)body);
+            body += sizeof(SVGA3dRect);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_PRESENT:
+      _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+      {
+         const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+         dump_SVGA3dCmdPresent(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGA3dCopyRect) <= next) {
+            dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+            body += sizeof(SVGA3dCopyRect);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SHADER_DEFINE:
+      _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+      {
+         const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+         dump_SVGA3dCmdDefineShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+         svga_shader_dump((const uint32_t *)body, 
+                      (unsigned)(next - body)/sizeof(uint32_t),
+                      FALSE );
+         body = next;
+      }
+      break;
+   case SVGA_3D_CMD_SHADER_DESTROY:
+      _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+      {
+         const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+         dump_SVGA3dCmdDestroyShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SET_SHADER:
+      _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+      {
+         const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+         dump_SVGA3dCmdSetShader(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_SET_SHADER_CONST:
+      _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+      {
+         const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+         dump_SVGA3dCmdSetShaderConst(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_DRAW_PRIMITIVES:
+      _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+      {
+         const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+         unsigned i, j;
+         dump_SVGA3dCmdDrawPrimitives(cmd);
+         body = (const uint8_t *)&cmd[1];
+         for(i = 0; i < cmd->numVertexDecls; ++i) {
+            dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+            body += sizeof(SVGA3dVertexDecl);
+         }
+         for(j = 0; j < cmd->numRanges; ++j) {
+            dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+            body += sizeof(SVGA3dPrimitiveRange);
+         }
+         while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+            dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+            body += sizeof(SVGA3dVertexDivisor);
+         }
+      }
+      break;
+   case SVGA_3D_CMD_SETSCISSORRECT:
+      _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+      {
+         const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+         dump_SVGA3dCmdSetScissorRect(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_BEGIN_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+      {
+         const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+         dump_SVGA3dCmdBeginQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_END_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+      {
+         const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+         dump_SVGA3dCmdEndQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_WAIT_FOR_QUERY:
+      _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+      {
+         const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+         dump_SVGA3dCmdWaitForQuery(cmd);
+         body = (const uint8_t *)&cmd[1];
+      }
+      break;
+   case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
+      _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
+      {
+         const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
+         dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
+         body = (const uint8_t *)&cmd[1];
+         while(body + sizeof(SVGASignedRect) <= next) {
+            dump_SVGASignedRect((const SVGASignedRect *)body);
+            body += sizeof(SVGASignedRect);
+         }
+      }
+      break;
+   default:
+      _debug_printf("\t0x%08x\n", cmd_id);
+      break;
+   }
+
+   while(body + sizeof(uint32_t) <= next) {
+      _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+      body += sizeof(uint32_t);
+   }
+   while(body + sizeof(uint32_t) <= next)
+      _debug_printf("\t\t0x%02x\n", *body++);
+}
+
+
 void            
 svga_dump_commands(const void *commands, uint32_t size)
 {
@@ -1458,307 +1764,11 @@ svga_dump_commands(const void *commands, uint32_t size)
          const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
          const uint8_t *body = (const uint8_t *)&header[1];
 
-         next = (const uint8_t *)body + header->size;
+         next = body + header->size;
          if(next > last)
             break;
 
-         switch(cmd_id) {
-         case SVGA_3D_CMD_SURFACE_DEFINE:
-            _debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
-            {
-               const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
-               dump_SVGA3dCmdDefineSurface(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dSize) <= next) {
-                  dump_SVGA3dSize((const SVGA3dSize *)body);
-                  body += sizeof(SVGA3dSize);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SURFACE_DESTROY:
-            _debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
-            {
-               const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
-               dump_SVGA3dCmdDestroySurface(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SURFACE_COPY:
-            _debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
-            {
-               const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
-               dump_SVGA3dCmdSurfaceCopy(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dCopyBox) <= next) {
-                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
-                  body += sizeof(SVGA3dCopyBox);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SURFACE_STRETCHBLT:
-            _debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
-            {
-               const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
-               dump_SVGA3dCmdSurfaceStretchBlt(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SURFACE_DMA:
-            _debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
-            {
-               const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
-               dump_SVGA3dCmdSurfaceDMA(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dCopyBox) <= next) {
-                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
-                  body += sizeof(SVGA3dCopyBox);
-               }
-               while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
-                  dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
-                  body += sizeof(SVGA3dCmdSurfaceDMASuffix);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_CONTEXT_DEFINE:
-            _debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
-            {
-               const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
-               dump_SVGA3dCmdDefineContext(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_CONTEXT_DESTROY:
-            _debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
-            {
-               const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
-               dump_SVGA3dCmdDestroyContext(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETTRANSFORM:
-            _debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
-            {
-               const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
-               dump_SVGA3dCmdSetTransform(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETZRANGE:
-            _debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
-            {
-               const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
-               dump_SVGA3dCmdSetZRange(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETRENDERSTATE:
-            _debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
-            {
-               const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
-               dump_SVGA3dCmdSetRenderState(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dRenderState) <= next) {
-                  dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
-                  body += sizeof(SVGA3dRenderState);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SETRENDERTARGET:
-            _debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
-            {
-               const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
-               dump_SVGA3dCmdSetRenderTarget(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETTEXTURESTATE:
-            _debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
-            {
-               const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
-               dump_SVGA3dCmdSetTextureState(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dTextureState) <= next) {
-                  dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
-                  body += sizeof(SVGA3dTextureState);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SETMATERIAL:
-            _debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
-            {
-               const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
-               dump_SVGA3dCmdSetMaterial(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETLIGHTDATA:
-            _debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
-            {
-               const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
-               dump_SVGA3dCmdSetLightData(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETLIGHTENABLED:
-            _debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
-            {
-               const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
-               dump_SVGA3dCmdSetLightEnabled(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETVIEWPORT:
-            _debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
-            {
-               const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
-               dump_SVGA3dCmdSetViewport(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SETCLIPPLANE:
-            _debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
-            {
-               const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
-               dump_SVGA3dCmdSetClipPlane(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_CLEAR:
-            _debug_printf("\tSVGA_3D_CMD_CLEAR\n");
-            {
-               const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
-               dump_SVGA3dCmdClear(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dRect) <= next) {
-                  dump_SVGA3dRect((const SVGA3dRect *)body);
-                  body += sizeof(SVGA3dRect);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_PRESENT:
-            _debug_printf("\tSVGA_3D_CMD_PRESENT\n");
-            {
-               const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
-               dump_SVGA3dCmdPresent(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGA3dCopyRect) <= next) {
-                  dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
-                  body += sizeof(SVGA3dCopyRect);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SHADER_DEFINE:
-            _debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
-            {
-               const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
-               dump_SVGA3dCmdDefineShader(cmd);
-               body = (const uint8_t *)&cmd[1];
-               svga_shader_dump((const uint32_t *)body, 
-                            (unsigned)(next - body)/sizeof(uint32_t),
-                            FALSE );
-               body = next;
-            }
-            break;
-         case SVGA_3D_CMD_SHADER_DESTROY:
-            _debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
-            {
-               const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
-               dump_SVGA3dCmdDestroyShader(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SET_SHADER:
-            _debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
-            {
-               const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
-               dump_SVGA3dCmdSetShader(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_SET_SHADER_CONST:
-            _debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
-            {
-               const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
-               dump_SVGA3dCmdSetShaderConst(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_DRAW_PRIMITIVES:
-            _debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
-            {
-               const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
-               unsigned i, j;
-               dump_SVGA3dCmdDrawPrimitives(cmd);
-               body = (const uint8_t *)&cmd[1];
-               for(i = 0; i < cmd->numVertexDecls; ++i) {
-                  dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
-                  body += sizeof(SVGA3dVertexDecl);
-               }
-               for(j = 0; j < cmd->numRanges; ++j) {
-                  dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
-                  body += sizeof(SVGA3dPrimitiveRange);
-               }
-               while(body + sizeof(SVGA3dVertexDivisor) <= next) {
-                  dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
-                  body += sizeof(SVGA3dVertexDivisor);
-               }
-            }
-            break;
-         case SVGA_3D_CMD_SETSCISSORRECT:
-            _debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
-            {
-               const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
-               dump_SVGA3dCmdSetScissorRect(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_BEGIN_QUERY:
-            _debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
-            {
-               const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
-               dump_SVGA3dCmdBeginQuery(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_END_QUERY:
-            _debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
-            {
-               const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
-               dump_SVGA3dCmdEndQuery(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_WAIT_FOR_QUERY:
-            _debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
-            {
-               const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
-               dump_SVGA3dCmdWaitForQuery(cmd);
-               body = (const uint8_t *)&cmd[1];
-            }
-            break;
-         case SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN:
-            _debug_printf("\tSVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN\n");
-            {
-               const SVGA3dCmdBlitSurfaceToScreen *cmd = (const SVGA3dCmdBlitSurfaceToScreen *)body;
-               dump_SVGA3dCmdBlitSurfaceToScreen(cmd);
-               body = (const uint8_t *)&cmd[1];
-               while(body + sizeof(SVGASignedRect) <= next) {
-                  dump_SVGASignedRect((const SVGASignedRect *)body);
-                  body += sizeof(SVGASignedRect);
-               }
-            }
-            break;
-         default:
-            _debug_printf("\t0x%08x\n", cmd_id);
-            break;
-         }
-
-         while(body + sizeof(uint32_t) <= next) {
-            _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
-            body += sizeof(uint32_t);
-         }
-         while(body + sizeof(uint32_t) <= next)
-            _debug_printf("\t\t0x%02x\n", *body++);
+         svga_dump_command(cmd_id, body, header->size);
       }
       else if(cmd_id == SVGA_CMD_FENCE) {
          _debug_printf("\tSVGA_CMD_FENCE\n");
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
index 69a8702087..ca0154361c 100644
--- a/src/gallium/drivers/svga/svgadump/svga_dump.h
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -28,6 +28,9 @@
 
 #include "pipe/p_compiler.h"
 
+void            
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size);
+
 void
 svga_dump_commands(const void *commands, uint32_t size);
 
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
index a1ada29ef8..0bc0b3ae31 100755
--- a/src/gallium/drivers/svga/svgadump/svga_dump.py
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -208,6 +208,56 @@ cmds = [
 def dump_cmds():
     print r'''
 void            
+svga_dump_command(uint32_t cmd_id, const void *data, uint32_t size)
+{
+   const uint8_t *body = (const uint8_t *)data;
+   const uint8_t *next = body + size;
+'''
+    print '   switch(cmd_id) {'
+    indexes = 'ijklmn'
+    for id, header, body, footer in cmds:
+        print '   case %s:' % id
+        print '      _debug_printf("\\t%s\\n");' % id
+        print '      {'
+        print '         const %s *cmd = (const %s *)body;' % (header, header)
+        if len(body):
+            print '         unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+        print '         dump_%s(cmd);' % header
+        print '         body = (const uint8_t *)&cmd[1];'
+        for i in range(len(body)):
+            struct, count = body[i]
+            idx = indexes[i]
+            print '         for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+            print '            dump_%s((const %s *)body);' % (struct, struct)
+            print '            body += sizeof(%s);' % struct
+            print '         }'
+        if footer is not None:
+            print '         while(body + sizeof(%s) <= next) {' % footer
+            print '            dump_%s((const %s *)body);' % (footer, footer)
+            print '            body += sizeof(%s);' % footer
+            print '         }'
+        if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+            print '         svga_shader_dump((const uint32_t *)body,'
+            print '                          (unsigned)(next - body)/sizeof(uint32_t),'
+            print '                          FALSE);'
+            print '         body = next;'
+        print '      }'
+        print '      break;'
+    print '   default:'
+    print '      _debug_printf("\\t0x%08x\\n", cmd_id);'
+    print '      break;'
+    print '   }'
+    print r'''
+   while(body + sizeof(uint32_t) <= next) {
+      _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+      body += sizeof(uint32_t);
+   }
+   while(body + sizeof(uint32_t) <= next)
+      _debug_printf("\t\t0x%02x\n", *body++);
+}
+'''
+    print r'''
+void            
 svga_dump_commands(const void *commands, uint32_t size)
 {
    const uint8_t *next = commands;
@@ -222,51 +272,11 @@ svga_dump_commands(const void *commands, uint32_t size)
          const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
          const uint8_t *body = (const uint8_t *)&header[1];
 
-         next = (const uint8_t *)body + header->size;
+         next = body + header->size;
          if(next > last)
             break;
-'''
 
-    print '         switch(cmd_id) {'
-    indexes = 'ijklmn'
-    for id, header, body, footer in cmds:
-        print '         case %s:' % id
-        print '            _debug_printf("\\t%s\\n");' % id
-        print '            {'
-        print '               const %s *cmd = (const %s *)body;' % (header, header)
-        if len(body):
-            print '               unsigned ' + ', '.join(indexes[:len(body)]) + ';'
-        print '               dump_%s(cmd);' % header
-        print '               body = (const uint8_t *)&cmd[1];'
-        for i in range(len(body)):
-            struct, count = body[i]
-            idx = indexes[i]
-            print '               for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
-            print '                  dump_%s((const %s *)body);' % (struct, struct)
-            print '                  body += sizeof(%s);' % struct
-            print '               }'
-        if footer is not None:
-            print '               while(body + sizeof(%s) <= next) {' % footer
-            print '                  dump_%s((const %s *)body);' % (footer, footer)
-            print '                  body += sizeof(%s);' % footer
-            print '               }'
-        if id == 'SVGA_3D_CMD_SHADER_DEFINE':
-            print '               sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));'
-            print '               body = next;'
-        print '            }'
-        print '            break;'
-    print '         default:'
-    print '            _debug_printf("\\t0x%08x\\n", cmd_id);'
-    print '            break;'
-    print '         }'
-            
-    print r'''
-         while(body + sizeof(uint32_t) <= next) {
-            _debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
-            body += sizeof(uint32_t);
-         }
-         while(body + sizeof(uint32_t) <= next)
-            _debug_printf("\t\t0x%02x\n", *body++);
+         svga_dump_command(cmd_id, body, header->size);
       }
       else if(cmd_id == SVGA_CMD_FENCE) {
          _debug_printf("\tSVGA_CMD_FENCE\n");
-- 
cgit v1.2.3


From 3f176bf08cd729d67a00d2bd073f29286b1f9a29 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 25 Dec 2009 17:40:08 +0000
Subject: trace: Fix transfer size computation.

---
 src/gallium/drivers/trace/tr_screen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c
index ac20a47af1..117503aaff 100644
--- a/src/gallium/drivers/trace/tr_screen.c
+++ b/src/gallium/drivers/trace/tr_screen.c
@@ -426,7 +426,7 @@ trace_screen_transfer_unmap(struct pipe_screen *_screen,
    struct pipe_transfer *transfer = tr_trans->transfer;
 
    if(tr_trans->map) {
-      size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->width) * transfer->stride;
+      size_t size = util_format_get_nblocksy(transfer->texture->format, transfer->height) * transfer->stride;
 
       trace_dump_call_begin("pipe_screen", "transfer_write");
 
-- 
cgit v1.2.3


From 491a18494373434c1a8e563f489d51b7760f227f Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 26 Dec 2009 11:00:42 +0000
Subject: llvmpipe: Flush draw module before switching framebuffer.

Otherwise geometry will end up in the wrong rendertarget.
---
 src/gallium/drivers/llvmpipe/lp_state_surface.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index ba970cac98..e37ff04f3d 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -51,6 +51,8 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
    struct llvmpipe_context *lp = llvmpipe_context(pipe);
    uint i;
 
+   draw_flush(lp->draw);
+
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       /* check if changing cbuf */
       if (lp->framebuffer.cbufs[i] != fb->cbufs[i]) {
-- 
cgit v1.2.3


From 7a15642f411613df51474d5c2ab85456b5ca41ce Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 26 Dec 2009 11:01:05 +0000
Subject: softpipe: Flush draw module before switching framebuffer.

Otherwise geometry might end up in the wrong rendertarget.
---
 src/gallium/drivers/softpipe/sp_state_surface.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c
index a518248bb1..f6154109ea 100644
--- a/src/gallium/drivers/softpipe/sp_state_surface.c
+++ b/src/gallium/drivers/softpipe/sp_state_surface.c
@@ -51,6 +51,8 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe,
    struct softpipe_context *sp = softpipe_context(pipe);
    uint i;
 
+   draw_flush(sp->draw);
+
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       /* check if changing cbuf */
       if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) {
-- 
cgit v1.2.3


From d01c7bef296f66bffe3ad167abb0c04c25f7fdae Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 26 Dec 2009 11:02:02 +0000
Subject: llvmpipe: Use comments and more code from softpipe's
 is_texture_referenced implementation.

---
 src/gallium/drivers/llvmpipe/lp_context.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 001311e703..37587d4f79 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -140,6 +140,7 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
    struct llvmpipe_context *llvmpipe = llvmpipe_context( pipe );
    unsigned i;
 
+   /* check if any of the bound drawing surfaces are this texture */
    if(llvmpipe->dirty_render_cache) {
       for (i = 0; i < llvmpipe->framebuffer.nr_cbufs; i++) {
          if(llvmpipe->framebuffer.cbufs[i] && 
@@ -150,6 +151,13 @@ llvmpipe_is_texture_referenced( struct pipe_context *pipe,
          llvmpipe->framebuffer.zsbuf->texture == texture)
          return PIPE_REFERENCED_FOR_WRITE;
    }
+
+   /* check if any of the tex_cache textures are this texture */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      if (llvmpipe->tex_cache[i] &&
+            llvmpipe->tex_cache[i]->texture == texture)
+         return PIPE_REFERENCED_FOR_READ;
+   }
    for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
       if (llvmpipe->vertex_tex_cache[i] &&
           llvmpipe->vertex_tex_cache[i]->texture == texture)
-- 
cgit v1.2.3


From 080703e398f737b71336312fd3dc8d6f38f61e51 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 26 Dec 2009 15:21:16 +0000
Subject: llvmpipe: Treat state changes systematically.

That is:
- check for no op
- update/flush draw module
- update bound state and mark it as dirty

In particular flushing the draw module is important since it may contain
unflushed primitives which would otherwise be drawn with the wrong state.
---
 src/gallium/drivers/llvmpipe/lp_state_blend.c      | 18 +++++++++++++++++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c         | 10 +++++++---
 src/gallium/drivers/llvmpipe/lp_state_rasterizer.c |  9 ++++++---
 src/gallium/drivers/llvmpipe/lp_state_vs.c         | 12 ++++++++----
 4 files changed, 38 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_state_blend.c b/src/gallium/drivers/llvmpipe/lp_state_blend.c
index b2e75d3b14..a94cd05ef2 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_blend.c
@@ -34,6 +34,7 @@
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_debug_dump.h"
+#include "draw/draw_context.h"
 #include "lp_screen.h"
 #include "lp_context.h"
 #include "lp_state.h"
@@ -51,6 +52,11 @@ void llvmpipe_bind_blend_state( struct pipe_context *pipe,
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
+   if (llvmpipe->blend == blend)
+      return;
+
+   draw_flush(llvmpipe->draw);
+
    llvmpipe->blend = blend;
 
    llvmpipe->dirty |= LP_NEW_BLEND;
@@ -69,6 +75,11 @@ void llvmpipe_set_blend_color( struct pipe_context *pipe,
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
    unsigned i, j;
 
+   if(memcmp(&llvmpipe->blend_color, blend_color, sizeof *blend_color) == 0)
+      return;
+
+   draw_flush(llvmpipe->draw);
+
    memcpy(&llvmpipe->blend_color, blend_color, sizeof *blend_color);
 
    if(!llvmpipe->jit_context.blend_color)
@@ -99,7 +110,12 @@ llvmpipe_bind_depth_stencil_state(struct pipe_context *pipe,
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
-   llvmpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil;
+   if (llvmpipe->depth_stencil == depth_stencil)
+      return;
+
+   draw_flush(llvmpipe->draw);
+
+   llvmpipe->depth_stencil = depth_stencil;
 
    if(llvmpipe->depth_stencil)
       llvmpipe->jit_context.alpha_ref_value = llvmpipe->depth_stencil->alpha.ref_value;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index af053f1dc6..3e8f595e99 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -673,7 +673,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
-   llvmpipe->fs = (struct lp_fragment_shader *) fs;
+   if (llvmpipe->fs == fs)
+      return;
+
+   draw_flush(llvmpipe->draw);
+
+   llvmpipe->fs = fs;
 
    llvmpipe->dirty |= LP_NEW_FS;
 }
@@ -723,8 +728,7 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
    assert(shader < PIPE_SHADER_TYPES);
    assert(index == 0);
 
-   if(shader == PIPE_SHADER_VERTEX)
-      draw_flush(llvmpipe->draw);
+   draw_flush(llvmpipe->draw);
 
    /* note: reference counting */
    pipe_buffer_reference(&llvmpipe->constants[shader].buffer, buffer);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
index 4561c6b845..aa3b5a3f91 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_rasterizer.c
@@ -41,14 +41,17 @@ llvmpipe_create_rasterizer_state(struct pipe_context *pipe,
 }
 
 void llvmpipe_bind_rasterizer_state(struct pipe_context *pipe,
-                                    void *setup)
+                                    void *rasterizer)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
 
+   if (llvmpipe->rasterizer == rasterizer)
+      return;
+
    /* pass-through to draw module */
-   draw_set_rasterizer_state(llvmpipe->draw, setup);
+   draw_set_rasterizer_state(llvmpipe->draw, rasterizer);
 
-   llvmpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+   llvmpipe->rasterizer = rasterizer;
 
    llvmpipe->dirty |= LP_NEW_RASTERIZER;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 8a761648e7..884e3878e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -70,14 +70,18 @@ fail:
 
 
 void
-llvmpipe_bind_vs_state(struct pipe_context *pipe, void *vs)
+llvmpipe_bind_vs_state(struct pipe_context *pipe, void *_vs)
 {
    struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   const struct lp_vertex_shader *vs = (const struct lp_vertex_shader *)_vs;
 
-   llvmpipe->vs = (const struct lp_vertex_shader *)vs;
+   if (llvmpipe->vs == vs)
+      return;
 
-   draw_bind_vertex_shader(llvmpipe->draw,
-                           (llvmpipe->vs ? llvmpipe->vs->draw_data : NULL));
+   draw_bind_vertex_shader(llvmpipe->draw, 
+                           vs ? vs->draw_data : NULL);
+
+   llvmpipe->vs = vs;
 
    llvmpipe->dirty |= LP_NEW_VS;
 }
-- 
cgit v1.2.3


From 4ccf0bb74e7f88ff51bba64a2a94a29f997231f5 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 26 Dec 2009 21:05:31 +0000
Subject: softpipe: Flush draw module when fragment pipeline state changes.

---
 src/gallium/drivers/softpipe/sp_state_blend.c      |  5 +++++
 src/gallium/drivers/softpipe/sp_state_fs.c         | 11 ++++++++++-
 src/gallium/drivers/softpipe/sp_state_rasterizer.c |  9 ++++++---
 3 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c
index efed082f82..95ab323433 100644
--- a/src/gallium/drivers/softpipe/sp_state_blend.c
+++ b/src/gallium/drivers/softpipe/sp_state_blend.c
@@ -29,6 +29,7 @@
  */
 
 #include "util/u_memory.h"
+#include "draw/draw_context.h"
 #include "sp_context.h"
 #include "sp_state.h"
 
@@ -45,6 +46,8 @@ void softpipe_bind_blend_state( struct pipe_context *pipe,
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
 
+   draw_flush(softpipe->draw);
+
    softpipe->blend = (struct pipe_blend_state *)blend;
 
    softpipe->dirty |= SP_NEW_BLEND;
@@ -62,6 +65,8 @@ void softpipe_set_blend_color( struct pipe_context *pipe,
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
 
+   draw_flush(softpipe->draw);
+
    softpipe->blend_color = *blend_color;
 
    softpipe->dirty |= SP_NEW_BLEND;
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index acec62dc2a..aa12bb215a 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -69,7 +69,14 @@ softpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
 
-   softpipe->fs = (struct sp_fragment_shader *) fs;
+   draw_flush(softpipe->draw);
+
+   if (softpipe->fs == fs)
+      return;
+
+   draw_flush(softpipe->draw);
+
+   softpipe->fs = fs;
 
    softpipe->dirty |= SP_NEW_FS;
 }
@@ -159,6 +166,8 @@ softpipe_set_constant_buffer(struct pipe_context *pipe,
    assert(shader < PIPE_SHADER_TYPES);
    assert(index == 0);
 
+   draw_flush(softpipe->draw);
+
    /* note: reference counting */
    pipe_buffer_reference(&softpipe->constants[shader].buffer,
 			 buf ? buf->buffer : NULL);
diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
index 87b7219683..a5b00336d4 100644
--- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c
+++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c
@@ -41,14 +41,17 @@ softpipe_create_rasterizer_state(struct pipe_context *pipe,
 }
 
 void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
-                                    void *setup)
+                                    void *rasterizer)
 {
    struct softpipe_context *softpipe = softpipe_context(pipe);
 
+   if (softpipe->rasterizer == rasterizer)
+      return;
+
    /* pass-through to draw module */
-   draw_set_rasterizer_state(softpipe->draw, setup);
+   draw_set_rasterizer_state(softpipe->draw, rasterizer);
 
-   softpipe->rasterizer = (struct pipe_rasterizer_state *)setup;
+   softpipe->rasterizer = rasterizer;
 
    softpipe->dirty |= SP_NEW_RASTERIZER;
 }
-- 
cgit v1.2.3


From 180ccffe550698d860e06d3cf5e16e4d9c3e7ddd Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 27 Dec 2009 17:52:47 -0800
Subject: softpipe: Silence unused variable warning.

---
 src/gallium/drivers/softpipe/sp_prim_vbuf.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index 5fbac06a53..7f573aef3c 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -128,6 +128,7 @@ sp_vbuf_unmap_vertices(struct vbuf_render *vbr,
 {
    struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
    assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+   (void) cvbr;
    /* do nothing */
 }
 
-- 
cgit v1.2.3


From 3a2f96f18aab60ed061339f74792307964c284a2 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 27 Dec 2009 18:06:02 -0800
Subject: softpipe: Silence uninitialized variable warnings.

---
 src/gallium/drivers/softpipe/sp_quad_blend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c
index fe6b6cec35..d9babe81da 100644
--- a/src/gallium/drivers/softpipe/sp_quad_blend.c
+++ b/src/gallium/drivers/softpipe/sp_quad_blend.c
@@ -229,7 +229,7 @@ blend_quad(struct quad_stage *qs,
    static const float zero[4] = { 0, 0, 0, 0 };
    static const float one[4] = { 1, 1, 1, 1 };
    struct softpipe_context *softpipe = qs->softpipe;
-   float source[4][QUAD_SIZE];
+   float source[4][QUAD_SIZE] = { { 0 } };
 
    /*
     * Compute src/first term RGB
-- 
cgit v1.2.3


From f31f9cf485ba3e735c9e10acc715897e0151492c Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 27 Dec 2009 18:09:58 -0800
Subject: i915g: Silence unused variable warning.

---
 src/gallium/drivers/i915/i915_buffer.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i915/i915_buffer.c b/src/gallium/drivers/i915/i915_buffer.c
index effeba1297..669964770d 100644
--- a/src/gallium/drivers/i915/i915_buffer.c
+++ b/src/gallium/drivers/i915/i915_buffer.c
@@ -111,6 +111,7 @@ i915_buffer_unmap(struct pipe_screen *screen,
 {
    struct i915_buffer *buf = i915_buffer(buffer);
    assert(!buf->ibuf);
+   (void) buf;
 }
 
 static void
-- 
cgit v1.2.3


From d1f64fa72f7e2362fa68f9cc8dc76be06fc846b4 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 27 Dec 2009 18:12:58 -0800
Subject: trace: Silence unused variable warnings.

---
 src/gallium/drivers/trace/tr_context.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c
index bf470b46ae..540855c067 100644
--- a/src/gallium/drivers/trace/tr_context.c
+++ b/src/gallium/drivers/trace/tr_context.c
@@ -52,6 +52,7 @@ trace_buffer_unwrap(struct trace_context *tr_ctx,
 
    assert(tr_buf->buffer);
    assert(tr_buf->buffer->screen == tr_scr->screen);
+   (void) tr_scr;
    return tr_buf->buffer;
 }
 
@@ -90,6 +91,7 @@ trace_surface_unwrap(struct trace_context *tr_ctx,
 
    assert(tr_surf->surface);
    assert(tr_surf->surface->texture->screen == tr_scr->screen);
+   (void) tr_scr;
    return tr_surf->surface;
 }
 
-- 
cgit v1.2.3


From 31d1822473bf9d4105bb82b67572cfeea53aaf94 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 28 Dec 2009 00:44:30 -0800
Subject: llvmpipe: Silence compiler warnings.

---
 src/gallium/drivers/llvmpipe/lp_bld_format_aos.c | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c   | 2 +-
 src/gallium/drivers/llvmpipe/lp_prim_vbuf.c      | 1 +
 src/gallium/drivers/llvmpipe/lp_state_fs.c       | 1 +
 src/gallium/drivers/llvmpipe/lp_tile_soa.h       | 2 +-
 src/gallium/drivers/llvmpipe/lp_winsys.h         | 2 +-
 6 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
index 5836e0173f..10e82f120b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_format_aos.c
@@ -130,7 +130,7 @@ lp_build_unpack_rgba_aos(LLVMBuilderRef builder,
 
    shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), "");
    masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), "");
-   // UIToFP can't be expressed in SSE2
+   /* UIToFP can't be expressed in SSE2 */
    casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatType(), 4), "");
 
    if (normalized)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 94ce4ae831..52554b950c 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -321,7 +321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
 {
    const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
    LLVMValueRef lodbias;
-   LLVMValueRef oow;
+   LLVMValueRef oow = NULL;
    LLVMValueRef coords[3];
    unsigned num_coords;
    unsigned i;
diff --git a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
index 4abff4eccc..e8e2e2524a 100644
--- a/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
+++ b/src/gallium/drivers/llvmpipe/lp_prim_vbuf.c
@@ -128,6 +128,7 @@ lp_vbuf_unmap_vertices(struct vbuf_render *vbr,
 {
    struct llvmpipe_vbuf_render *cvbr = llvmpipe_vbuf_render(vbr);
    assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size );
+   (void) cvbr;
    /* do nothing */
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2e9aa9fffe..8e2aae40af 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -676,6 +676,7 @@ llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
    struct lp_fragment_shader_variant *variant;
 
    assert(fs != llvmpipe->fs);
+   (void) llvmpipe;
 
    variant = shader->variants;
    while(variant) {
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 040b01865d..19d00b58d3 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -29,7 +29,7 @@
 #define LP_TILE_SOA_H
 
 #include "pipe/p_compiler.h"
-#include "tgsi/tgsi_exec.h" // for NUM_CHANNELS
+#include "tgsi/tgsi_exec.h" /* for NUM_CHANNELS */
 
 
 #ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_winsys.h b/src/gallium/drivers/llvmpipe/lp_winsys.h
index 595481c2cb..74b472b653 100644
--- a/src/gallium/drivers/llvmpipe/lp_winsys.h
+++ b/src/gallium/drivers/llvmpipe/lp_winsys.h
@@ -35,7 +35,7 @@
 #define LP_WINSYS_H
 
 
-#include "pipe/p_compiler.h" // for boolean
+#include "pipe/p_compiler.h" /* for boolean */
 #include "pipe/p_format.h"
 
 
-- 
cgit v1.2.3


From c84cc09d41a83caa96eca84c73284024d8d63024 Mon Sep 17 00:00:00 2001
From: Marcin Kościelnicki <koriakin@0x04.net>
Date: Mon, 28 Dec 2009 16:23:40 +0000
Subject: nv50: Dehexify and bring up to date with new method defines.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
---
 src/gallium/drivers/nv50/nv50_program.c        |  4 +--
 src/gallium/drivers/nv50/nv50_query.c          |  4 +--
 src/gallium/drivers/nv50/nv50_screen.c         | 44 ++++++++++++--------------
 src/gallium/drivers/nv50/nv50_state.c          | 13 ++++----
 src/gallium/drivers/nv50/nv50_state_validate.c | 20 ++++++------
 src/gallium/drivers/nv50/nv50_surface.c        |  6 ++--
 src/gallium/drivers/nv50/nv50_transfer.c       | 12 +++----
 src/gallium/drivers/nv50/nv50_vbo.c            | 38 +++++++++++-----------
 8 files changed, 69 insertions(+), 72 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index a101ac095c..b9910b430a 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -3498,7 +3498,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	so_data  (so, p->cfg.high_temp);
 	so_method(so, tesla, NV50TCL_FP_RESULT_COUNT, 1);
 	so_data  (so, p->cfg.high_result);
-	so_method(so, tesla, NV50TCL_FP_CTRL_UNK19A8, 1);
+	so_method(so, tesla, NV50TCL_FP_CONTROL, 1);
 	so_data  (so, p->cfg.regs[2]);
 	so_method(so, tesla, NV50TCL_FP_CTRL_UNK196C, 1);
 	so_data  (so, p->cfg.regs[3]);
@@ -3670,7 +3670,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
 	so_method(so, tesla, NV50TCL_FP_INTERPOLANT_CTRL, 1);
 	so_data  (so, reg[4]);
 
-	so_method(so, tesla, 0x1540, 4);
+	so_method(so, tesla, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
 	so_datap (so, lin, 4);
 
 	if (nv50->rasterizer->pipe.point_sprite) {
diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c
index 268c9823f7..5d9e18218a 100644
--- a/src/gallium/drivers/nv50/nv50_query.c
+++ b/src/gallium/drivers/nv50/nv50_query.c
@@ -77,9 +77,9 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
 	struct nv50_query *q = nv50_query(pq);
 
-	BEGIN_RING(chan, tesla, 0x1530, 1);
+	BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_RESET, 1);
 	OUT_RING  (chan, 1);
-	BEGIN_RING(chan, tesla, 0x1514, 1);
+	BEGIN_RING(chan, tesla, NV50TCL_SAMPLECNT_ENABLE, 1);
 	OUT_RING  (chan, 1);
 
 	q->ready = FALSE;
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 5a1efd3998..15e4b6e5ca 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -231,8 +231,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		break;
 	case 0x80:
 	case 0x90:
-		/* this stupid name should be corrected. */
-		tesla_class = NV54TCL;
+		tesla_class = NV84TCL;
 		break;
 	case 0xa0:
 		switch (chipset) {
@@ -242,7 +241,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 			tesla_class = NVA0TCL;
 			break;
 		default:
-			tesla_class = 0x8597;
+			tesla_class = NVA8TCL;
 			break;
 		}
 		break;
@@ -287,7 +286,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_data  (so, chan->vram->handle);
 	so_method(so, screen->eng2d, NV50_2D_OPERATION, 1);
 	so_data  (so, NV50_2D_OPERATION_SRCCOPY);
-	so_method(so, screen->eng2d, 0x0290, 1);
+	so_method(so, screen->eng2d, NV50_2D_CLIP_ENABLE, 1);
 	so_data  (so, 0);
 	so_method(so, screen->eng2d, 0x0888, 1);
 	so_data  (so, 1);
@@ -297,34 +296,33 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	/* Static tesla init */
 	so = so_new(256, 20);
 
-	so_method(so, screen->tesla, 0x1558, 1);
-	so_data  (so, 1);
+	so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
+	so_data  (so, NV50TCL_COND_MODE_ALWAYS);
 	so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1);
 	so_data  (so, screen->sync->handle);
-	so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0),
-				     NV50TCL_DMA_UNK0__SIZE);
-	for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++)
+	so_method(so, screen->tesla, NV50TCL_DMA_ZETA, 11);
+	for (i = 0; i < 11; i++)
 		so_data(so, chan->vram->handle);
-	so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0),
-				     NV50TCL_DMA_UNK1__SIZE);
-	for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++)
+	so_method(so, screen->tesla, NV50TCL_DMA_COLOR(0),
+				     NV50TCL_DMA_COLOR__SIZE);
+	for (i = 0; i < NV50TCL_DMA_COLOR__SIZE; i++)
 		so_data(so, chan->vram->handle);
-	so_method(so, screen->tesla, 0x121c, 1);
+	so_method(so, screen->tesla, NV50TCL_RT_CONTROL, 1);
 	so_data  (so, 1);
 
 	/* activate all 32 lanes (threads) in a warp */
-	so_method(so, screen->tesla, 0x19a0, 1);
+	so_method(so, screen->tesla, NV50TCL_WARP_HALVES, 1);
 	so_data  (so, 0x2);
 	so_method(so, screen->tesla, 0x1400, 1);
 	so_data  (so, 0xf);
 
 	/* max TIC (bits 4:8) & TSC (ignored) bindings, per program type */
-	so_method(so, screen->tesla, 0x13b4, 1);
+	so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(0), 1);
 	so_data  (so, 0x54);
-	so_method(so, screen->tesla, 0x13bc, 1);
+	so_method(so, screen->tesla, NV50TCL_TEX_LIMITS(2), 1);
 	so_data  (so, 0x54);
 	/* origin is top left (set to 1 for bottom left) */
-	so_method(so, screen->tesla, 0x13ac, 1);
+	so_method(so, screen->tesla, NV50TCL_Y_ORIGIN_BOTTOM, 1);
 	so_data  (so, 0);
 	so_method(so, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
 	so_data  (so, 8);
@@ -360,7 +358,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	//  B = buffer ID (maybe more than 1 byte)
 	//  N = CB index used in shader instruction
 	//  P = program type (0 = VP, 2 = GP, 3 = FP)
-	so_method(so, screen->tesla, 0x1694, 1);
+	so_method(so, screen->tesla, NV50TCL_SET_PROGRAM_CB, 1);
 	so_data  (so, 0x000BBNP1);
 	*/
 
@@ -424,24 +422,24 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 
 	/* Vertex array limits - max them out */
 	for (i = 0; i < 16; i++) {
-		so_method(so, screen->tesla, NV50TCL_UNK1080_OFFSET_HIGH(i), 2);
+		so_method(so, screen->tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
 		so_data  (so, 0x000000ff);
 		so_data  (so, 0xffffffff);
 	}
 
-	so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2);
+	so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR(0), 2);
 	so_data  (so, fui(0.0));
 	so_data  (so, fui(1.0));
 
 	/* no dynamic combination of TIC & TSC entries => only BIND_TIC used */
-	so_method(so, screen->tesla, 0x1234, 1);
+	so_method(so, screen->tesla, NV50TCL_LINKED_TSC, 1);
 	so_data  (so, 1);
 
 	/* activate first scissor rectangle */
-	so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE, 1);
+	so_method(so, screen->tesla, NV50TCL_SCISSOR_ENABLE(0), 1);
 	so_data  (so, 1);
 
-	so_method(so, screen->tesla, 0x15e4, 1);
+	so_method(so, screen->tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
 	so_data  (so, 1); /* default edgeflag to TRUE */
 
 	so_emit(chan, so);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 88aef52d08..18a2b819d8 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -295,7 +295,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
 	so_method(so, tesla, NV50TCL_SHADE_MODEL, 1);
 	so_data  (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT :
 				       NV50TCL_SHADE_MODEL_SMOOTH);
-	so_method(so, tesla, 0x1684, 1);
+	so_method(so, tesla, NV50TCL_PROVOKING_VERTEX_LAST, 1);
 	so_data  (so, cso->flatshade_first ? 0 : 1);
 
 	so_method(so, tesla, NV50TCL_VERTEX_TWO_SIDE_ENABLE, 1);
@@ -439,9 +439,8 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 		so_data  (so, 0);
 	}
 
-	/* XXX: keep hex values until header is updated (names reversed) */
 	if (cso->stencil[0].enabled) {
-		so_method(so, tesla, 0x1380, 8);
+		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8);
 		so_data  (so, 1);
 		so_data  (so, nvgl_stencil_op(cso->stencil[0].fail_op));
 		so_data  (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
@@ -451,23 +450,23 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 		so_data  (so, cso->stencil[0].writemask);
 		so_data  (so, cso->stencil[0].valuemask);
 	} else {
-		so_method(so, tesla, 0x1380, 1);
+		so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1);
 		so_data  (so, 0);
 	}
 
 	if (cso->stencil[1].enabled) {
-		so_method(so, tesla, 0x1594, 5);
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5);
 		so_data  (so, 1);
 		so_data  (so, nvgl_stencil_op(cso->stencil[1].fail_op));
 		so_data  (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
 		so_data  (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
 		so_data  (so, nvgl_comparison_op(cso->stencil[1].func));
-		so_method(so, tesla, 0x0f54, 3);
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3);
 		so_data  (so, cso->stencil[1].ref_value);
 		so_data  (so, cso->stencil[1].writemask);
 		so_data  (so, cso->stencil[1].valuemask);
 	} else {
-		so_method(so, tesla, 0x1594, 1);
+		so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1);
 		so_data  (so, 0);
 	}
 
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 871e8097b6..c8bdf9dc27 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -41,7 +41,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 	 * FP result 0 always goes to RT[0], bits 4 - 6 are ignored.
 	 * Ambiguous assignment results in no rendering (no DATA_ERROR).
 	 */
-	so_method(so, tesla, 0x121c, 1);
+	so_method(so, tesla, NV50TCL_RT_CONTROL, 1);
 	so_data  (so, fb->nr_cbufs |
 		  (0 <<  4) | (1 <<  7) | (2 << 10) | (3 << 13) |
 		  (4 << 16) | (5 << 19) | (6 << 22) | (7 << 25));
@@ -87,7 +87,7 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 				level[fb->cbufs[i]->level].tile_mode << 4);
 		so_data(so, 0x00000000);
 
-		so_method(so, tesla, 0x1224, 1);
+		so_method(so, tesla, NV50TCL_RT_ARRAY_MODE, 1);
 		so_data  (so, 1);
 	}
 
@@ -124,22 +124,22 @@ nv50_state_validate_fb(struct nv50_context *nv50)
 				level[fb->zsbuf->level].tile_mode << 4);
 		so_data(so, 0x00000000);
 
-		so_method(so, tesla, 0x1538, 1);
+		so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
 		so_data  (so, 1);
 		so_method(so, tesla, NV50TCL_ZETA_HORIZ, 3);
 		so_data  (so, fb->zsbuf->width);
 		so_data  (so, fb->zsbuf->height);
 		so_data  (so, 0x00010001);
 	} else {
-		so_method(so, tesla, 0x1538, 1);
+		so_method(so, tesla, NV50TCL_ZETA_ENABLE, 1);
 		so_data  (so, 0);
 	}
 
-	so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2);
+	so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ(0), 2);
 	so_data  (so, w << 16);
 	so_data  (so, h << 16);
 	/* set window lower left corner */
-	so_method(so, tesla, NV50TCL_WINDOW_LEFT, 2);
+	so_method(so, tesla, NV50TCL_WINDOW_OFFSET_X, 2);
 	so_data  (so, 0);
 	so_data  (so, 0);
 	/* set screen scissor rectangle */
@@ -325,7 +325,7 @@ nv50_state_validate(struct nv50_context *nv50)
 		nv50->state.scissor_enabled = rast->scissor;
 
 		so = so_new(3, 0);
-		so_method(so, tesla, NV50TCL_SCISSOR_HORIZ, 2);
+		so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
 		if (nv50->state.scissor_enabled) {
 			so_data(so, (s->maxx << 16) | s->minx);
 			so_data(so, (s->maxy << 16) | s->miny);
@@ -355,11 +355,11 @@ scissor_uptodate:
 
 		so = so_new(14, 0);
 		if (!bypass) {
-			so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE(0), 3);
+			so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
 			so_data  (so, fui(nv50->viewport.translate[0]));
 			so_data  (so, fui(nv50->viewport.translate[1]));
 			so_data  (so, fui(nv50->viewport.translate[2]));
-			so_method(so, tesla, NV50TCL_VIEWPORT_SCALE(0), 3);
+			so_method(so, tesla, NV50TCL_VIEWPORT_SCALE_X(0), 3);
 			so_data  (so, fui(nv50->viewport.scale[0]));
 			so_data  (so, fui(nv50->viewport.scale[1]));
 			so_data  (so, fui(nv50->viewport.scale[2]));
@@ -440,7 +440,7 @@ void nv50_so_init_sifc(struct nv50_context *nv50,
 	so_data  (so, 1);
 	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_HIGH, 0, 0);
 	so_reloc (so, bo, offset, reloc | NOUVEAU_BO_LOW, 0, 0);
-	so_method(so, eng2d, NV50_2D_SIFC_UNK0800, 2);
+	so_method(so, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
 	so_data  (so, 0);
 	so_data  (so, NV50_2D_SIFC_FORMAT_R8_UNORM);
 	so_method(so, eng2d, NV50_2D_SIFC_WIDTH, 10);
diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c
index 79655fc08d..6378132979 100644
--- a/src/gallium/drivers/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nv50/nv50_surface.c
@@ -176,11 +176,11 @@ nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
 	if (ret)
 		return;
 
-	BEGIN_RING(chan, eng2d, 0x0580, 3);
-	OUT_RING  (chan, 4);
+	BEGIN_RING(chan, eng2d, NV50_2D_DRAW_SHAPE, 3);
+	OUT_RING  (chan, NV50_2D_DRAW_SHAPE_RECTANGLES);
 	OUT_RING  (chan, format);
 	OUT_RING  (chan, value);
-	BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
+	BEGIN_RING(chan, eng2d, NV50_2D_DRAW_POINT32_X(0), 4);
 	OUT_RING  (chan, destx);
 	OUT_RING  (chan, desty);
 	OUT_RING  (chan, width);
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index 4d9afa6fed..a2f1db2914 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -47,7 +47,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_IN, 1);
 		OUT_RING  (chan, 1);
 		BEGIN_RING(chan, m2mf,
-			NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
+			NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_IN, 1);
 		OUT_RING  (chan, src_pitch);
 		src_offset += (sy * src_pitch) + (sx * cpp);
 	} else {
@@ -66,7 +66,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			NV50_MEMORY_TO_MEMORY_FORMAT_LINEAR_OUT, 1);
 		OUT_RING  (chan, 1);
 		BEGIN_RING(chan, m2mf,
-			NV50_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
+			NV04_MEMORY_TO_MEMORY_FORMAT_PITCH_OUT, 1);
 		OUT_RING  (chan, dst_pitch);
 		dst_offset += (dy * dst_pitch) + (dx * cpp);
 	} else {
@@ -89,7 +89,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 		OUT_RELOCh(chan, src_bo, src_offset, src_reloc);
 		OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc);
 		BEGIN_RING(chan, m2mf,
-			NV50_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
+			NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 2);
 		OUT_RELOCl(chan, src_bo, src_offset, src_reloc);
 		OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc);
 		if (src_bo->tile_flags) {
@@ -107,7 +107,7 @@ nv50_transfer_rect_m2mf(struct pipe_screen *pscreen,
 			dst_offset += (line_count * dst_pitch);
 		}
 		BEGIN_RING(chan, m2mf,
-			NV50_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
+			NV04_MEMORY_TO_MEMORY_FORMAT_LINE_LENGTH_IN, 4);
 		OUT_RING  (chan, width * cpp);
 		OUT_RING  (chan, line_count);
 		OUT_RING  (chan, 0x00000101);
@@ -291,7 +291,7 @@ nv50_upload_sifc(struct nv50_context *nv50,
 
 	/* NV50_2D_OPERATION_SRCCOPY assumed already set */
 
-	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
+	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_BITMAP_ENABLE, 2);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, src_format);
 	BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
@@ -334,6 +334,6 @@ nv50_upload_sifc(struct nv50_context *nv50,
 		src += src_pitch;
 	}
 
-	BEGIN_RING(chan, tesla, 0x1440, 1);
+	BEGIN_RING(chan, tesla, NV50TCL_CODE_CB_FLUSH, 1);
 	OUT_RING  (chan, 0);
 }
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index 39324e30f6..602adfc50d 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -99,19 +99,19 @@ nv50_vbo_size_to_hw(unsigned size, unsigned nr_c)
 {
 	static const uint32_t hw_values[] = {
 		0, 0, 0, 0,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_8_8_8_8,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_16_16_16_16,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_8_8_8_8,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_16_16_16_16,
 		0, 0, 0, 0,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32,
-		NV50TCL_VERTEX_ARRAY_ATTRIB_SIZE_32_32_32_32 };
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32,
+		NV50TCL_VERTEX_ARRAY_ATTRIB_FORMAT_32_32_32_32 };
 
 	/* we'd also have R11G11B10 and R10G10B10A2 */
 
@@ -198,7 +198,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 		return nv50_push_elements_u08(nv50, map, count);
 
 	if (count & 1) {
-		BEGIN_RING(chan, tesla, 0x15e8, 1);
+		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
 		map++;
 		count--;
@@ -208,7 +208,7 @@ nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map,
 		unsigned nr = count > 2046 ? 2046 : count;
 		int i;
 
-		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
 		for (i = 0; i < nr; i += 2)
 			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
 
@@ -231,7 +231,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 		return nv50_push_elements_u16(nv50, map, count);
 
 	if (count & 1) {
-		BEGIN_RING(chan, tesla, 0x15e8, 1);
+		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32, 1);
 		OUT_RING  (chan, map[0]);
 		map++;
 		count--;
@@ -241,7 +241,7 @@ nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map,
 		unsigned nr = count > 2046 ? 2046 : count;
 		int i;
 
-		BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1);
+		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U16 | 0x40000000, nr >> 1);
 		for (i = 0; i < nr; i += 2)
 			OUT_RING  (chan, (map[i + 1] << 16) | map[i]);
 
@@ -266,7 +266,7 @@ nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint32_t *map,
 	while (count) {
 		unsigned nr = count > 2047 ? 2047 : count;
 
-		BEGIN_RING(chan, tesla, 0x400015e8, nr);
+		BEGIN_RING(chan, tesla, NV50TCL_VB_ELEMENT_U32 | 0x40000000, nr);
 		OUT_RINGp (chan, map, nr);
 
 		count -= nr;
@@ -373,7 +373,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 		break;
 	case 1:
 		if (attrib == nv50->vertprog->cfg.edgeflag_in) {
-			so_method(so, tesla, 0x15e4, 1);
+			so_method(so, tesla, NV50TCL_EDGEFLAG_ENABLE, 1);
 			so_data  (so, v[0] ? 1 : 0);
 		}
 		so_method(so, tesla, NV50TCL_VTX_ATTR_1F(attrib), 1);
@@ -452,7 +452,7 @@ nv50_vbo_validate(struct nv50_context *nv50)
 			  NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0);
 
 		/* vertex array limits */
-		so_method(vtxbuf, tesla, 0x1080 + (i * 8), 2);
+		so_method(vtxbuf, tesla, NV50TCL_VERTEX_ARRAY_LIMIT_HIGH(i), 2);
 		so_reloc (vtxbuf, bo, vb->buffer->size - 1,
 			  NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
 			  NOUVEAU_BO_HIGH, 0, 0);
-- 
cgit v1.2.3


From 49a0f291aef5f601f172cf6f41fd83d6c342bb8a Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Sat, 26 Dec 2009 17:26:49 +0100
Subject: nv04: Fix build after the latest nouveau_class.h changes.

---
 src/gallium/drivers/nv04/nv04_context.c    | 18 ++++-----
 src/gallium/drivers/nv04/nv04_fragtex.c    | 16 ++++----
 src/gallium/drivers/nv04/nv04_prim_vbuf.c  | 16 ++++----
 src/gallium/drivers/nv04/nv04_screen.c     |  4 +-
 src/gallium/drivers/nv04/nv04_state.c      | 60 +++++++++++++++---------------
 src/gallium/drivers/nv04/nv04_state_emit.c | 10 ++---
 6 files changed, 62 insertions(+), 62 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 4b33636b2e..770733a4a1 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -31,26 +31,26 @@ static boolean
 nv04_init_hwctx(struct nv04_context *nv04)
 {
 	// requires a valid handle
-//	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1);
+//	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
 //	OUT_RING(0);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
 	OUT_RING(0);
 
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
 	OUT_RING(0x40182800);
 //	OUT_RING(1<<20/*no cull*/);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
 //	OUT_RING(0x24|(1<<6)|(1<<8));
 	OUT_RING(0x120001a4);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
 	OUT_RING(0x332213a1);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
 	OUT_RING(0x11001010);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
 	OUT_RING(0x0);
-//	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1);
+//	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
 //	OUT_RING(SCREEN_OFFSET);
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
 	OUT_RING(0xff000000);
 
 
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 0cce71ad1d..c152b52119 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -4,7 +4,7 @@
 #define _(m,tf)                                                                \
 {                                                                              \
   PIPE_FORMAT_##m,                                                             \
-  NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf,                                               \
+  NV04_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf,                                               \
 }
 
 struct nv04_texture_format {
@@ -53,14 +53,14 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
 		return;
 	}
 
-	nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER 
-		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
+	nv04->fragtex.format = NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER
+		| NV04_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
 		| nv04_fragtex_format(pt->format)
-		| ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
-		| ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
-		| ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
-		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
-		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
+		| ( (pt->last_level + 1) << NV04_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
+		| ( log2i(pt->width0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+		| ( log2i(pt->height0) << NV04_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+		| NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
+		| NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
 		;
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
index f6458232ae..25395edfd7 100644
--- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -93,7 +93,7 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render,
 
 static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
 {
-	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
+	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
 	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -105,7 +105,7 @@ static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buf
 
 static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
 {
-	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
+	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
 	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -114,7 +114,7 @@ static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buff
 
 static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
 {
-	BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
+	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
 	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
 	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
@@ -166,11 +166,11 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
 		if (numvert<3)
 			break;
 
-		BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+		BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
 		for(j = 0; j<numvert; j++)
 			OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
 
-		BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 );
+		BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
 		for(j = 0; j<numtri/2; j++ )
 			OUT_RING(striptbl[j]);
 		if (numtri%2)
@@ -185,7 +185,7 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
 	struct nv04_context* nv04 = render->nv04;
 	int i,j;
 
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
 	OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
 
 	for(i = 1; i<nr_indices; i+=14)
@@ -195,12 +195,12 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
 		if (numvert < 3)
 			break;
 
-		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
 
 		for(j=0;j<numvert;j++)
 			OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
 
-		BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2);
+		BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
 		for(j = 0; j<numtri/2; j++)
 			OUT_RING(fantbl[j]);
 		if (numtri%2)
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index 170ce3eb7e..ee9b7d2e48 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -163,10 +163,10 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		fahrenheit_class = 0;
 		sub3d_class = 0;
 	} else if (dev->chipset >= 0x10) {
-		fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE;
+		fahrenheit_class = NV10_TEXTURED_TRIANGLE;
 		sub3d_class = NV10_CONTEXT_SURFACES_3D;
 	} else {
-		fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE;
+		fahrenheit_class=NV04_TEXTURED_TRIANGLE;
 		sub3d_class = NV04_CONTEXT_SURFACES_3D;
 	}
 
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c
index ef3005db5f..e3dc4c5bf4 100644
--- a/src/gallium/drivers/nv04/nv04_state.c
+++ b/src/gallium/drivers/nv04/nv04_state.c
@@ -50,28 +50,28 @@ wrap_mode(unsigned wrap) {
 
 	switch (wrap) {
 	case PIPE_TEX_WRAP_REPEAT:
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT;
 		break;
 	case PIPE_TEX_WRAP_MIRROR_REPEAT:
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT;
 		break;
 	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE;
 		break;
 	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER;
 		break;
 	case PIPE_TEX_WRAP_CLAMP:
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
 		break;
 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
 	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
 	case PIPE_TEX_WRAP_MIRROR_CLAMP:
 	default:
 		NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
-		ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
+		ret = NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP;
 	}
-	return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
+	return ret >> NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT;
 }
 
 static void *
@@ -84,20 +84,20 @@ nv04_sampler_state_create(struct pipe_context *pipe,
 
 	ss = MALLOC(sizeof(struct nv04_sampler_state));
 
-	ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
-		    (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
+	ss->format = ((wrap_mode(cso->wrap_s) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) |
+		    (wrap_mode(cso->wrap_t) << NV04_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT));
 
 	if (cso->max_anisotropy > 1.0) {
-		filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
+		filter |= NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE;
 	}
 
 	switch (cso->mag_img_filter) {
 	case PIPE_TEX_FILTER_LINEAR:
-		filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
+		filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR;
 		break;
 	case PIPE_TEX_FILTER_NEAREST:
 	default:
-		filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
+		filter |= NV04_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST;
 		break;
 	}
 
@@ -105,14 +105,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
 	case PIPE_TEX_FILTER_LINEAR:
 		switch (cso->min_mip_filter) {
 		case PIPE_TEX_MIPFILTER_NEAREST:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST;
 			break;
 		case PIPE_TEX_MIPFILTER_LINEAR:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR;
 			break;
 		case PIPE_TEX_MIPFILTER_NONE:
 		default:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR;
 			break;
 		}
 		break;
@@ -120,14 +120,14 @@ nv04_sampler_state_create(struct pipe_context *pipe,
 	default:
 		switch (cso->min_mip_filter) {
 		case PIPE_TEX_MIPFILTER_NEAREST:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST;
 		break;
 		case PIPE_TEX_MIPFILTER_LINEAR:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR;
 			break;
 		case PIPE_TEX_MIPFILTER_NONE:
 		default:
-			filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
+			filter |= NV04_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST;
 			break;
 		}
 		break;
@@ -181,7 +181,7 @@ nv04_rasterizer_state_create(struct pipe_context *pipe,
 	 */
 	rs = MALLOC(sizeof(struct nv04_rasterizer_state));
 
-	rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
+	rs->blend = cso->flatshade ? NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD;
 
 	return (void *)rs;
 }
@@ -229,16 +229,16 @@ nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 	hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state));
 
 	hw->control = float_to_ubyte(cso->alpha.ref_value);
-	hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
-	hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0;
-	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
-	hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0;
-	hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
-	hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
-	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
-	hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
-	hw->control |= cso->depth.writemask ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0;
-	hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
+	hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT );
+	hw->control |= cso->alpha.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_ALPHA_ENABLE : 0;
+	hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_ORIGIN;
+	hw->control |= cso->depth.enabled ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE : 0;
+	hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT );
+	hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module
+	hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE;
+	hw->control |= NV04_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE;
+	hw->control |= cso->depth.writemask ? NV04_TEXTURED_TRIANGLE_CONTROL_Z_WRITE : 0;
+	hw->control |= 1 << NV04_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format
 
 	return (void *)hw;
 }
@@ -377,7 +377,7 @@ nv04_set_scissor_state(struct pipe_context *pipe,
 /*	struct nv04_context *nv04 = nv04_context(pipe);
 
 	// XXX
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2);
 	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx);
 	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/
 }
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
index eb2c1c57c6..bd98ae091f 100644
--- a/src/gallium/drivers/nv04/nv04_state_emit.c
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -58,7 +58,7 @@ static void nv04_emit_control(struct nv04_context* nv04)
 {
 	uint32_t control = nv04->dsa->control;
 
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
 	OUT_RING(control);
 }
 
@@ -75,7 +75,7 @@ static void nv04_emit_blend(struct nv04_context* nv04)
 	blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
 	blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
 
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
 	OUT_RING(blend);
 }
 
@@ -84,7 +84,7 @@ static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
 	struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
 	struct pipe_texture *pt = &nv04mt->base;
 
-	BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3);
+	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
 	OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 	OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
 	OUT_RING(nv04->sampler[unit]->filter);
@@ -163,7 +163,7 @@ nv04_emit_hw_state(struct nv04_context *nv04)
 	if (nv04->dirty & NV04_NEW_CONTROL) {
 		nv04->dirty &= ~NV04_NEW_CONTROL;
 
-		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1);
+		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
 		OUT_RING(nv04->dsa->control);
 	}
 
@@ -218,7 +218,7 @@ nv04_emit_hw_state(struct nv04_context *nv04)
 		if (!(nv04->fp_samplers & (1 << i)))
 			continue;
 		struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
-		BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2);
+		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
 		OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 		OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
 	}
-- 
cgit v1.2.3


From 67171ed85f3f9486238a994e8a427ddc1ac31069 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 28 Dec 2009 16:25:48 +0000
Subject: xlib: Integrate the trace driver with all pipe drivers.

And not just softpipe.

It is particularly convenient to use llvmpipe instead, since it is much
faster. It also allows rbug to be used with all xlib drivers.
---
 src/gallium/drivers/trace/README             |   5 +-
 src/gallium/state_trackers/glx/xlib/xm_api.c |  23 +++++-
 src/gallium/winsys/xlib/Makefile             |   7 +-
 src/gallium/winsys/xlib/SConscript           |   7 +-
 src/gallium/winsys/xlib/xlib.c               |   9 ---
 src/gallium/winsys/xlib/xlib.h               |   1 -
 src/gallium/winsys/xlib/xlib_trace.c         | 113 ---------------------------
 7 files changed, 24 insertions(+), 141 deletions(-)
 delete mode 100644 src/gallium/winsys/xlib/xlib_trace.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README
index 1000c31e49..203c3851bc 100644
--- a/src/gallium/drivers/trace/README
+++ b/src/gallium/drivers/trace/README
@@ -24,11 +24,10 @@ ensure the right libGL.so is being picked by doing
 
  ldd progs/trivial/tri 
 
-== Traceing ==
+== Tracing ==
 
-For traceing then do
+For tracing then do
 
- export XMESA_TRACE=y
  GALLIUM_TRACE=tri.trace progs/trivial/tri
 
 which should create a tri.trace file, which is an XML file. You can view copying 
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index c76dfb31d2..1783bc504d 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -67,6 +67,10 @@
 #include "pipe/p_screen.h"
 #include "pipe/p_context.h"
 
+#include "trace/tr_screen.h"
+#include "trace/tr_context.h"
+#include "trace/tr_texture.h"
+
 #include "xm_winsys.h"
 #include <GL/glx.h>
 
@@ -87,6 +91,8 @@ void xmesa_set_driver( const struct xm_driver *templ )
  */
 pipe_mutex _xmesa_lock;
 
+static struct pipe_screen *_screen = NULL;
+static struct pipe_screen *screen = NULL;
 
 
 /**********************************************************************/
@@ -754,7 +760,7 @@ PUBLIC
 XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
 {
    static GLboolean firstTime = GL_TRUE;
-   static struct pipe_screen *screen = NULL;
+   struct pipe_context *_pipe = NULL;
    struct pipe_context *pipe = NULL;
    XMesaContext c;
    GLcontext *mesaCtx;
@@ -762,7 +768,8 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
 
    if (firstTime) {
       pipe_mutex_init(_xmesa_lock);
-      screen = driver.create_pipe_screen();
+      _screen = driver.create_pipe_screen();
+      screen = trace_screen_create( _screen );
       firstTime = GL_FALSE;
    }
 
@@ -781,9 +788,11 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
    if (screen == NULL)
       goto fail;
 
-   pipe = driver.create_pipe_context(screen, (void *) c);
-   if (pipe == NULL)
+   _pipe = driver.create_pipe_context(_screen, (void *) c);
+   if (_pipe == NULL)
       goto fail;
+   pipe = trace_context_create(screen, _pipe);
+   pipe->priv = c;
 
    c->st = st_create_context(pipe, 
                              &v->mesa_visual,
@@ -1110,6 +1119,12 @@ void XMesaSwapBuffers( XMesaBuffer b )
    st_swapbuffers(b->stfb, &frontLeftSurf, NULL);
 
    if (frontLeftSurf) {
+      if (_screen != screen) {
+         struct trace_surface *tr_surf = trace_surface( frontLeftSurf );
+         struct pipe_surface *surf = tr_surf->surface;
+         frontLeftSurf = surf;
+      }
+
       driver.display_surface(b, frontLeftSurf);
    }
 
diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile
index a0293fe9b4..9482e8f9b1 100644
--- a/src/gallium/winsys/xlib/Makefile
+++ b/src/gallium/winsys/xlib/Makefile
@@ -23,17 +23,14 @@ INCLUDE_DIRS = \
 	-I$(TOP)/src/gallium/auxiliary
 
 DEFINES += \
-	-DGALLIUM_SOFTPIPE \
-	-DGALLIUM_TRACE \
-	-DGALLIUM_BRW
+	-DGALLIUM_SOFTPIPE
 #-DGALLIUM_CELL will be defined by the config */
 
 XLIB_WINSYS_SOURCES = \
 	xlib.c \
 	xlib_cell.c \
 	xlib_llvmpipe.c \
-	xlib_softpipe.c \
-	xlib_trace.c 
+	xlib_softpipe.c
 
 
 XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o)
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index 7a9e985cb5..ccec2566b1 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -22,7 +22,7 @@ if env['platform'] == 'linux' \
         'xlib.c',
     ]
 
-    drivers = []
+    drivers = [trace]
         
     if 'softpipe' in env['drivers']:
         env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE')
@@ -42,11 +42,6 @@ if env['platform'] == 'linux' \
         sources += ['xlib_cell.c']
         drivers += [cell]
 
-    if 'trace' in env['drivers']:
-        env.Append(CPPDEFINES = 'GALLIUM_TRACE')
-        sources += ['xlib_trace.c']
-        drivers += [trace]
-
     # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
     libgl = env.SharedLibrary(
         target ='GL',
diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c
index 163cc8863c..6dbe05f193 100644
--- a/src/gallium/winsys/xlib/xlib.c
+++ b/src/gallium/winsys/xlib/xlib.c
@@ -42,7 +42,6 @@
  */
 
 enum mode {
-   MODE_TRACE,
    MODE_CELL,
    MODE_LLVMPIPE,
    MODE_SOFTPIPE
@@ -51,9 +50,6 @@ enum mode {
 
 static enum mode get_mode()
 {
-   if (getenv("XMESA_TRACE"))
-      return MODE_TRACE;
-
 #ifdef GALLIUM_CELL
    if (!getenv("GALLIUM_NOCELL")) 
       return MODE_CELL;
@@ -73,11 +69,6 @@ static void _init( void )
    enum mode xlib_mode = get_mode();
 
    switch (xlib_mode) {
-   case MODE_TRACE:
-#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE)
-      xmesa_set_driver( &xlib_trace_driver );
-#endif
-      break;
    case MODE_CELL:
 #if defined(GALLIUM_CELL)
       xmesa_set_driver( &xlib_cell_driver );
diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h
index f0855035f7..8e091d0c08 100644
--- a/src/gallium/winsys/xlib/xlib.h
+++ b/src/gallium/winsys/xlib/xlib.h
@@ -5,7 +5,6 @@
 #include "pipe/p_compiler.h"
 #include "xm_winsys.h"
 
-extern struct xm_driver xlib_trace_driver;
 extern struct xm_driver xlib_softpipe_driver;
 extern struct xm_driver xlib_llvmpipe_driver;
 extern struct xm_driver xlib_cell_driver;
diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c
deleted file mode 100644
index dbea655ab4..0000000000
--- a/src/gallium/winsys/xlib/xlib_trace.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * 
- **************************************************************************/
-
-/*
- * Authors:
- *   Keith Whitwell
- *   Brian Paul
- */
-
-
-#include "xlib.h"
-
-#include "trace/tr_screen.h"
-#include "trace/tr_context.h"
-#include "trace/tr_texture.h"
-
-#include "pipe/p_screen.h"
-
-
-
-static struct pipe_screen *
-xlib_create_trace_screen( void )
-{
-   struct pipe_screen *screen, *trace_screen;
-
-   screen = xlib_softpipe_driver.create_pipe_screen();
-   if (screen == NULL)
-      goto fail;
-
-   /* Wrap it:
-    */
-   trace_screen = trace_screen_create(screen);
-   if (trace_screen == NULL)
-      goto fail;
-
-   return trace_screen;
-
-fail:
-   if (screen)
-      screen->destroy( screen );
-   return NULL;
-}
-
-static struct pipe_context *
-xlib_create_trace_context( struct pipe_screen *_screen,
-                           void *priv )
-{
-   struct trace_screen *tr_scr = trace_screen( _screen );
-   struct pipe_screen *screen = tr_scr->screen;
-   struct pipe_context *pipe, *trace_pipe;
-   
-   pipe = xlib_softpipe_driver.create_pipe_context( screen, priv );
-   if (pipe == NULL)
-      goto fail;
-
-   /* Wrap it:
-    */
-   trace_pipe = trace_context_create(_screen, pipe);
-   if (trace_pipe == NULL)
-      goto fail;
-
-   trace_pipe->priv = priv;
-
-   return trace_pipe;
-
-fail:
-   if (pipe)
-      pipe->destroy( pipe );
-   return NULL;
-}
-
-static void
-xlib_trace_display_surface( struct xmesa_buffer *buffer,
-                            struct pipe_surface *_surf )
-{
-   struct trace_surface *tr_surf = trace_surface( _surf );
-   struct pipe_surface *surf = tr_surf->surface;
-
-   xlib_softpipe_driver.display_surface( buffer, surf );
-}
-
-
-struct xm_driver xlib_trace_driver = 
-{
-   .create_pipe_screen = xlib_create_trace_screen,
-   .create_pipe_context = xlib_create_trace_context,
-   .display_surface = xlib_trace_display_surface,
-};
-- 
cgit v1.2.3


From 926562fe271a031774d1aae850e955c2180f2816 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Mon, 28 Dec 2009 22:52:41 +0000
Subject: llvmpipe: Unmapping vertex/index buffers does NOT flush draw module
 anymore.

Not since 6094e79f4e3350d123c7532b1c73faa60834a62d.

Drivers now need to flush the draw module explicitly (which explains why
all those previous commits adding draw_flush calls were necessary).

This is a good thing, but it's tricky to get this right in the face of user
buffers (it's not even clear who has the responsibility to flush when a user
buffer is seen -- state tracker or pipe driver), so just force a flush
(temporarily) since it's not a bottleneck now.
---
 src/gallium/drivers/llvmpipe/lp_draw_arrays.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
index 2299566c66..a96c2cad9d 100644
--- a/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
+++ b/src/gallium/drivers/llvmpipe/lp_draw_arrays.c
@@ -103,7 +103,7 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
    draw_arrays(draw, mode, start, count);
 
    /*
-    * unmap vertex/index buffers - will cause draw module to flush
+    * unmap vertex/index buffers
     */
    for (i = 0; i < lp->num_vertex_buffers; i++) {
       draw_set_mapped_vertex_buffer(draw, i, NULL);
@@ -112,6 +112,12 @@ llvmpipe_draw_range_elements(struct pipe_context *pipe,
       draw_set_mapped_element_buffer(draw, 0, NULL);
    }
 
+   /*
+    * TODO: Flush only when a user vertex/index buffer is present
+    * (or even better, modify draw module to do this
+    * internally when this condition is seen?)
+    */
+   draw_flush(draw);
 
    /* Note: leave drawing surfaces mapped */
 
-- 
cgit v1.2.3


From a55e50b082ca068d35d695ff323603507e2b64aa Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 26 Dec 2009 08:12:15 +0100
Subject: NV30/NV40 CMP and SCS src == dst handling

CMP and SCS can produce incorrect results if the source and
destination are the same.
This patch should fix the issues.
CMP is fixed by predicating both moves.
SCS is fixed by emitting SIN before COS if the source component is X.
---
 src/gallium/drivers/nv30/nv30_fragprog.c | 32 ++++++++++++++++++++++++--------
 src/gallium/drivers/nv40/nv40_fragprog.c | 32 ++++++++++++++++++++++++--------
 2 files changed, 48 insertions(+), 16 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index 40965a9772..dc4e583ce9 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -435,10 +435,11 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_CMP:
-		tmp = temp(fpc);
-		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+		tmp = nv30_sr(NV30SR_NONE, 0);
 		tmp.cc_update = 1;
 		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+		dst.cc_test = NV30_VP_INST_COND_GE;
+		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
 		dst.cc_test = NV30_VP_INST_COND_LT;
 		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
 		break;
@@ -517,13 +518,28 @@ nv30_fragprog_parse_instruction(struct nv30_fpc *fpc,
 		arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none);
 		break;
 	case TGSI_OPCODE_SCS:
-		if (mask & MASK_X) {
-			arith(fpc, sat, COS, dst, MASK_X,
-			      swz(src[0], X, X, X, X), none, none);
+		/* avoid overwriting the source */
+		if(src[0].swz[SWZ_X] != SWZ_X)
+		{
+			if (mask & MASK_X) {
+				arith(fpc, sat, COS, dst, MASK_X,
+				      swz(src[0], X, X, X, X), none, none);
+			}
+			if (mask & MASK_Y) {
+				arith(fpc, sat, SIN, dst, MASK_Y,
+				      swz(src[0], X, X, X, X), none, none);
+			}
 		}
-		if (mask & MASK_Y) {
-			arith(fpc, sat, SIN, dst, MASK_Y,
-			      swz(src[0], X, X, X, X), none, none);
+		else
+		{
+			if (mask & MASK_Y) {
+				arith(fpc, sat, SIN, dst, MASK_Y,
+				      swz(src[0], X, X, X, X), none, none);
+			}
+			if (mask & MASK_X) {
+				arith(fpc, sat, COS, dst, MASK_X,
+				      swz(src[0], X, X, X, X), none, none);
+			}
 		}
 		break;
 	case TGSI_OPCODE_SIN:
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 1bf16726d1..468d3509a9 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -445,10 +445,11 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		arith(fpc, sat, ADD, dst, mask, src[0], src[1], none);
 		break;
 	case TGSI_OPCODE_CMP:
-		tmp = temp(fpc);
-		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
+		tmp = nv40_sr(NV40SR_NONE, 0);
 		tmp.cc_update = 1;
 		arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none);
+		dst.cc_test = NV40_VP_INST_COND_GE;
+		arith(fpc, sat, MOV, dst, mask, src[2], none, none);
 		dst.cc_test = NV40_VP_INST_COND_LT;
 		arith(fpc, sat, MOV, dst, mask, src[1], none, none);
 		break;
@@ -573,13 +574,28 @@ nv40_fragprog_parse_instruction(struct nv40_fpc *fpc,
 		      neg(swz(tmp, X, X, X, X)), none, none);
 		break;
 	case TGSI_OPCODE_SCS:
-		if (mask & MASK_X) {
-			arith(fpc, sat, COS, dst, MASK_X,
-			      swz(src[0], X, X, X, X), none, none);
+		/* avoid overwriting the source */
+		if(src[0].swz[SWZ_X] != SWZ_X)
+		{
+			if (mask & MASK_X) {
+				arith(fpc, sat, COS, dst, MASK_X,
+				      swz(src[0], X, X, X, X), none, none);
+			}
+			if (mask & MASK_Y) {
+				arith(fpc, sat, SIN, dst, MASK_Y,
+				      swz(src[0], X, X, X, X), none, none);
+			}
 		}
-		if (mask & MASK_Y) {
-			arith(fpc, sat, SIN, dst, MASK_Y,
-			      swz(src[0], X, X, X, X), none, none);
+		else
+		{
+			if (mask & MASK_Y) {
+				arith(fpc, sat, SIN, dst, MASK_Y,
+				      swz(src[0], X, X, X, X), none, none);
+			}
+			if (mask & MASK_X) {
+				arith(fpc, sat, COS, dst, MASK_X,
+				      swz(src[0], X, X, X, X), none, none);
+			}
 		}
 		break;
 	case TGSI_OPCODE_SEQ:
-- 
cgit v1.2.3


From 3a0c527bd03c0a4e637a1dcde11f0fb6129c07f6 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sun, 27 Dec 2009 04:04:46 +0100
Subject: nouveau: Fix nv20-40 swizzled miptree RTs

I just coded a patch that does this and seems to work fine. It must be
fixed since it breaks OpenGL (or the state tracker can be changed, but
it seems better to do it in the driver).

The patch also fixes NV20 and NV30 in the same way. They compile but
are untested.

I would guess that using the 3D engine is faster for the larger
levels, but the 2D engine is faster for the smaller ones (and lacks
this issue).
---
 src/gallium/drivers/nouveau/nouveau_winsys.h |  3 ++
 src/gallium/drivers/nv04/nv04_surface_2d.c   | 46 ++++++++++++++++++++++++++++
 src/gallium/drivers/nv04/nv04_surface_2d.h   |  4 +++
 src/gallium/drivers/nv20/nv20_miptree.c      | 22 +++++++++++++
 src/gallium/drivers/nv20/nv20_transfer.c     |  2 +-
 src/gallium/drivers/nv30/nv30_miptree.c      | 22 +++++++++++++
 src/gallium/drivers/nv30/nv30_transfer.c     |  2 +-
 src/gallium/drivers/nv40/nv40_miptree.c      | 22 +++++++++++++
 src/gallium/drivers/nv40/nv40_transfer.c     |  2 +-
 9 files changed, 122 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 42c77e5e77..4c3e08a43f 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -23,6 +23,9 @@
 #define NOUVEAU_BUFFER_USAGE_ZETA     (1 << 17)
 #define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18)
 
+/* use along with GPU_WRITE for 2D-only writes */
+#define NOUVEAU_BUFFER_USAGE_NO_RENDER (1 << 19)
+
 extern struct pipe_screen *
 nv04_screen_create(struct pipe_winsys *ws, struct nouveau_device *);
 
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 12df7fd199..819f45e96a 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -491,3 +491,49 @@ nv04_surface_2d_init(struct nouveau_screen *screen)
 	ctx->fill = nv04_surface_fill;
 	return ctx;
 }
+
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns)
+{
+	int temp_flags;
+
+	// printf("creating temp, flags is %i!\n", flags);
+
+	if(ns->base.usage & PIPE_BUFFER_USAGE_DISCARD)
+	{
+		temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ;
+		ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_DISCARD;
+	}
+	else
+	{
+		temp_flags = ns->base.usage | PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+		ns->base.usage = PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER | PIPE_BUFFER_USAGE_GPU_READ;
+	}
+
+	struct nv40_screen* screen = (struct nv40_screen*)pscreen;
+	ns->base.usage = PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE;
+
+	struct pipe_texture templ;
+	memset(&templ, 0, sizeof(templ));
+	templ.format = ns->base.texture->format;
+	templ.target = PIPE_TEXTURE_2D;
+	templ.width0 = ns->base.width;
+	templ.height0 = ns->base.height;
+	templ.depth0 = 1;
+	templ.last_level = 0;
+
+	// TODO: this is probably wrong and we should specifically handle multisampling somehow once it is implemented
+	templ.nr_samples = ns->base.texture->nr_samples;
+
+	templ.tex_usage = ns->base.texture->tex_usage | PIPE_TEXTURE_USAGE_RENDER_TARGET;
+
+	struct pipe_texture* temp_tex = pscreen->texture_create(pscreen, &templ);
+	struct nv04_surface* temp_ns = (struct nv04_surface*)pscreen->get_tex_surface(pscreen, temp_tex, 0, 0, 0, temp_flags);
+	temp_ns->backing = ns;
+
+	if(ns->base.usage & PIPE_BUFFER_USAGE_GPU_READ)
+		eng2d->copy(eng2d, &temp_ns->backing->base, 0, 0, &ns->base, 0, 0, ns->base.width, ns->base.height);
+
+	return temp_ns;
+}
+
diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h
index 02b3f56ba8..ce696a11a3 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.h
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.h
@@ -4,6 +4,7 @@
 struct nv04_surface {
 	struct pipe_surface base;
 	unsigned pitch;
+	struct nv04_surface* backing;
 };
 
 struct nv04_surface_2d {
@@ -30,4 +31,7 @@ nv04_surface_2d_init(struct nouveau_screen *screen);
 void
 nv04_surface_2d_takedown(struct nv04_surface_2d **);
 
+struct nv04_surface*
+nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
+
 #endif
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index d1291a92e0..8f7538e7f5 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -6,6 +6,7 @@
 
 #include "nv20_context.h"
 #include "nv20_screen.h"
+#include "../nv04/nv04_surface_2d.h"
 
 static void
 nv20_miptree_layout(struct nv20_miptree *nv20mt)
@@ -127,6 +128,12 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
 	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
 		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+	/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+	 * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+	 * This also happens for small mipmaps of large textures. */
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
 	nv20_miptree_layout(mt);
 
 	mt->buffer = screen->buffer_create(screen, 256, buf_usage, mt->total_size);
@@ -183,12 +190,27 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
 		ns->base.offset = nv20mt->level[level].image_offset[0];
 	}
 
+	/* create a linear temporary that we can render into if necessary.
+	 * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+	 * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+	if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+		return &nv04_surface_wrap_for_render(screen, ((struct nv20_screen*)screen)->eng2d, ns)->base;
+
 	return &ns->base;
 }
 
 static void
 nv20_miptree_surface_destroy(struct pipe_surface *ps)
 {
+	struct nv04_surface* ns = (struct nv04_surface*)ps;
+	if(ns->backing)
+	{
+		struct nv20_screen* screen = (struct nv20_screen*)ps->texture->screen;
+		if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+			screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+		nv20_miptree_surface_destroy(&ns->backing->base);
+	}
+	
 	pipe_texture_reference(&ps->texture, NULL);
 	FREE(ps);
 }
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 69b79c809f..7b51188635 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -126,7 +126,7 @@ nv20_transfer_del(struct pipe_transfer *ptx)
 
 		dst = pscreen->get_tex_surface(pscreen, ptx->texture,
 	                                       ptx->face, ptx->level, ptx->zslice,
-	                                       PIPE_BUFFER_USAGE_GPU_WRITE);
+	                                       PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index ce95d9700f..8fbba38e78 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -5,6 +5,7 @@
 #include "util/u_math.h"
 
 #include "nv30_context.h"
+#include "../nv04/nv04_surface_2d.h"
 
 static void
 nv30_miptree_layout(struct nv30_miptree *nv30mt)
@@ -108,6 +109,12 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
 		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+	/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+	 * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+	 * This also happens for small mipmaps of large textures. */
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
 	nv30_miptree_layout(mt);
 
 	mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage,
@@ -196,12 +203,27 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		ns->base.offset = nv30mt->level[level].image_offset[0];
 	}
 
+	/* create a linear temporary that we can render into if necessary.
+	 * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+	 * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+	if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+		return &nv04_surface_wrap_for_render(pscreen, ((struct nv30_screen*)pscreen)->eng2d, ns)->base;
+
 	return &ns->base;
 }
 
 static void
 nv30_miptree_surface_del(struct pipe_surface *ps)
 {
+	struct nv04_surface* ns = (struct nv04_surface*)ps;
+	if(ns->backing)
+	{
+		struct nv30_screen* screen = (struct nv30_screen*)ps->texture->screen;
+		if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+			screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+		nv30_miptree_surface_del(&ns->backing->base);
+	}
+
 	pipe_texture_reference(&ps->texture, NULL);
 	FREE(ps);
 }
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 2255a02cae..68047c47ec 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -126,7 +126,7 @@ nv30_transfer_del(struct pipe_transfer *ptx)
 
 		dst = pscreen->get_tex_surface(pscreen, ptx->texture,
 	                                       ptx->face, ptx->level, ptx->zslice,
-	                                       PIPE_BUFFER_USAGE_GPU_WRITE);
+	                                       PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index b974e68a07..89bd155ff4 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -5,6 +5,7 @@
 #include "util/u_math.h"
 
 #include "nv40_context.h"
+#include "../nv04/nv04_surface_2d.h"
 
 
@@ -105,6 +106,12 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC)
 		buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
 
+	/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
+	 * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
+	 * This also happens for small mipmaps of large textures. */
+	if (pt->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET && util_format_get_stride(pt->format, pt->width0) < 64)
+		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
+
 	nv40_miptree_layout(mt);
 
 	mt->buffer = pscreen->buffer_create(pscreen, 256, buf_usage, mt->total_size);
@@ -191,12 +198,27 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		ns->base.offset = mt->level[level].image_offset[0];
 	}
 
+	/* create a linear temporary that we can render into if necessary.
+	 * Note that ns->pitch is always a multiple of 64 for linear surfaces and swizzled surfaces are POT, so
+	 * ns->pitch & 63 is equivalent to (ns->pitch < 64 && swizzled)*/
+	if((ns->pitch & 63) && (ns->base.usage & (PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER)) == PIPE_BUFFER_USAGE_GPU_WRITE)
+		return &nv04_surface_wrap_for_render(pscreen, ((struct nv40_screen*)pscreen)->eng2d, ns)->base;
+
 	return &ns->base;
 }
 
 static void
 nv40_miptree_surface_del(struct pipe_surface *ps)
 {
+	struct nv04_surface* ns = (struct nv04_surface*)ps;
+	if(ns->backing)
+	{
+		struct nv40_screen* screen = (struct nv40_screen*)ps->texture->screen;
+		if(ns->backing->base.usage & PIPE_BUFFER_USAGE_GPU_WRITE)
+			screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
+		nv40_miptree_surface_del(&ns->backing->base);
+	}
+
 	pipe_texture_reference(&ps->texture, NULL);
 	FREE(ps);
 }
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index b084a38b48..adfd035621 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -126,7 +126,7 @@ nv40_transfer_del(struct pipe_transfer *ptx)
 
 		dst = pscreen->get_tex_surface(pscreen, ptx->texture,
 	                                       ptx->face, ptx->level, ptx->zslice,
-	                                       PIPE_BUFFER_USAGE_GPU_WRITE);
+	                                       PIPE_BUFFER_USAGE_GPU_WRITE | NOUVEAU_BUFFER_USAGE_NO_RENDER);
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-- 
cgit v1.2.3


From cb9214f33030e1f482d59049c239f20061d24ad8 Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Sat, 26 Dec 2009 07:35:17 +0100
Subject: nouveau: Fix swizzling for copies to rectangular textures

nVidia hardware seems to swizzle the coordinates of rectangular textures
(width != height) by swizzling the lower bits and then adding the higher
bits from the larger dimension.
However, nv04_swizzle_bits ignores width and height and just
interleaves everything.
This causes problems with rectangular POT textures whose height or
width is 2048 or 4096 (but not 2048x1024, where it works by chance), since
the driver swizzles them in 1024x1024 chunks and gets the start
position of every chunk after the first wrong.
The following patch seems to fix those problems.
---
 src/gallium/drivers/nv04/nv04_surface_2d.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 819f45e96a..40b538fd71 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -77,7 +77,7 @@ nv04_scaled_image_format(enum pipe_format format)
 }
 
 static INLINE unsigned
-nv04_swizzle_bits(unsigned x, unsigned y)
+nv04_swizzle_bits_square(unsigned x, unsigned y)
 {
 	unsigned u = (x & 0x001) << 0 |
 	             (x & 0x002) << 1 |
@@ -107,6 +107,15 @@ nv04_swizzle_bits(unsigned x, unsigned y)
 	return v | u;
 }
 
+/* Rectangular swizzled textures are linear concatenations of swizzled square tiles of side s = MIN2(w, h); m = s - 1 is used as a bit mask, so s is presumably a power of two. */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+	unsigned s = MIN2(w, h);
+	unsigned m = s - 1;
+	return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+
 static int
 nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 			  struct pipe_surface *dst, int dx, int dy,
@@ -159,10 +168,10 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	    sub_w = MIN2(sub_w, w - x);
 
 	    /* Must be 64-byte aligned */
-	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format)) & 63));
+	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y, w, h) * util_format_get_blocksize(dst->texture->format)) & 63));
 
 	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
-	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y) * util_format_get_blocksize(dst->texture->format),
+	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y, w, h) * util_format_get_blocksize(dst->texture->format),
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
-- 
cgit v1.2.3


From 85dcc070719ef6a6bcf65dc4996b1733f63912d8 Mon Sep 17 00:00:00 2001
From: Younes Manton <younes.m@gmail.com>
Date: Mon, 28 Dec 2009 17:33:34 -0500
Subject: nouveau: Unreference state/buffer objects on context/screen
 destruction.

- unreference state objects so that buffer objects are unreferenced and
eventually destroyed
- free channel at screen's destruction

Based on Krzysztof Smiechowicz's patch.
---
 src/gallium/drivers/nouveau/nouveau_screen.c |  3 ++-
 src/gallium/drivers/nv04/nv04_screen.c       |  2 ++
 src/gallium/drivers/nv10/nv10_screen.c       |  3 +++
 src/gallium/drivers/nv20/nv20_screen.c       |  3 +++
 src/gallium/drivers/nv30/nv30_context.c      |  6 +++++
 src/gallium/drivers/nv30/nv30_fragprog.c     |  6 +++++
 src/gallium/drivers/nv30/nv30_screen.c       |  9 ++++++++
 src/gallium/drivers/nv40/nv40_context.c      |  6 +++++
 src/gallium/drivers/nv40/nv40_fragprog.c     | 14 ++++++++----
 src/gallium/drivers/nv40/nv40_screen.c       |  7 ++++++
 src/gallium/drivers/nv50/nv50_context.c      | 33 ++++++++++++++++++++++++++++
 src/gallium/drivers/nv50/nv50_screen.c       | 15 +++++++++++++
 12 files changed, 102 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index e4cf91c005..0437af3725 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -31,7 +31,7 @@ nouveau_screen_bo_skel(struct pipe_screen *pscreen, struct nouveau_bo *bo,
 		       unsigned alignment, unsigned usage, unsigned size)
 {
 	struct pipe_buffer *pb;
-	
+
 	pb = CALLOC(1, sizeof(struct pipe_buffer)+sizeof(struct nouveau_bo *));
 	if (!pb) {
 		nouveau_bo_ref(NULL, &bo);
@@ -239,5 +239,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
 void
 nouveau_screen_fini(struct nouveau_screen *screen)
 {
+	nouveau_channel_free(&screen->channel);
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c
index ee9b7d2e48..7c5b6e8229 100644
--- a/src/gallium/drivers/nv04/nv04_screen.c
+++ b/src/gallium/drivers/nv04/nv04_screen.c
@@ -119,6 +119,8 @@ nv04_screen_destroy(struct pipe_screen *pscreen)
 	nouveau_grobj_free(&screen->fahrenheit);
 	nv04_surface_2d_takedown(&screen->eng2d);
 
+	nouveau_screen_fini(&screen->base);
+
 	FREE(pscreen);
 }
 
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index ee5901e743..6a39ddeaac 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -115,6 +115,9 @@ nv10_screen_destroy(struct pipe_screen *pscreen)
 
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->celsius);
+	nv04_surface_2d_takedown(&screen->eng2d);
+
+	nouveau_screen_fini(&screen->base);
 
 	FREE(pscreen);
 }
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index 4eeacd1afd..a0973f1ebd 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -115,6 +115,9 @@ nv20_screen_destroy(struct pipe_screen *pscreen)
 
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->kelvin);
+	nv04_surface_2d_takedown(&screen->eng2d);
+
+	nouveau_screen_fini(&screen->base);
 
 	FREE(pscreen);
 }
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index 46a821a48b..38b39159f1 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -25,6 +25,12 @@ static void
 nv30_destroy(struct pipe_context *pipe)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
+	unsigned i;
+
+	for (i = 0; i < NV30_STATE_MAX; i++) {
+		if (nv30->state.hw[i])
+			so_ref(NULL, &nv30->state.hw[i]);
+	}
 
 	if (nv30->draw)
 		draw_destroy(nv30->draw);
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index dc4e583ce9..d1ff18e2df 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -886,6 +886,12 @@ void
 nv30_fragprog_destroy(struct nv30_context *nv30,
 		      struct nv30_fragment_program *fp)
 {
+	if (fp->buffer)
+		pipe_buffer_reference(&fp->buffer, NULL);
+
+	if (fp->so)
+		so_ref(NULL, &fp->so);
+
 	if (fp->insn_len)
 		FREE(fp->insn);
 }
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 7cd36902eb..760467f736 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -156,6 +156,12 @@ static void
 nv30_screen_destroy(struct pipe_screen *pscreen)
 {
 	struct nv30_screen *screen = nv30_screen(pscreen);
+	unsigned i;
+
+	for (i = 0; i < NV30_STATE_MAX; i++) {
+		if (screen->state[i])
+			so_ref(NULL, &screen->state[i]);
+	}
 
 	nouveau_resource_free(&screen->vp_exec_heap);
 	nouveau_resource_free(&screen->vp_data_heap);
@@ -163,6 +169,9 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
 	nouveau_notifier_free(&screen->query);
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->rankine);
+	nv04_surface_2d_takedown(&screen->eng2d);
+
+	nouveau_screen_fini(&screen->base);
 
 	FREE(pscreen);
 }
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index eb9cce4c78..d56c7a6b49 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -25,6 +25,12 @@ static void
 nv40_destroy(struct pipe_context *pipe)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
+	unsigned i;
+
+	for (i = 0; i < NV40_STATE_MAX; i++) {
+		if (nv40->state.hw[i])
+			so_ref(NULL, &nv40->state.hw[i]);
+	}
 
 	if (nv40->draw)
 		draw_destroy(nv40->draw);
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index 468d3509a9..bb9c85cc43 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -149,7 +149,7 @@ emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);	
+		sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT);
 		break;
 	case NV40SR_NONE:
 		sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT);
@@ -768,7 +768,7 @@ nv40_fragprog_prepare(struct nv40_fpc *fpc)
 		{
 			struct tgsi_full_immediate *imm;
 			float vals[4];
-			
+
 			imm = &p.FullToken.FullImmediate;
 			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
 			assert(fpc->nr_imm < MAX_IMM);
@@ -852,7 +852,7 @@ nv40_fragprog_translate(struct nv40_context *nv40,
 	fp->insn[fpc->inst_offset + 1] = 0x00000000;
 	fp->insn[fpc->inst_offset + 2] = 0x00000000;
 	fp->insn[fpc->inst_offset + 3] = 0x00000000;
-	
+
 	fp->translated = TRUE;
 out_err:
 	tgsi_parse_free(&parse);
@@ -933,7 +933,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
 update_constants:
 	if (fp->nr_consts) {
 		float *map;
-		
+
 		map = pipe_buffer_map(pscreen, constbuf,
 				      PIPE_BUFFER_USAGE_CPU_READ);
 		for (i = 0; i < fp->nr_consts; i++) {
@@ -964,6 +964,12 @@ void
 nv40_fragprog_destroy(struct nv40_context *nv40,
 		      struct nv40_fragment_program *fp)
 {
+	if (fp->buffer)
+		pipe_buffer_reference(&fp->buffer, NULL);
+
+	if (fp->so)
+		so_ref(NULL, &fp->so);
+
 	if (fp->insn_len)
 		FREE(fp->insn);
 }
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index bd13dfddd1..d01e712805 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -140,6 +140,12 @@ static void
 nv40_screen_destroy(struct pipe_screen *pscreen)
 {
 	struct nv40_screen *screen = nv40_screen(pscreen);
+	unsigned i;
+
+	for (i = 0; i < NV40_STATE_MAX; i++) {
+		if (screen->state[i])
+			so_ref(NULL, &screen->state[i]);
+	}
 
 	nouveau_resource_free(&screen->vp_exec_heap);
 	nouveau_resource_free(&screen->vp_data_heap);
@@ -147,6 +153,7 @@ nv40_screen_destroy(struct pipe_screen *pscreen)
 	nouveau_notifier_free(&screen->query);
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->curie);
+	nv04_surface_2d_takedown(&screen->eng2d);
 
 	nouveau_screen_fini(&screen->base);
 
diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c
index d21b80eab8..5997456e4c 100644
--- a/src/gallium/drivers/nv50/nv50_context.c
+++ b/src/gallium/drivers/nv50/nv50_context.c
@@ -43,6 +43,39 @@ nv50_destroy(struct pipe_context *pipe)
 {
 	struct nv50_context *nv50 = nv50_context(pipe);
 
+        if (nv50->state.fb)
+		so_ref(NULL, &nv50->state.fb);
+	if (nv50->state.blend)
+		so_ref(NULL, &nv50->state.blend);
+	if (nv50->state.blend_colour)
+		so_ref(NULL, &nv50->state.blend_colour);
+	if (nv50->state.zsa)
+		so_ref(NULL, &nv50->state.zsa);
+	if (nv50->state.rast)
+		so_ref(NULL, &nv50->state.rast);
+	if (nv50->state.stipple)
+		so_ref(NULL, &nv50->state.stipple);
+	if (nv50->state.scissor)
+		so_ref(NULL, &nv50->state.scissor);
+	if (nv50->state.viewport)
+		so_ref(NULL, &nv50->state.viewport);
+	if (nv50->state.tsc_upload)
+		so_ref(NULL, &nv50->state.tsc_upload);
+	if (nv50->state.tic_upload)
+		so_ref(NULL, &nv50->state.tic_upload);
+	if (nv50->state.vertprog)
+		so_ref(NULL, &nv50->state.vertprog);
+	if (nv50->state.fragprog)
+		so_ref(NULL, &nv50->state.fragprog);
+	if (nv50->state.programs)
+		so_ref(NULL, &nv50->state.programs);
+	if (nv50->state.vtxfmt)
+		so_ref(NULL, &nv50->state.vtxfmt);
+	if (nv50->state.vtxbuf)
+		so_ref(NULL, &nv50->state.vtxbuf);
+	if (nv50->state.vtxattr)
+		so_ref(NULL, &nv50->state.vtxattr);
+
 	draw_destroy(nv50->draw);
 	FREE(nv50);
 }
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 15e4b6e5ca..7e039ea82e 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -165,6 +165,21 @@ static void
 nv50_screen_destroy(struct pipe_screen *pscreen)
 {
 	struct nv50_screen *screen = nv50_screen(pscreen);
+	unsigned i;
+
+	for (i = 0; i < 2; i++) {
+		if (screen->constbuf_parm[i])
+			nouveau_bo_ref(NULL, &screen->constbuf_parm[i]);
+	}
+
+	if (screen->constbuf_misc[0])
+		nouveau_bo_ref(NULL, &screen->constbuf_misc[0]);
+	if (screen->tic)
+		nouveau_bo_ref(NULL, &screen->tic);
+	if (screen->tsc)
+		nouveau_bo_ref(NULL, &screen->tsc);
+	if (screen->static_init)
+		so_ref(NULL, &screen->static_init);
 
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->tesla);
-- 
cgit v1.2.3


From e049ddb7549a45adde521d6f2899c2b74b4ff972 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 28 Dec 2009 18:05:30 -0800
Subject: llvmpipe: Silence compiler warnings.

---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 7 ++++++-
 src/gallium/drivers/llvmpipe/lp_tex_cache.h    | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 52554b950c..0dea2cd4c8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -446,7 +446,12 @@ emit_instruction(
 {
    unsigned chan_index;
    LLVMValueRef src0, src1, src2;
-   LLVMValueRef tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+   LLVMValueRef tmp0, tmp1, tmp2;
+   LLVMValueRef tmp3 = NULL;
+   LLVMValueRef tmp4 = NULL;
+   LLVMValueRef tmp5 = NULL;
+   LLVMValueRef tmp6 = NULL;
+   LLVMValueRef tmp7 = NULL;
    LLVMValueRef res;
    LLVMValueRef dst0[NUM_CHANNELS];
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.h b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
index 9fa6c36812..05fded78e1 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.h
@@ -115,7 +115,7 @@ extern const struct llvmpipe_cached_tex_tile *
 lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
                         union tex_tile_address addr );
 
-static INLINE const union tex_tile_address
+static INLINE union tex_tile_address
 tex_tile_address( unsigned x,
                   unsigned y,
                   unsigned z,
-- 
cgit v1.2.3


From 46c2196de3ed0c2745afb3a2e5180947576f07ea Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 28 Dec 2009 18:07:09 -0800
Subject: llvmpipe: Fix assert.

---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 98ec1cb1b9..d438c0e63d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 
    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
       type.floating = TRUE;
-      assert(swizzle = 0);
+      assert(swizzle == 0);
       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
    }
    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
-- 
cgit v1.2.3


From 9656177bc0fac5785d01900a768c2855bdc04b5a Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 30 Dec 2009 02:54:39 +0100
Subject: nouveau: Fix glTexSubImage on swizzled surfaces on <=NV40

Currently nvXX_transfer_new creates a temporary as large as the surface.
If the subrectangle is not the whole texture, we would need to read
back the whole texture, but we don't.
Thus, everything but the specified subrectangle is loaded as garbage.
This can be seen in progs/demos/ray.

This patch fixes the problem by creating a temporary that covers only
the desired subrectangle.

That makes us hit an alignment assert in nv04_surface_2d.c. Fix it by
passing the destination coordinates through the point registers (where
they do not seem to be subject to a 1024 limit) instead of adding them
to the swizzled surface offset.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
---
 src/gallium/drivers/nv04/nv04_surface_2d.c |  9 ++++-----
 src/gallium/drivers/nv04/nv04_transfer.c   | 24 ++++++++++++++----------
 src/gallium/drivers/nv10/nv10_transfer.c   | 24 ++++++++++++++----------
 src/gallium/drivers/nv20/nv20_transfer.c   | 24 ++++++++++++++----------
 src/gallium/drivers/nv30/nv30_transfer.c   | 24 ++++++++++++++----------
 src/gallium/drivers/nv40/nv40_transfer.c   | 24 ++++++++++++++----------
 6 files changed, 74 insertions(+), 55 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c
index 40b538fd71..b24a9cee5a 100644
--- a/src/gallium/drivers/nv04/nv04_surface_2d.c
+++ b/src/gallium/drivers/nv04/nv04_surface_2d.c
@@ -167,20 +167,19 @@ nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx,
 	  for (x = 0; x < w; x += sub_w) {
 	    sub_w = MIN2(sub_w, w - x);
 
-	    /* Must be 64-byte aligned */
-	    assert(!((dst->offset + nv04_swizzle_bits(dx+x, dy+y, w, h) * util_format_get_blocksize(dst->texture->format)) & 63));
+	    assert(!(dst->offset & 63));
 
 	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
-	    OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(dx+x, dy+y, w, h) * util_format_get_blocksize(dst->texture->format),
+	    OUT_RELOCl(chan, dst_bo, dst->offset,
                              NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	    BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
 	    OUT_RING  (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
 	    OUT_RING  (chan, nv04_scaled_image_format(src->format));
 	    OUT_RING  (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
-	    OUT_RING  (chan, 0);
+	    OUT_RING  (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
 	    OUT_RING  (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | sub_w);
-	    OUT_RING  (chan, 0);
+	    OUT_RING  (chan, (x + dx) | ((y + dy) << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
 	    OUT_RING  (chan, sub_h << NV04_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | sub_w);
 	    OUT_RING  (chan, 1 << 20);
 	    OUT_RING  (chan, 1 << 20);
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 8446073ae8..2dd2e146a8 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -16,14 +16,14 @@ struct nv04_transfer {
 };
 
 static void
-nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
                              struct pipe_texture *template)
 {
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width0 = u_minify(pt->width0, level);
-	template->height0 = u_minify(pt->height0, level);
+	template->width0 = width;
+	template->height0 = height;
 	template->depth0 = 1;
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	tx->direct = false;
 
-	nv04_compatible_transfer_tex(pt, level, &tx_tex_template);
+	nv04_compatible_transfer_tex(pt, w, h, &tx_tex_template);
 
 	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
 	if (!tx_tex)
@@ -80,6 +80,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	tx->base.stride = ((struct nv04_miptree*)tx_tex)->level[0].pitch;
+
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       face, level, zslice,
 	                                       pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv04_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		/* TODO: Check if SIFM can un-swizzle */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
 		                      tx->surface, 0, 0,
-		                      src, 0, 0,
-		                      src->width, src->height);
+		                      src, x, y,
+		                      w, h);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -130,9 +132,9 @@ nv04_transfer_del(struct pipe_transfer *ptx)
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, 0, 0,
+		                      dst, tx->base.x, tx->base.y,
 		                      tx->surface, 0, 0,
-		                      dst->width, dst->height);
+		                      tx->base.width, tx->base.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -151,8 +153,10 @@ nv04_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	void *map = pipe_buffer_map(pscreen, mt->buffer,
 	                            pipe_transfer_buffer_flags(ptx));
 
-	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+	if(!tx->direct)
+		return map + ns->base.offset;
+	else
+		return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index c664973e90..eb04af9782 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -16,14 +16,14 @@ struct nv10_transfer {
 };
 
 static void
-nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
                              struct pipe_texture *template)
 {
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width0 = u_minify(pt->width0, level);
-	template->height0 = u_minify(pt->height0, level);
+	template->width0 = width;
+	template->height0 = height;
 	template->depth0 = 1;
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	tx->direct = false;
 
-	nv10_compatible_transfer_tex(pt, level, &tx_tex_template);
+	nv10_compatible_transfer_tex(pt, w, h, &tx_tex_template);
 
 	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
 	if (!tx_tex)
@@ -80,6 +80,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	tx->base.stride = ((struct nv10_miptree*)tx_tex)->level[0].pitch;
+
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       face, level, zslice,
 	                                       pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv10_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		/* TODO: Check if SIFM can un-swizzle */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
 		                      tx->surface, 0, 0,
-		                      src, 0, 0,
-		                      src->width, src->height);
+		                      src, x, y,
+		                      w, h);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -130,9 +132,9 @@ nv10_transfer_del(struct pipe_transfer *ptx)
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, 0, 0,
+		                      dst, tx->base.x, tx->base.y,
 		                      tx->surface, 0, 0,
-		                      dst->width, dst->height);
+		                      tx->base.width, tx->base.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -151,8 +153,10 @@ nv10_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	void *map = pipe_buffer_map(pscreen, mt->buffer,
 	                            pipe_transfer_buffer_flags(ptx));
 
-	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+	if(!tx->direct)
+		return map + ns->base.offset;
+	else
+		return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 7b51188635..699773e8e6 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -16,14 +16,14 @@ struct nv20_transfer {
 };
 
 static void
-nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
                              struct pipe_texture *template)
 {
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width0 = u_minify(pt->width0, level);
-	template->height0 = u_minify(pt->height0, level);
+	template->width0 = width;
+	template->height0 = height;
 	template->depth0 = 1;
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	tx->direct = false;
 
-	nv20_compatible_transfer_tex(pt, level, &tx_tex_template);
+	nv20_compatible_transfer_tex(pt, w, h, &tx_tex_template);
 
 	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
 	if (!tx_tex)
@@ -80,6 +80,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	tx->base.stride = ((struct nv20_miptree*)tx_tex)->level[0].pitch;
+
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       face, level, zslice,
 	                                       pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv20_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		/* TODO: Check if SIFM can un-swizzle */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
 		                      tx->surface, 0, 0,
-		                      src, 0, 0,
-		                      src->width, src->height);
+		                      src, x, y,
+		                      w, h);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -130,9 +132,9 @@ nv20_transfer_del(struct pipe_transfer *ptx)
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, 0, 0,
+		                      dst, tx->base.x, tx->base.y,
 		                      tx->surface, 0, 0,
-		                      dst->width, dst->height);
+		                      tx->base.width, tx->base.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -151,8 +153,10 @@ nv20_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	void *map = pipe_buffer_map(pscreen, mt->buffer,
 	                            pipe_transfer_buffer_flags(ptx));
 
-	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+	if(!tx->direct)
+		return map + ns->base.offset;
+	else
+		return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 68047c47ec..65598991c6 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -16,14 +16,14 @@ struct nv30_transfer {
 };
 
 static void
-nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
                              struct pipe_texture *template)
 {
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width0 = u_minify(pt->width0, level);
-	template->height0 = u_minify(pt->height0, level);
+	template->width0 = width;
+	template->height0 = height;
 	template->depth0 = 1;
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	tx->direct = false;
 
-	nv30_compatible_transfer_tex(pt, level, &tx_tex_template);
+	nv30_compatible_transfer_tex(pt, w, h, &tx_tex_template);
 
 	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
 	if (!tx_tex)
@@ -80,6 +80,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	tx->base.stride = ((struct nv30_miptree*)tx_tex)->level[0].pitch;
+
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       0, 0, 0,
 	                                       pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv30_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		/* TODO: Check if SIFM can un-swizzle */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
 		                      tx->surface, 0, 0,
-		                      src, 0, 0,
-		                      src->width, src->height);
+		                      src, x, y,
+		                      w, h);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -130,9 +132,9 @@ nv30_transfer_del(struct pipe_transfer *ptx)
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, 0, 0,
+		                      dst, tx->base.x, tx->base.y,
 		                      tx->surface, 0, 0,
-		                      dst->width, dst->height);
+		                      tx->base.width, tx->base.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -151,8 +153,10 @@ nv30_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	void *map = pipe_buffer_map(pscreen, mt->buffer,
 	                            pipe_transfer_buffer_flags(ptx));
 
-	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+	if(!tx->direct)
+		return map + ns->base.offset;
+	else
+		return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index adfd035621..791ee6823d 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -16,14 +16,14 @@ struct nv40_transfer {
 };
 
 static void
-nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
+nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned width, unsigned height,
                              struct pipe_texture *template)
 {
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width0 = u_minify(pt->width0, level);
-	template->height0 = u_minify(pt->height0, level);
+	template->width0 = width;
+	template->height0 = height;
 	template->depth0 = 1;
 	template->last_level = 0;
 	template->nr_samples = pt->nr_samples;
@@ -71,7 +71,7 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 
 	tx->direct = false;
 
-	nv40_compatible_transfer_tex(pt, level, &tx_tex_template);
+	nv40_compatible_transfer_tex(pt, w, h, &tx_tex_template);
 
 	tx_tex = pscreen->texture_create(pscreen, &tx_tex_template);
 	if (!tx_tex)
@@ -80,6 +80,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	}
 
+	tx->base.stride = ((struct nv40_miptree*)tx_tex)->level[0].pitch;
+
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       0, 0, 0,
 	                                       pipe_transfer_buffer_flags(&tx->base));
@@ -105,8 +107,8 @@ nv40_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		/* TODO: Check if SIFM can un-swizzle */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
 		                      tx->surface, 0, 0,
-		                      src, 0, 0,
-		                      src->width, src->height);
+		                      src, x, y,
+		                      w, h);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -130,9 +132,9 @@ nv40_transfer_del(struct pipe_transfer *ptx)
 
 		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
 		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, 0, 0,
+		                      dst, tx->base.x, tx->base.y,
 		                      tx->surface, 0, 0,
-		                      dst->width, dst->height);
+		                      tx->base.width, tx->base.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -151,8 +153,10 @@ nv40_transfer_map(struct pipe_screen *pscreen, struct pipe_transfer *ptx)
 	void *map = pipe_buffer_map(pscreen, mt->buffer,
 	                            pipe_transfer_buffer_flags(ptx));
 
-	return map + ns->base.offset +
-	       ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
+	if(!tx->direct)
+		return map + ns->base.offset;
+	else
+		return map + ns->base.offset + ptx->y * ns->pitch + ptx->x * util_format_get_blocksize(ptx->texture->format);
 }
 
 static void
-- 
cgit v1.2.3


From 3019afdbd862b86eb26d222d3a1d743faf3693be Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Mon, 28 Dec 2009 23:17:26 +0100
Subject: nv50: don't negate immediates in set_immd

This negation would only be triggered in situations
where it's incorrect.
The caller of set_immd should negate the immediate value
in the instruction itself if desired, and will also know
if it's a float or an int.

ADD TEMP[0], CONST[0], -IMMD[0] would load the immediate
into extra TEMP, negated, and set the negate flag in add
as well - double negation.
---
 src/gallium/drivers/nv50/nv50_program.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index b9910b430a..8895a920fe 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -499,15 +499,6 @@ set_dst(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e)
 static INLINE void
 set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 {
-	union {
-		float f;
-		uint32_t ui;
-	} u;
-	u.ui = pc->immd_buf[imm->hw];
-
-	u.f = (imm->mod & NV50_MOD_ABS) ? fabsf(u.f) : u.f;
-	u.f = (imm->mod & NV50_MOD_NEG) ? -u.f : u.f;
-
 	set_long(pc, e);
 	/* XXX: can't be predicated - bits overlap; cases where both
 	 * are required should be avoided by using pc->allow32 */
@@ -515,8 +506,8 @@ set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e)
 	set_pred_wr(pc, 0, 0, e);
 
 	e->inst[1] |= 0x00000002 | 0x00000001;
-	e->inst[0] |= (u.ui & 0x3f) << 16;
-	e->inst[1] |= (u.ui >> 6) << 2;
+	e->inst[0] |= (pc->immd_buf[imm->hw] & 0x3f) << 16;
+	e->inst[1] |= (pc->immd_buf[imm->hw] >> 6) << 2;
 }
 
 static INLINE void
@@ -888,7 +879,7 @@ emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	set_dst(pc, dst, e);
 	set_src_0(pc, src0, e);
 	if (src1->type == P_IMMD && !is_long(e)) {
-		if (src0->mod & NV50_MOD_NEG)
+		if (src0->mod ^ src1->mod)
 			e->inst[0] |= 0x00008000;
 		set_immd(pc, src1, e);
 	} else {
@@ -999,6 +990,8 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	    op != TGSI_OPCODE_XOR)
 		assert(!"invalid bit op");
 
+	assert(!(src0->mod | src1->mod));
+
 	if (src1->type == P_IMMD && src0->type == P_TEMP && pc->allow32) {
 		set_immd(pc, src1, e);
 		if (op == TGSI_OPCODE_OR)
-- 
cgit v1.2.3


From 4d2551beb7b3f5ae9f47ee97e24556c5bcb905c8 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 29 Dec 2009 00:02:45 +0100
Subject: nv50: neg and abs modifiers for flops

Also fixes RSQ of negative sources.
---
 src/gallium/drivers/nv50/nv50_program.c | 83 ++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 28 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 8895a920fe..739621a5b3 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -656,6 +656,7 @@ set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s,
 	e->inst[1] |= (((src->type == P_IMMD) ? 0 : 1) << 22);
 }
 
+/* Never apply nv50_reg::mod in emit_mov, or carefully check the code !!! */
 static void
 emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
@@ -1043,6 +1044,14 @@ emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	src2->mod ^= NV50_MOD_NEG;
 }
 
+#define NV50_FLOP_RCP 0
+#define NV50_FLOP_RSQ 2
+#define NV50_FLOP_LG2 3
+#define NV50_FLOP_SIN 4
+#define NV50_FLOP_COS 5
+#define NV50_FLOP_EX2 6
+
+/* rcp, rsqrt, lg2 support neg and abs */
 static void
 emit_flop(struct nv50_pc *pc, unsigned sub,
 	  struct nv50_reg *dst, struct nv50_reg *src)
@@ -1050,17 +1059,20 @@ emit_flop(struct nv50_pc *pc, unsigned sub,
 	struct nv50_program_exec *e = exec(pc);
 
 	e->inst[0] |= 0x90000000;
-	if (sub) {
+	if (sub || src->mod) {
 		set_long(pc, e);
 		e->inst[1] |= (sub << 29);
 	}
 
 	set_dst(pc, dst, e);
+	set_src_0_restricted(pc, src, e);
 
-	if (sub == 0 || sub == 2)
-		set_src_0_restricted(pc, src, e);
-	else
-		set_src_0(pc, src, e);
+	assert(!src->mod || sub < 4);
+
+	if (src->mod & NV50_MOD_NEG)
+		e->inst[1] |= 0x04000000;
+	if (src->mod & NV50_MOD_ABS)
+		e->inst[1] |= 0x00100000;
 
 	emit(pc, e);
 }
@@ -1077,6 +1089,11 @@ emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 	set_long(pc, e);
 	e->inst[1] |= (6 << 29) | 0x00004000;
 
+	if (src->mod & NV50_MOD_NEG)
+		e->inst[1] |= 0x04000000;
+	if (src->mod & NV50_MOD_ABS)
+		e->inst[1] |= 0x00100000;
+
 	emit(pc, e);
 }
 
@@ -1092,6 +1109,11 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 	set_long(pc, e);
 	e->inst[1] |= (6 << 29);
 
+	if (src->mod & NV50_MOD_NEG)
+		e->inst[1] |= 0x04000000;
+	if (src->mod & NV50_MOD_ABS)
+		e->inst[1] |= 0x00100000;
+
 	emit(pc, e);
 }
 
@@ -1226,10 +1248,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
 {
 	struct nv50_reg *temp = alloc_temp(pc, NULL);
 
-	emit_flop(pc, 3, temp, v);
+	emit_flop(pc, NV50_FLOP_LG2, temp, v);
 	emit_mul(pc, temp, temp, e);
 	emit_preex2(pc, temp, temp);
-	emit_flop(pc, 6, dst, temp);
+	emit_flop(pc, NV50_FLOP_EX2, dst, temp);
 
 	free_temp(pc, temp);
 }
@@ -1453,7 +1475,7 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 	if (arg == 4) /* there is no textureProj(samplerCubeShadow) */
 		emit_mov(pc, t[3], src[3]);
 
-	emit_flop(pc, 0, t[2], t[2]);
+	emit_flop(pc, NV50_FLOP_RCP, t[2], t[2]);
 
 	emit_mul(pc, t[0], src[0], t[2]);
 	emit_mul(pc, t[1], src[1], t[2]);
@@ -1471,7 +1493,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 
 		t[3]->rhw = src[3]->rhw;
 		emit_interp(pc, t[3], NULL, (mode & INTERP_CENTROID));
-		emit_flop(pc, 0, t[3], t[3]);
+		emit_flop(pc, NV50_FLOP_RCP, t[3], t[3]);
 
 		for (c = 0; c < dim; ++c) {
 			t[c]->rhw = src[c]->rhw;
@@ -1485,7 +1507,7 @@ load_proj_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 		/* XXX: for some reason the blob sometimes uses MAD
 		 * (mad f32 $rX $rY $rZ neg $r63)
 		 */
-		emit_flop(pc, 0, t[3], src[3]);
+		emit_flop(pc, NV50_FLOP_RCP, t[3], src[3]);
 		for (c = 0; c < dim; ++c)
 			emit_mul(pc, t[c], src[c], t[3]);
 		if (arg != dim) /* depth reference value */
@@ -1777,20 +1799,24 @@ static boolean
 negate_supported(const struct tgsi_full_instruction *insn, int i)
 {
 	switch (insn->Instruction.Opcode) {
+	case TGSI_OPCODE_ADD:
+	case TGSI_OPCODE_COS:
 	case TGSI_OPCODE_DDX:
 	case TGSI_OPCODE_DDY:
 	case TGSI_OPCODE_DP3:
 	case TGSI_OPCODE_DP4:
-	case TGSI_OPCODE_MUL:
+	case TGSI_OPCODE_EX2:
 	case TGSI_OPCODE_KIL:
-	case TGSI_OPCODE_ADD:
-	case TGSI_OPCODE_SUB:
+	case TGSI_OPCODE_LG2:
 	case TGSI_OPCODE_MAD:
-		return TRUE;
+	case TGSI_OPCODE_MUL:
 	case TGSI_OPCODE_POW:
-		if (i == 1)
-			return TRUE;
-		return FALSE;
+	case TGSI_OPCODE_RCP:
+	case TGSI_OPCODE_RSQ: /* ignored, RSQ = rsqrt(abs(src.x)) */
+	case TGSI_OPCODE_SCS:
+	case TGSI_OPCODE_SIN:
+	case TGSI_OPCODE_SUB:
+		return TRUE;
 	default:
 		return FALSE;
 	}
@@ -2242,14 +2268,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_COS:
 		if (mask & 8) {
 			emit_precossin(pc, temp, src[0][3]);
-			emit_flop(pc, 5, dst[3], temp);
+			emit_flop(pc, NV50_FLOP_COS, dst[3], temp);
 			if (!(mask &= 7))
 				break;
 			if (temp == dst[3])
 				temp = brdc = temp_temp(pc);
 		}
 		emit_precossin(pc, temp, src[0][0]);
-		emit_flop(pc, 5, brdc, temp);
+		emit_flop(pc, NV50_FLOP_COS, brdc, temp);
 		break;
 	case TGSI_OPCODE_DDX:
 		for (c = 0; c < 4; c++) {
@@ -2323,7 +2349,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		break;
 	case TGSI_OPCODE_EX2:
 		emit_preex2(pc, temp, src[0][0]);
-		emit_flop(pc, 6, brdc, temp);
+		emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
 		break;
 	case TGSI_OPCODE_FLR:
 		for (c = 0; c < 4; c++) {
@@ -2363,7 +2389,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_lit(pc, &dst[0], mask, &src[0][0]);
 		break;
 	case TGSI_OPCODE_LG2:
-		emit_flop(pc, 3, brdc, src[0][0]);
+		emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
 		break;
 	case TGSI_OPCODE_LRP:
 		temp = temp_temp(pc);
@@ -2413,7 +2439,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_pow(pc, brdc, src[0][0], src[1][0]);
 		break;
 	case TGSI_OPCODE_RCP:
-		emit_flop(pc, 0, brdc, src[0][0]);
+		emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
 		break;
 	case TGSI_OPCODE_RET:
 		if (pc->p->type == PIPE_SHADER_FRAGMENT)
@@ -2421,16 +2447,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_ret(pc, -1, 0);
 		break;
 	case TGSI_OPCODE_RSQ:
-		emit_flop(pc, 2, brdc, src[0][0]);
+		src[0][0]->mod |= NV50_MOD_ABS;
+		emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
 		break;
 	case TGSI_OPCODE_SCS:
 		temp = temp_temp(pc);
 		if (mask & 3)
 			emit_precossin(pc, temp, src[0][0]);
 		if (mask & (1 << 0))
-			emit_flop(pc, 5, dst[0], temp);
+			emit_flop(pc, NV50_FLOP_COS, dst[0], temp);
 		if (mask & (1 << 1))
-			emit_flop(pc, 4, dst[1], temp);
+			emit_flop(pc, NV50_FLOP_SIN, dst[1], temp);
 		if (mask & (1 << 2))
 			emit_mov_immdval(pc, dst[2], 0.0);
 		if (mask & (1 << 3))
@@ -2439,14 +2466,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_SIN:
 		if (mask & 8) {
 			emit_precossin(pc, temp, src[0][3]);
-			emit_flop(pc, 4, dst[3], temp);
+			emit_flop(pc, NV50_FLOP_SIN, dst[3], temp);
 			if (!(mask &= 7))
 				break;
 			if (temp == dst[3])
 				temp = brdc = temp_temp(pc);
 		}
 		emit_precossin(pc, temp, src[0][0]);
-		emit_flop(pc, 4, brdc, temp);
+		emit_flop(pc, NV50_FLOP_SIN, brdc, temp);
 		break;
 	case TGSI_OPCODE_SLT:
 	case TGSI_OPCODE_SGE:
@@ -2781,7 +2808,7 @@ load_interpolant(struct nv50_pc *pc, struct nv50_reg *reg)
 		iv->rhw = popcnt4(pc->p->cfg.regs[1] >> 24) - 1;
 
 		emit_interp(pc, iv, NULL, mode & INTERP_CENTROID);
-		emit_flop(pc, 0, iv, iv);
+		emit_flop(pc, NV50_FLOP_RCP, iv, iv);
 
 		/* XXX: when loading interpolants dynamically, move these
 		 * to the program head, or make sure it can't be skipped.
-- 
cgit v1.2.3


From 0ba518e3274b020deac255061ab23c74aefeb3f1 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Tue, 29 Dec 2009 12:04:34 +0100
Subject: nv50: multiply polygon offset units by 2

---
 src/gallium/drivers/nv50/nv50_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 18a2b819d8..30b2b0f91b 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -392,7 +392,7 @@ nv50_rasterizer_state_create(struct pipe_context *pipe,
 		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1);
 		so_data  (so, fui(cso->offset_scale));
 		so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1);
-		so_data  (so, fui(cso->offset_units));
+		so_data  (so, fui(cso->offset_units * 2.0f));
 	}
 
 	rso->pipe = *cso;
-- 
cgit v1.2.3


From 170cdb4507683fb9042620f7ab2ad96e57787d6c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 31 Dec 2009 00:37:47 +0100
Subject: nv50: alloc_reg on reg_instance

If we create multiple instances of an nv50_reg referencing
the same resource, register allocation from alloc_reg has
to be done with the original nv50_reg.
---
 src/gallium/drivers/nv50/nv50_program.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 739621a5b3..295725a6c0 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -163,20 +163,6 @@ struct nv50_pc {
 	uint8_t edgeflag_out;
 };
 
-static INLINE struct nv50_reg *
-reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
-{
-	struct nv50_reg *ri;
-
-	assert(pc->reg_instance_nr < 16);
-	ri = &pc->reg_instances[pc->reg_instance_nr++];
-	if (reg) {
-		*ri = *reg;
-		reg->mod = 0;
-	}
-	return ri;
-}
-
 static INLINE void
 ctor_reg(struct nv50_reg *reg, unsigned type, int index, int hw)
 {
@@ -255,6 +241,21 @@ alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg)
 	assert(0);
 }
 
+static INLINE struct nv50_reg *
+reg_instance(struct nv50_pc *pc, struct nv50_reg *reg)
+{
+	struct nv50_reg *ri;
+
+	assert(pc->reg_instance_nr < 16);
+	ri = &pc->reg_instances[pc->reg_instance_nr++];
+	if (reg) {
+		alloc_reg(pc, reg);
+		*ri = *reg;
+		reg->mod = 0;
+	}
+	return ri;
+}
+
 /* XXX: For shaders that aren't executed linearly (e.g. shaders that
  * contain loops), we need to assign all hw regs to TGSI TEMPs early,
  * lest we risk temp_temps overwriting regs alloc'd "later".
-- 
cgit v1.2.3


From f2cca04bd87547f8a76d87f4cd9b585de0cf40de Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 31 Dec 2009 12:26:14 +0100
Subject: nv50: add support for subroutines

---
 src/gallium/drivers/nv50/nv50_program.c | 131 +++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 53 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 295725a6c0..1847e8ab4c 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -154,6 +154,9 @@ struct nv50_pc {
 	int if_lvl, loop_lvl;
 	unsigned loop_pos[NV50_MAX_LOOP_NESTING];
 
+	unsigned *insn_pos; /* actual program offset of each TGSI insn */
+	boolean in_subroutine;
+
 	/* current instruction and total number of insns */
 	unsigned insn_cur;
 	unsigned insn_nr;
@@ -454,6 +457,14 @@ is_immd(struct nv50_program_exec *e)
 	return FALSE;
 }
 
+static boolean
+is_join(struct nv50_program_exec *e)
+{
+	if (is_long(e) && (e->inst[1] & 3) == 2)
+		return TRUE;
+	return FALSE;
+}
+
 static INLINE void
 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
 	 struct nv50_program_exec *e)
@@ -1354,66 +1365,53 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 }
 
 static struct nv50_program_exec *
-emit_breakaddr(struct nv50_pc *pc)
+emit_control_flow(struct nv50_pc *pc, unsigned op, int pred, unsigned cc)
 {
 	struct nv50_program_exec *e = exec(pc);
 
-	e->inst[0] = 0x40000002;
+	e->inst[0] = (op << 28) | 2;
 	set_long(pc, e);
+	if (pred >= 0)
+		set_pred(pc, cc, pred, e);
 
 	emit(pc, e);
 	return e;
 }
 
-static void
-emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+static INLINE struct nv50_program_exec *
+emit_breakaddr(struct nv50_pc *pc)
 {
-	struct nv50_program_exec *e = exec(pc);
-
-	e->inst[0] = 0x50000002;
-	set_long(pc, e);
-	if (pred >= 0)
-		set_pred(pc, cc, pred, e);
+	return emit_control_flow(pc, 0x4, -1, 0);
+}
 
-	emit(pc, e);
+static INLINE void
+emit_break(struct nv50_pc *pc, int pred, unsigned cc)
+{
+	emit_control_flow(pc, 0x5, pred, cc);
 }
 
-static struct nv50_program_exec *
+static INLINE struct nv50_program_exec *
 emit_joinat(struct nv50_pc *pc)
 {
-	struct nv50_program_exec *e = exec(pc);
-
-	e->inst[0] = 0xa0000002;
-	set_long(pc, e);
-
-	emit(pc, e);
-	return e;
+	return emit_control_flow(pc, 0xa, -1, 0);
 }
 
-static struct nv50_program_exec *
+static INLINE struct nv50_program_exec *
 emit_branch(struct nv50_pc *pc, int pred, unsigned cc)
 {
-	struct nv50_program_exec *e = exec(pc);
+	return emit_control_flow(pc, 0x1, pred, cc);
+}
 
-	e->inst[0] = 0x10000002;
-	set_long(pc, e);
-	if (pred >= 0)
-		set_pred(pc, cc, pred, e);
-	emit(pc, e);
-	return pc->p->exec_tail;
+static INLINE struct nv50_program_exec *
+emit_call(struct nv50_pc *pc, int pred, unsigned cc)
+{
+	return emit_control_flow(pc, 0x2, pred, cc);
 }
 
-static void
+static INLINE void
 emit_ret(struct nv50_pc *pc, int pred, unsigned cc)
 {
-	struct nv50_program_exec *e = exec(pc);
-
-	e->inst[0] = 0x30000002;
-	set_long(pc, e);
-	if (pred >= 0)
-		set_pred(pc, cc, pred, e);
-
-	emit(pc, e);
+	emit_control_flow(pc, 0x3, pred, cc);
 }
 
 #define QOP_ADD 0
@@ -2237,10 +2235,22 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		pc->loop_pos[pc->loop_lvl++] = pc->p->exec_size;
 		terminate_mbb(pc);
 		break;
+	case TGSI_OPCODE_BGNSUB:
+		assert(!pc->in_subroutine);
+		pc->in_subroutine = TRUE;
+		/* probably not necessary, but align to 8 byte boundary */
+		if (!is_long(pc->p->exec_tail))
+			convert_to_long(pc, pc->p->exec_tail);
+		break;
 	case TGSI_OPCODE_BRK:
 		assert(pc->loop_lvl > 0);
 		emit_break(pc, -1, 0);
 		break;
+	case TGSI_OPCODE_CAL:
+		assert(inst->Label.Label < pc->insn_nr);
+		emit_call(pc, -1, 0)->param.index = inst->Label.Label;
+		/* replaced by actual offset in nv50_program_fixup_insns */
+		break;
 	case TGSI_OPCODE_CEIL:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
@@ -2348,6 +2358,10 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		pc->loop_brka[pc->loop_lvl]->param.index = pc->p->exec_size;
 		terminate_mbb(pc);
 		break;
+	case TGSI_OPCODE_ENDSUB:
+		assert(pc->in_subroutine);
+		pc->in_subroutine = FALSE;
+		break;
 	case TGSI_OPCODE_EX2:
 		emit_preex2(pc, temp, src[0][0]);
 		emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
@@ -2443,7 +2457,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_flop(pc, NV50_FLOP_RCP, brdc, src[0][0]);
 		break;
 	case TGSI_OPCODE_RET:
-		if (pc->p->type == PIPE_SHADER_FRAGMENT)
+		if (pc->p->type == PIPE_SHADER_FRAGMENT && !pc->in_subroutine)
 			nv50_fp_move_results(pc);
 		emit_ret(pc, -1, 0);
 		break;
@@ -2538,6 +2552,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_mov_immdval(pc, dst[3], 1.0);
 		break;
 	case TGSI_OPCODE_END:
+		if (pc->p->type == PIPE_SHADER_FRAGMENT)
+			nv50_fp_move_results(pc);
+
+		/* last insn must be long so it can have the exit bit set */
+		if (!is_long(pc->p->exec_tail))
+			convert_to_long(pc, pc->p->exec_tail);
+		else
+		if (is_immd(pc->p->exec_tail) || is_join(pc->p->exec_tail))
+			emit_nop(pc);
+
+		pc->p->exec_tail->inst[1] |= 1; /* set exit bit */
 		break;
 	default:
 		NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode);
@@ -3231,16 +3256,6 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 		if (e->param.index >= 0 && !e->param.mask)
 			bra_list[n++] = e;
 
-	/* last instruction must be long so it can have the exit bit set */
-	if (!is_long(pc->p->exec_tail))
-		convert_to_long(pc, pc->p->exec_tail);
-	/* set exit bit */
-	pc->p->exec_tail->inst[1] |= 1;
-
-	/* !immd on exit insn simultaneously means !join */
-	assert(!is_immd(pc->p->exec_head));
-	assert(!is_immd(pc->p->exec_tail));
-
 	/* Make sure we don't have any single 32 bit instructions. */
 	for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
 		pos += is_long(e) ? 2 : 1;
@@ -3249,12 +3264,24 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 			for (i = 0; i < n; ++i)
 				if (bra_list[i]->param.index >= pos)
 					bra_list[i]->param.index += 1;
+			for (i = 0; i < pc->insn_nr; ++i)
+				if (pc->insn_pos[i] >= pos)
+					pc->insn_pos[i] += 1;
 			convert_to_long(pc, e);
 			++pos;
 		}
 	}
 
 	FREE(bra_list);
+
+	if (!pc->p->info.opcode_count[TGSI_OPCODE_CAL])
+		return;
+
+	/* fill in CALL offsets */
+	for (e = pc->p->exec_head; e; e = e->next) {
+		if ((e->inst[0] & 2) && (e->inst[0] >> 28) == 0x2)
+			e->param.index = pc->insn_pos[e->param.index];
+	}
 }
 
 static boolean
@@ -3276,19 +3303,20 @@ nv50_program_tx(struct nv50_program *p)
 	if (ret == FALSE)
 		goto out_cleanup;
 
+	pc->insn_pos = MALLOC(pc->insn_nr * sizeof(unsigned));
+
 	tgsi_parse_init(&parse, pc->p->pipe.tokens);
 	while (!tgsi_parse_end_of_tokens(&parse)) {
 		const union tgsi_full_token *tok = &parse.FullToken;
 
-		/* don't allow half insn/immd on first and last instruction */
+		/* previously allow32 was FALSE for first & last instruction */
 		pc->allow32 = TRUE;
-		if (pc->insn_cur == 0 || pc->insn_cur + 2 == pc->insn_nr)
-			pc->allow32 = FALSE;
 
 		tgsi_parse_token(&parse);
 
 		switch (tok->Token.Type) {
 		case TGSI_TOKEN_TYPE_INSTRUCTION:
+			pc->insn_pos[pc->insn_cur] = pc->p->exec_size;
 			++pc->insn_cur;
 			ret = nv50_tgsi_insn(pc, tok);
 			if (ret == FALSE)
@@ -3299,9 +3327,6 @@ nv50_program_tx(struct nv50_program *p)
 		}
 	}
 
-	if (pc->p->type == PIPE_SHADER_FRAGMENT)
-		nv50_fp_move_results(pc);
-
 	nv50_program_fixup_insns(pc);
 
 	p->param_nr = pc->param_nr * 4;
-- 
cgit v1.2.3


From d9ae8f31d5e687fae412edf888028cce69b039f0 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 30 Dec 2009 20:54:58 +0100
Subject: nv50: handle TGSI_OPCODE_EXP,LOG

Not that they make much sense on nv50, but we also do LIT ...
---
 src/gallium/drivers/nv50/nv50_program.c | 59 +++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 1847e8ab4c..a85587b986 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1840,7 +1840,9 @@ nv50_tgsi_src_mask(const struct tgsi_full_instruction *insn, int c)
 	case TGSI_OPCODE_DST:
 		return mask & (c ? 0xa : 0x6);
 	case TGSI_OPCODE_EX2:
+	case TGSI_OPCODE_EXP:
 	case TGSI_OPCODE_LG2:
+	case TGSI_OPCODE_LOG:
 	case TGSI_OPCODE_POW:
 	case TGSI_OPCODE_RCP:
 	case TGSI_OPCODE_RSQ:
@@ -2062,6 +2064,8 @@ nv50_tgsi_dst_revdep(unsigned op, int s, int c)
 			assert(0);
 			return 0x0;
 		}
+	case TGSI_OPCODE_EXP:
+	case TGSI_OPCODE_LOG:
 	case TGSI_OPCODE_LIT:
 	case TGSI_OPCODE_SCS:
 	case TGSI_OPCODE_TEX:
@@ -2366,6 +2370,33 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		emit_preex2(pc, temp, src[0][0]);
 		emit_flop(pc, NV50_FLOP_EX2, brdc, temp);
 		break;
+	case TGSI_OPCODE_EXP:
+	{
+		struct nv50_reg *t[2];
+
+		assert(!temp);
+		t[0] = temp_temp(pc);
+		t[1] = temp_temp(pc);
+
+		if (mask & 0x6)
+			emit_mov(pc, t[0], src[0][0]);
+		if (mask & 0x3)
+			emit_flr(pc, t[1], src[0][0]);
+
+		if (mask & (1 << 1))
+			emit_sub(pc, dst[1], t[0], t[1]);
+		if (mask & (1 << 0)) {
+			emit_preex2(pc, t[1], t[1]);
+			emit_flop(pc, NV50_FLOP_EX2, dst[0], t[1]);
+		}
+		if (mask & (1 << 2)) {
+			emit_preex2(pc, t[0], t[0]);
+			emit_flop(pc, NV50_FLOP_EX2, dst[2], t[0]);
+		}
+		if (mask & (1 << 3))
+			emit_mov_immdval(pc, dst[3], 1.0f);
+	}
+		break;
 	case TGSI_OPCODE_FLR:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
@@ -2406,6 +2437,34 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_LG2:
 		emit_flop(pc, NV50_FLOP_LG2, brdc, src[0][0]);
 		break;
+	case TGSI_OPCODE_LOG:
+	{
+		struct nv50_reg *t[2];
+
+		t[0] = temp_temp(pc);
+		if (mask & (1 << 1))
+			t[1] = temp_temp(pc);
+		else
+			t[1] = t[0];
+
+		emit_abs(pc, t[0], src[0][0]);
+		emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
+		if (mask & (1 << 2))
+			emit_mov(pc, dst[2], t[1]);
+		emit_flr(pc, t[1], t[1]);
+		if (mask & (1 << 0))
+			emit_mov(pc, dst[0], t[1]);
+		if (mask & (1 << 1)) {
+			t[1]->mod = NV50_MOD_NEG;
+			emit_preex2(pc, t[1], t[1]);
+			t[1]->mod = 0;
+			emit_flop(pc, NV50_FLOP_EX2, t[1], t[1]);
+			emit_mul(pc, dst[1], t[0], t[1]);
+		}
+		if (mask & (1 << 3))
+			emit_mov_immdval(pc, dst[3], 1.0f);
+	}
+		break;
 	case TGSI_OPCODE_LRP:
 		temp = temp_temp(pc);
 		for (c = 0; c < 4; c++) {
-- 
cgit v1.2.3


From b3425bc3607666e5c8ba4f403b4ead8c8870e49c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 30 Dec 2009 21:17:31 +0100
Subject: nv50: make assimilate_temp safe

Cannot change hw reg assigned to a TGSI TEMP on the fly if
we are in a loop, conditional, or can jump around wildly.
---
 src/gallium/drivers/nv50/nv50_program.c | 44 +++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 16 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index a85587b986..7d1b5fd82c 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -285,22 +285,6 @@ alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
 	return NULL;
 }
 
-/* Assign the hw of the discarded temporary register src
- * to the tgsi register dst and free src.
- */
-static void
-assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-	assert(src->index == -1 && src->hw != -1);
-
-	if (dst->hw != -1)
-		pc->r_temp[dst->hw] = NULL;
-	pc->r_temp[src->hw] = dst;
-	dst->hw = src->hw;
-
-	FREE(src);
-}
-
 /* release the hardware resource held by r */
 static void
 release_hw(struct nv50_pc *pc, struct nv50_reg *r)
@@ -721,6 +705,34 @@ emit_mov_immdval(struct nv50_pc *pc, struct nv50_reg *dst, float f)
 	FREE(imm);
 }
 
+/* Make tgsi register dst hold the value of the discarded temporary src and
+ * release src: either dst takes over src's hw reg, or a MOV is emitted.
+ */
+static void
+assimilate_temp(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
+{
+	assert(src->index == -1 && src->hw != -1);
+
+	if (pc->if_lvl || pc->loop_lvl ||
+	    (dst->type != P_TEMP) ||
+	    (src->hw < pc->result_nr * 4 &&
+	     pc->p->type == PIPE_SHADER_FRAGMENT) ||
+	    pc->p->info.opcode_count[TGSI_OPCODE_CAL] ||
+	    pc->p->info.opcode_count[TGSI_OPCODE_BRA]) {
+		/* can't retarget dst's hw reg on the fly here, copy instead */
+		emit_mov(pc, dst, src);
+		free_temp(pc, src);
+		return;
+	}
+
+	if (dst->hw != -1)
+		pc->r_temp[dst->hw] = NULL;
+	pc->r_temp[src->hw] = dst;
+	dst->hw = src->hw;
+
+	FREE(src);
+}
+
 static void
 emit_nop(struct nv50_pc *pc)
 {
-- 
cgit v1.2.3


From 616ddc8dfa5bfc886db8d4c7ef7fc66793f7940d Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Wed, 30 Dec 2009 21:25:13 +0100
Subject: nv50: cannot kill branch if immediate is used

The immediate's bits eat the condition bits.
---
 src/gallium/drivers/nv50/nv50_program.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 7d1b5fd82c..cc15f45347 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -453,6 +453,7 @@ static INLINE void
 set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
 	 struct nv50_program_exec *e)
 {
+	assert(!is_immd(e));
 	set_long(pc, e);
 	e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
 	e->inst[1] |= (pred << 7) | (idx << 12);
@@ -2118,6 +2119,8 @@ nv50_kill_branch(struct nv50_pc *pc)
 
 	if (pc->if_insn[lvl]->next != pc->p->exec_tail)
 		return FALSE;
+	if (is_immd(pc->p->exec_tail))
+		return FALSE;
 
 	/* if ccode == 'true', the BRA is from an ELSE and the predicate
 	 * reg may no longer be valid, since we currently always use $p0
-- 
cgit v1.2.3


From 4e4244b910c83797e3dde62020618f20eb026908 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 31 Dec 2009 13:38:53 +0100
Subject: nv50: fix TEXLOD sequence and use it only in FPs

---
 src/gallium/drivers/nv50/nv50_program.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index cc15f45347..6733347735 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1566,7 +1566,13 @@ emit_texlod_sequence(struct nv50_pc *pc, struct nv50_reg *tlod,
 		     struct nv50_reg *src, struct nv50_program_exec *tex)
 {
 	struct nv50_program_exec *join_at;
-	unsigned i, target = pc->p->exec_size + 7 * 2;
+	unsigned i, target = pc->p->exec_size + 9 * 2;
+
+	if (pc->p->type != PIPE_SHADER_FRAGMENT) {
+		emit(pc, tex);
+		return;
+	}
+	pc->allow32 = FALSE;
 
 	/* Subtract lod of each pixel from lod of top left pixel, jump
 	 * texlod insn if result is 0, then repeat for 2 other pixels.
@@ -1692,6 +1698,7 @@ emit_tex(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 		emit(pc, e);
 	} else
 	if (bias_lod < 0) {
+		assert(pc->p->type == PIPE_SHADER_FRAGMENT);
 		e->inst[0] |= arg << 22;
 		e->inst[1] |= 0x20000000; /* texbias */
 		emit_mov(pc, t[arg], src[3]);
-- 
cgit v1.2.3


From 45a01bc5bd95a6cc85a9db7d8e2c07812346373d Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 31 Dec 2009 22:24:13 +0100
Subject: nv50: small fix for handling "dangerous" swizzles

---
 src/gallium/drivers/nv50/nv50_program.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 6733347735..2d0b1818ef 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2864,7 +2864,7 @@ nv50_tgsi_insn(struct nv50_pc *pc, const union tgsi_full_token *tok)
 	}
 	pc->r_brdc = NULL;
 
-	if (!deqs)
+	if (!deqs || (!rdep[0] && !rdep[1] && !rdep[2] && !rdep[3]))
 		return nv50_program_tx_insn(pc, &insn);
 
 	deqs = nv50_revdep_reorder(m, rdep);
-- 
cgit v1.2.3


From 41450b03a8e8e0f94f8eefc6880d32e9b0ef6f6d Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Thu, 31 Dec 2009 14:46:15 -0700
Subject: softpipe: implement conditional rendering

---
 src/gallium/drivers/softpipe/sp_clear.c       |  4 ++++
 src/gallium/drivers/softpipe/sp_context.c     | 15 +++++++++++++++
 src/gallium/drivers/softpipe/sp_context.h     |  4 ++++
 src/gallium/drivers/softpipe/sp_draw_arrays.c |  4 ++++
 src/gallium/drivers/softpipe/sp_query.c       | 26 ++++++++++++++++++++++++++
 src/gallium/drivers/softpipe/sp_query.h       |  4 ++++
 6 files changed, 57 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c
index f98087deb8..5f130453c3 100644
--- a/src/gallium/drivers/softpipe/sp_clear.c
+++ b/src/gallium/drivers/softpipe/sp_clear.c
@@ -36,6 +36,7 @@
 #include "util/u_pack_color.h"
 #include "sp_clear.h"
 #include "sp_context.h"
+#include "sp_query.h"
 #include "sp_tile_cache.h"
 
 
@@ -55,6 +56,9 @@ softpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
    if (softpipe->no_rast)
       return;
 
+   if (!softpipe_check_render_cond(softpipe))
+      return;
+
 #if 0
    softpipe_update_derived(softpipe); /* not needed?? */
 #endif
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 82173a3c2a..f3ac6760db 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -176,6 +176,19 @@ softpipe_is_buffer_referenced( struct pipe_context *pipe,
 }
 
 
+static void
+softpipe_render_condition( struct pipe_context *pipe,
+                           struct pipe_query *query,
+                           uint mode )
+{
+   struct softpipe_context *softpipe = softpipe_context( pipe );
+   /* just record the predicate; softpipe_check_render_cond() evaluates it */
+   softpipe->render_cond_query = query;
+   softpipe->render_cond_mode = mode;
+}
+
+
+
 struct pipe_context *
 softpipe_create( struct pipe_screen *screen )
 {
@@ -252,6 +265,8 @@ softpipe_create( struct pipe_screen *screen )
 
    softpipe_init_query_funcs( softpipe );
 
+   softpipe->pipe.render_condition = softpipe_render_condition;
+
    /*
     * Alloc caches for accessing drawing surfaces and textures.
     * Must be before quad stage setup!
diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h
index 6a89bd4b06..73fa744f9d 100644
--- a/src/gallium/drivers/softpipe/sp_context.h
+++ b/src/gallium/drivers/softpipe/sp_context.h
@@ -116,6 +116,10 @@ struct softpipe_context {
 
    unsigned line_stipple_counter;
 
+   /** Conditional query object and mode */
+   struct pipe_query *render_cond_query;
+   uint render_cond_mode;
+
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 9ea5d6fb9f..7409c3c488 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -38,6 +38,7 @@
 #include "util/u_prim.h"
 
 #include "sp_context.h"
+#include "sp_query.h"
 #include "sp_state.h"
 
 #include "draw/draw_context.h"
@@ -122,6 +123,9 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
    struct draw_context *draw = sp->draw;
    unsigned i;
 
+   if (!softpipe_check_render_cond(sp))
+      return TRUE;
+
    sp->reduced_api_prim = u_reduced_prim(mode);
 
    if (sp->dirty)
diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c
index 379cf4ad06..4ef5d9f7b1 100644
--- a/src/gallium/drivers/softpipe/sp_query.c
+++ b/src/gallium/drivers/softpipe/sp_query.c
@@ -99,6 +99,32 @@ softpipe_get_query_result(struct pipe_context *pipe,
 }
 
 
+/**
+ * Called by rendering functions to evaluate the conditional-rendering query.
+ * \return FALSE (skip rendering) only if a result was obtained and is 0
+ */
+boolean
+softpipe_check_render_cond(struct softpipe_context *sp)
+{
+   struct pipe_context *pipe = &sp->pipe;
+   boolean b, wait;
+   uint64_t result;
+
+   if (!sp->render_cond_query) {
+      return TRUE;  /* no query predicate, draw normally */
+   }
+
+   wait = (sp->render_cond_mode == PIPE_RENDER_COND_WAIT ||
+           sp->render_cond_mode == PIPE_RENDER_COND_BY_REGION_WAIT);
+
+   b = pipe->get_query_result(pipe, sp->render_cond_query, wait, &result);
+   if (b)
+      return result > 0;  /* render iff the query result is non-zero */
+   else
+      return TRUE;  /* no result obtained, draw normally */
+}
+
+
 void softpipe_init_query_funcs(struct softpipe_context *softpipe )
 {
    softpipe->pipe.create_query = softpipe_create_query;
diff --git a/src/gallium/drivers/softpipe/sp_query.h b/src/gallium/drivers/softpipe/sp_query.h
index 05060a4575..736c033897 100644
--- a/src/gallium/drivers/softpipe/sp_query.h
+++ b/src/gallium/drivers/softpipe/sp_query.h
@@ -32,6 +32,10 @@
 #ifndef SP_QUERY_H
 #define SP_QUERY_H
 
+extern boolean
+softpipe_check_render_cond(struct softpipe_context *sp);
+
+
 struct softpipe_context;
 extern void softpipe_init_query_funcs(struct softpipe_context * );
 
-- 
cgit v1.2.3


From f8f4757d46627fb453f08dc63fde3d7f458eafe2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 31 Dec 2009 22:18:17 +0000
Subject: scons: Aggregate all tiny libraries in a single library.

Makes integration of gallium into out-of-tree components much easier. No
practical change for components in this tree.
---
 src/gallium/SConscript                          |  24 +--
 src/gallium/auxiliary/SConscript                | 185 ++++++++++++++++++++++++
 src/gallium/auxiliary/cso_cache/SConscript      |  11 --
 src/gallium/auxiliary/draw/SConscript           |  47 ------
 src/gallium/auxiliary/gallivm/SConscript        |  16 --
 src/gallium/auxiliary/indices/SConscript        |  28 ----
 src/gallium/auxiliary/pipebuffer/SConscript     |  19 ---
 src/gallium/auxiliary/rbug/SConscript           |  14 --
 src/gallium/auxiliary/rtasm/SConscript          |  13 --
 src/gallium/auxiliary/tgsi/SConscript           |  23 ---
 src/gallium/auxiliary/translate/SConscript      |  12 --
 src/gallium/auxiliary/util/SConscript           |  61 --------
 src/gallium/auxiliary/vl/SConscript             |  13 --
 src/gallium/drivers/llvmpipe/SConscript         |   2 +-
 src/gallium/state_trackers/python/SConscript    |   2 +-
 src/gallium/winsys/drm/i965/dri/SConscript      |   2 +-
 src/gallium/winsys/drm/intel/dri/SConscript     |   2 +-
 src/gallium/winsys/drm/radeon/dri/SConscript    |   2 +-
 src/gallium/winsys/drm/radeon/python/SConscript |   2 +-
 src/gallium/winsys/drm/vmware/dri/SConscript    |   2 +-
 src/gallium/winsys/drm/vmware/xorg/SConscript   |   2 +-
 src/gallium/winsys/gdi/SConscript               |   2 +-
 src/gallium/winsys/xlib/SConscript              |   2 +-
 23 files changed, 196 insertions(+), 290 deletions(-)
 create mode 100644 src/gallium/auxiliary/SConscript
 delete mode 100644 src/gallium/auxiliary/cso_cache/SConscript
 delete mode 100644 src/gallium/auxiliary/draw/SConscript
 delete mode 100644 src/gallium/auxiliary/gallivm/SConscript
 delete mode 100644 src/gallium/auxiliary/indices/SConscript
 delete mode 100644 src/gallium/auxiliary/pipebuffer/SConscript
 delete mode 100644 src/gallium/auxiliary/rbug/SConscript
 delete mode 100644 src/gallium/auxiliary/rtasm/SConscript
 delete mode 100644 src/gallium/auxiliary/tgsi/SConscript
 delete mode 100644 src/gallium/auxiliary/translate/SConscript
 delete mode 100644 src/gallium/auxiliary/util/SConscript
 delete mode 100644 src/gallium/auxiliary/vl/SConscript

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/SConscript b/src/gallium/SConscript
index 8be84cddbe..eea32b1314 100644
--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -2,29 +2,7 @@ import os
 
 Import('*')
 
-env = env.Clone()
-
-auxiliaries = []
-
-Export('auxiliaries')
-
-
-if llvm:
-	SConscript(['auxiliary/gallivm/SConscript'])
-
-SConscript([
-	# NOTE: order matters!
-	'auxiliary/util/SConscript',
-	'auxiliary/rtasm/SConscript',
-	'auxiliary/tgsi/SConscript',
-	'auxiliary/cso_cache/SConscript',
-	'auxiliary/translate/SConscript',
-	'auxiliary/draw/SConscript',
-	'auxiliary/pipebuffer/SConscript',
-	'auxiliary/indices/SConscript',
-	'auxiliary/rbug/SConscript',
-	'auxiliary/vl/SConscript',
-])
+SConscript('auxiliary/SConscript')
 
 for driver in env['drivers']:
 	SConscript(os.path.join('drivers', driver, 'SConscript'))
diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript
new file mode 100644
index 0000000000..782eb53386
--- /dev/null
+++ b/src/gallium/auxiliary/SConscript
@@ -0,0 +1,185 @@
+Import('*')
+
+from sys import executable as python_cmd
+
+env.Append(CPPPATH = [
+    'indices',
+    'util',
+])
+
+env.CodeGenerate(
+    target = 'indices/u_indices_gen.c', 
+    script = 'indices/u_indices_gen.py', 
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'indices/u_unfilled_gen.c', 
+    script = 'indices/u_unfilled_gen.py', 
+    source = [],
+    command = python_cmd + ' $SCRIPT > $TARGET'
+)
+
+env.CodeGenerate(
+    target = 'util/u_format_table.c',
+    script = 'util/u_format_table.py',
+    source = ['util/u_format.csv'],
+    command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'  # sys.executable, as for indices/
+)
+
+env.CodeGenerate(
+    target = 'util/u_format_access.c',
+    script = 'util/u_format_access.py',
+    source = ['util/u_format.csv'],
+    command = python_cmd + ' $SCRIPT $SOURCE > $TARGET'  # sys.executable, as for indices/
+)
+
+source = [
+    'cso_cache/cso_context.c',
+    'cso_cache/cso_cache.c',
+    'cso_cache/cso_hash.c',
+    'draw/draw_context.c',
+    'draw/draw_pipe.c',
+    'draw/draw_pipe_aaline.c',
+    'draw/draw_pipe_aapoint.c',
+    'draw/draw_pipe_clip.c',
+    'draw/draw_pipe_cull.c',
+    'draw/draw_pipe_flatshade.c',
+    'draw/draw_pipe_offset.c',
+    'draw/draw_pipe_pstipple.c',
+    'draw/draw_pipe_stipple.c',
+    'draw/draw_pipe_twoside.c',
+    'draw/draw_pipe_unfilled.c',
+    'draw/draw_pipe_util.c',
+    'draw/draw_pipe_validate.c',
+    'draw/draw_pipe_vbuf.c',
+    'draw/draw_pipe_wide_line.c',
+    'draw/draw_pipe_wide_point.c',
+    'draw/draw_pt.c',
+    'draw/draw_pt_elts.c',
+    'draw/draw_pt_emit.c',
+    'draw/draw_pt_fetch.c',
+    'draw/draw_pt_fetch_emit.c',
+    'draw/draw_pt_fetch_shade_emit.c',
+    'draw/draw_pt_fetch_shade_pipeline.c',
+    'draw/draw_pt_post_vs.c',
+    'draw/draw_pt_util.c',
+    'draw/draw_pt_varray.c',
+    'draw/draw_pt_vcache.c',
+    'draw/draw_vertex.c',
+    'draw/draw_vs.c',
+    'draw/draw_vs_aos.c',
+    'draw/draw_vs_aos_io.c',
+    'draw/draw_vs_aos_machine.c',
+    'draw/draw_vs_exec.c',
+    'draw/draw_vs_llvm.c',
+    'draw/draw_vs_ppc.c',
+    'draw/draw_vs_sse.c',
+    'draw/draw_vs_varient.c',
+    'draw/draw_gs.c',
+    #'indices/u_indices.c',
+    #'indices/u_unfilled_indices.c',
+    'indices/u_indices_gen.c',
+    'indices/u_unfilled_gen.c',
+    'pipebuffer/pb_buffer_fenced.c',
+    'pipebuffer/pb_buffer_malloc.c',
+    'pipebuffer/pb_bufmgr_alt.c',
+    'pipebuffer/pb_bufmgr_cache.c',
+    'pipebuffer/pb_bufmgr_debug.c',
+    'pipebuffer/pb_bufmgr_fenced.c',
+    'pipebuffer/pb_bufmgr_mm.c',
+    'pipebuffer/pb_bufmgr_ondemand.c',
+    'pipebuffer/pb_bufmgr_pool.c',
+    'pipebuffer/pb_bufmgr_slab.c',
+    'pipebuffer/pb_validate.c',
+    'rbug/rbug_core.c',
+    'rbug/rbug_shader.c',
+    'rbug/rbug_context.c',
+    'rbug/rbug_texture.c',
+    'rbug/rbug_demarshal.c',
+    'rbug/rbug_connection.c',
+    'rtasm/rtasm_cpu.c',
+    'rtasm/rtasm_execmem.c',
+    'rtasm/rtasm_x86sse.c',
+    'rtasm/rtasm_ppc.c',
+    'rtasm/rtasm_ppc_spe.c',
+    'tgsi/tgsi_build.c',
+    'tgsi/tgsi_dump.c',
+    'tgsi/tgsi_dump_c.c',
+    'tgsi/tgsi_exec.c',
+    'tgsi/tgsi_info.c',
+    'tgsi/tgsi_iterate.c',
+    'tgsi/tgsi_parse.c',
+    'tgsi/tgsi_sanity.c',
+    'tgsi/tgsi_scan.c',
+    'tgsi/tgsi_ppc.c',
+    'tgsi/tgsi_sse2.c',
+    'tgsi/tgsi_text.c',
+    'tgsi/tgsi_transform.c',
+    'tgsi/tgsi_ureg.c',
+    'tgsi/tgsi_util.c',
+    'translate/translate_generic.c',
+    'translate/translate_sse.c',
+    'translate/translate.c',
+    'translate/translate_cache.c',
+    'util/u_bitmask.c',
+    'util/u_blit.c',
+    'util/u_blitter.c',
+    'util/u_cache.c',
+    'util/u_cpu_detect.c',
+    'util/u_debug.c',
+    'util/u_debug_dump.c',
+    'util/u_debug_memory.c',
+    'util/u_debug_stack.c',
+    'util/u_debug_symbol.c',
+    'util/u_dl.c',
+    'util/u_draw_quad.c',
+    'util/u_format.c',
+    'util/u_format_access.c',
+    'util/u_format_table.c',
+    'util/u_gen_mipmap.c',
+    'util/u_handle_table.c',
+    'util/u_hash.c',
+    'util/u_hash_table.c',
+    'util/u_keymap.c',
+    'util/u_network.c',
+    'util/u_math.c',
+    'util/u_mm.c',
+    'util/u_rect.c',
+    'util/u_simple_shaders.c',
+    'util/u_snprintf.c',
+    'util/u_stream_stdc.c',
+    'util/u_stream_wd.c',
+    'util/u_surface.c',
+    'util/u_texture.c',
+    'util/u_tile.c',
+    'util/u_time.c',
+    'util/u_timed_winsys.c',
+    'util/u_upload_mgr.c',
+    'util/u_simple_screen.c',
+    'vl/vl_bitstream_parser.c',
+    'vl/vl_mpeg12_mc_renderer.c',
+    'vl/vl_compositor.c',
+    'vl/vl_csc.c',
+    'vl/vl_shader_build.c',
+]
+
+if env['llvm']:
+    source += [
+        'gallivm/gallivm.cpp',
+        'gallivm/gallivm_cpu.cpp',
+        'gallivm/instructions.cpp',
+        'gallivm/loweringpass.cpp',
+        'gallivm/tgsitollvm.cpp',
+        'gallivm/storage.cpp',
+        'gallivm/storagesoa.cpp',
+        'gallivm/instructionssoa.cpp',
+    ]
+
+gallium = env.ConvenienceLibrary(
+    target = 'gallium',
+    source = source,
+)
+
+Export('gallium')
diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript
deleted file mode 100644
index 651e68a191..0000000000
--- a/src/gallium/auxiliary/cso_cache/SConscript
+++ /dev/null
@@ -1,11 +0,0 @@
-Import('*')
-
-cso_cache = env.ConvenienceLibrary(
-	target = 'cso_cache',
-	source = [
-		'cso_context.c',
-		'cso_cache.c',
-		'cso_hash.c',
-	])
-
-auxiliaries.insert(0, cso_cache)
diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript
deleted file mode 100644
index a022c145e9..0000000000
--- a/src/gallium/auxiliary/draw/SConscript
+++ /dev/null
@@ -1,47 +0,0 @@
-Import('*')
-
-draw = env.ConvenienceLibrary(
-	target = 'draw',
-	source = [
-		'draw_context.c',
-		'draw_pipe.c',
-		'draw_pipe_aaline.c',
-		'draw_pipe_aapoint.c',
-		'draw_pipe_clip.c',
-		'draw_pipe_cull.c',
-		'draw_pipe_flatshade.c',
-		'draw_pipe_offset.c',
-		'draw_pipe_pstipple.c',
-		'draw_pipe_stipple.c',
-		'draw_pipe_twoside.c',
-		'draw_pipe_unfilled.c',
-		'draw_pipe_util.c',
-		'draw_pipe_validate.c',
-		'draw_pipe_vbuf.c',
-		'draw_pipe_wide_line.c',
-		'draw_pipe_wide_point.c',
-		'draw_pt.c',
-		'draw_pt_elts.c',
-		'draw_pt_emit.c',
-		'draw_pt_fetch.c',
-		'draw_pt_fetch_emit.c',
-		'draw_pt_fetch_shade_emit.c',
-		'draw_pt_fetch_shade_pipeline.c',
-		'draw_pt_post_vs.c',
-		'draw_pt_util.c',
-		'draw_pt_varray.c',
-		'draw_pt_vcache.c',
-		'draw_vertex.c',
-		'draw_vs.c',
-		'draw_vs_aos.c',
-		'draw_vs_aos_io.c',
-		'draw_vs_aos_machine.c',
-		'draw_vs_exec.c',
-		'draw_vs_llvm.c',
-		'draw_vs_ppc.c',
-		'draw_vs_sse.c',
-		'draw_vs_varient.c',
-                'draw_gs.c'
-	])
-
-auxiliaries.insert(0, draw)
diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript
deleted file mode 100644
index c0aa51b90a..0000000000
--- a/src/gallium/auxiliary/gallivm/SConscript
+++ /dev/null
@@ -1,16 +0,0 @@
-Import('*')
-
-gallivm = env.ConvenienceLibrary(
-	target = 'gallivm',
-	source = [
-        'gallivm.cpp',
-        'gallivm_cpu.cpp',
-        'instructions.cpp',
-        'loweringpass.cpp',
-        'tgsitollvm.cpp',
-        'storage.cpp',
-        'storagesoa.cpp',
-        'instructionssoa.cpp',
-	])
-
-auxiliaries.insert(0, gallivm)
diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript
deleted file mode 100644
index 712e215534..0000000000
--- a/src/gallium/auxiliary/indices/SConscript
+++ /dev/null
@@ -1,28 +0,0 @@
-Import('*')
-
-from sys import executable as python_cmd
-
-env.CodeGenerate(
-	target = 'u_indices_gen.c', 
-	script = 'u_indices_gen.py', 
-	source = [],
-	command = python_cmd + ' $SCRIPT > $TARGET'
-)
-
-env.CodeGenerate(
-	target = 'u_unfilled_gen.c', 
-	script = 'u_unfilled_gen.py', 
-	source = [],
-	command = python_cmd + ' $SCRIPT > $TARGET'
-)
-
-indices = env.ConvenienceLibrary(
-	target = 'indices',
-	source = [
-#               'u_indices.c',
-#               'u_unfilled_indices.c',
-               'u_indices_gen.c',
-               'u_unfilled_gen.c',
-	])
-
-auxiliaries.insert(0, indices)
diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript
deleted file mode 100644
index 8e9f06abe4..0000000000
--- a/src/gallium/auxiliary/pipebuffer/SConscript
+++ /dev/null
@@ -1,19 +0,0 @@
-Import('*')
-
-pipebuffer = env.ConvenienceLibrary(
-	target = 'pipebuffer',
-	source = [
-		'pb_buffer_fenced.c',
-		'pb_buffer_malloc.c',
-		'pb_bufmgr_alt.c',
-		'pb_bufmgr_cache.c',
-		'pb_bufmgr_debug.c',
-		'pb_bufmgr_fenced.c',
-		'pb_bufmgr_mm.c',
-		'pb_bufmgr_ondemand.c',
-		'pb_bufmgr_pool.c',
-		'pb_bufmgr_slab.c',
-		'pb_validate.c',
-	])
-
-auxiliaries.insert(0, pipebuffer)
diff --git a/src/gallium/auxiliary/rbug/SConscript b/src/gallium/auxiliary/rbug/SConscript
deleted file mode 100644
index 4a9afb45d3..0000000000
--- a/src/gallium/auxiliary/rbug/SConscript
+++ /dev/null
@@ -1,14 +0,0 @@
-Import('*')
-
-rbug = env.ConvenienceLibrary(
-	target = 'rbug',
-	source = [
-		'rbug_core.c',
-		'rbug_shader.c',
-		'rbug_context.c',
-		'rbug_texture.c',
-		'rbug_demarshal.c',
-		'rbug_connection.c',
-	])
-
-auxiliaries.insert(0, rbug)
diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript
deleted file mode 100644
index eb48368acc..0000000000
--- a/src/gallium/auxiliary/rtasm/SConscript
+++ /dev/null
@@ -1,13 +0,0 @@
-Import('*')
-
-rtasm = env.ConvenienceLibrary(
-	target = 'rtasm',
-	source = [
-		'rtasm_cpu.c',
-		'rtasm_execmem.c',
-		'rtasm_x86sse.c',
-		'rtasm_ppc.c',
-		'rtasm_ppc_spe.c',
-	])
-
-auxiliaries.insert(0, rtasm)
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
deleted file mode 100644
index b6bc2924f0..0000000000
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ /dev/null
@@ -1,23 +0,0 @@
-Import('*')
-
-tgsi = env.ConvenienceLibrary(
-	target = 'tgsi',
-	source = [
-		'tgsi_build.c',
-		'tgsi_dump.c',
-		'tgsi_dump_c.c',
-		'tgsi_exec.c',
-		'tgsi_info.c',
-		'tgsi_iterate.c',
-		'tgsi_parse.c',
-		'tgsi_sanity.c',
-		'tgsi_scan.c',
-		'tgsi_ppc.c',
-		'tgsi_sse2.c',
-		'tgsi_text.c',
-		'tgsi_transform.c',
-		'tgsi_ureg.c',
-		'tgsi_util.c',
-	])
-
-auxiliaries.insert(0, tgsi)
diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript
deleted file mode 100644
index 9553a67537..0000000000
--- a/src/gallium/auxiliary/translate/SConscript
+++ /dev/null
@@ -1,12 +0,0 @@
-Import('*')
-
-translate = env.ConvenienceLibrary(
-	target = 'translate',
-	source = [
-		'translate_generic.c',
-		'translate_sse.c',
-		'translate.c',
-		'translate_cache.c',
-	])
-
-auxiliaries.insert(0, translate)
diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript
deleted file mode 100644
index 2a546d19dc..0000000000
--- a/src/gallium/auxiliary/util/SConscript
+++ /dev/null
@@ -1,61 +0,0 @@
-Import('*')
-
-env.Clone()
-
-env.Append(CPPPATH = ['.'])
-
-env.CodeGenerate(
-	target = 'u_format_table.c',
-	script = 'u_format_table.py',
-	source = ['u_format.csv'],
-	command = 'python $SCRIPT $SOURCE > $TARGET'
-)
-
-env.CodeGenerate(
-	target = 'u_format_access.c',
-	script = 'u_format_access.py',
-	source = ['u_format.csv'],
-	command = 'python $SCRIPT $SOURCE > $TARGET'
-)
-
-util = env.ConvenienceLibrary(
-	target = 'util',
-	source = [
-		'u_bitmask.c',
-		'u_blit.c',
-		'u_blitter.c',
-		'u_cache.c',
-		'u_cpu_detect.c',
-		'u_debug.c',
-		'u_debug_dump.c',
-		'u_debug_memory.c',
-		'u_debug_stack.c',
-		'u_debug_symbol.c',
-		'u_dl.c',
-		'u_draw_quad.c',
-		'u_format.c',
-		'u_format_access.c',
-		'u_format_table.c',
-		'u_gen_mipmap.c',
-		'u_handle_table.c',
-		'u_hash.c',
-		'u_hash_table.c',
-		'u_keymap.c',
-		'u_network.c',
-		'u_math.c',
-		'u_mm.c',
-		'u_rect.c',
-		'u_simple_shaders.c',
-		'u_snprintf.c',
-		'u_stream_stdc.c',
-		'u_stream_wd.c',
-		'u_surface.c',
-		'u_texture.c',
-		'u_tile.c',
-		'u_time.c',
-		'u_timed_winsys.c',
-		'u_upload_mgr.c',
-		'u_simple_screen.c',
-	])
-
-auxiliaries.insert(0, util)
diff --git a/src/gallium/auxiliary/vl/SConscript b/src/gallium/auxiliary/vl/SConscript
deleted file mode 100644
index aed69f5efe..0000000000
--- a/src/gallium/auxiliary/vl/SConscript
+++ /dev/null
@@ -1,13 +0,0 @@
-Import('*')
-
-vl = env.ConvenienceLibrary(
-	target = 'vl',
-	source = [
-		'vl_bitstream_parser.c',
-		'vl_mpeg12_mc_renderer.c',
-		'vl_compositor.c',
-                'vl_csc.c',
-		'vl_shader_build.c',
-	])
-
-auxiliaries.insert(0, vl)
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index de6156795d..78331b44fc 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -74,7 +74,7 @@ llvmpipe = env.ConvenienceLibrary(
 
 env = env.Clone()
 
-env.Prepend(LIBS = [llvmpipe] + auxiliaries)
+env.Prepend(LIBS = [llvmpipe] + gallium)
 
 tests = [
     'format',
diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript
index ec385e7c44..4fcaf34d57 100644
--- a/src/gallium/state_trackers/python/SConscript
+++ b/src/gallium/state_trackers/python/SConscript
@@ -43,5 +43,5 @@ if 'python' in env['statetrackers']:
         source = [
             'st_hardpipe_winsys.c',
         ],
-        LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'],
+        LIBS = [pyst, softpipe, trace] + gallium + env['LIBS'],
     )
diff --git a/src/gallium/winsys/drm/i965/dri/SConscript b/src/gallium/winsys/drm/i965/dri/SConscript
index 233ef464be..a99533fd24 100644
--- a/src/gallium/winsys/drm/i965/dri/SConscript
+++ b/src/gallium/winsys/drm/i965/dri/SConscript
@@ -14,6 +14,6 @@ drivers = [
 env.LoadableModule(
     target ='i965_dri.so',
     source = COMMON_GALLIUM_SOURCES,
-    LIBS = drivers + mesa + auxiliaries + env['LIBS'],
+    LIBS = drivers + mesa + gallium + env['LIBS'],
     SHLIBPREFIX = '',
 )
diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript
index b1b654d9f8..104e987083 100644
--- a/src/gallium/winsys/drm/intel/dri/SConscript
+++ b/src/gallium/winsys/drm/intel/dri/SConscript
@@ -15,6 +15,6 @@ drivers = [
 env.LoadableModule(
     target ='i915_dri.so',
     source = COMMON_GALLIUM_SOURCES,
-    LIBS = drivers + mesa + auxiliaries + env['LIBS'],
+    LIBS = drivers + mesa + gallium + env['LIBS'],
     SHLIBPREFIX = '',
 )
diff --git a/src/gallium/winsys/drm/radeon/dri/SConscript b/src/gallium/winsys/drm/radeon/dri/SConscript
index aea987a3ac..c4989d1b59 100644
--- a/src/gallium/winsys/drm/radeon/dri/SConscript
+++ b/src/gallium/winsys/drm/radeon/dri/SConscript
@@ -13,5 +13,5 @@ drivers = [
 env.SharedLibrary(
     target ='radeon_dri.so',
     source = COMMON_GALLIUM_SOURCES,
-    LIBS = st_dri + radeonwinsys + mesa + drivers + auxiliaries + env['LIBS'],
+    LIBS = st_dri + radeonwinsys + mesa + drivers + gallium + env['LIBS'],
 )
diff --git a/src/gallium/winsys/drm/radeon/python/SConscript b/src/gallium/winsys/drm/radeon/python/SConscript
index 3200fd8d1b..91cae98697 100644
--- a/src/gallium/winsys/drm/radeon/python/SConscript
+++ b/src/gallium/winsys/drm/radeon/python/SConscript
@@ -29,5 +29,5 @@ if env['platform'] == 'linux':
     env.SharedLibrary(
         target ='_gallium',
         source = sources,
-        LIBS = [pyst] + drivers + auxiliaries + env['LIBS'],
+        LIBS = [pyst] + drivers + gallium + env['LIBS'],
     )
diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript
index 1019f577a5..84319f91ff 100644
--- a/src/gallium/winsys/drm/vmware/dri/SConscript
+++ b/src/gallium/winsys/drm/vmware/dri/SConscript
@@ -48,7 +48,7 @@ if env['platform'] == 'linux':
             svgadrm,
             svga,
             mesa,
-            auxiliaries,
+            gallium,
             ])
       
       # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript
index b8968e7137..f7ce400a7a 100644
--- a/src/gallium/winsys/drm/vmware/xorg/SConscript
+++ b/src/gallium/winsys/drm/vmware/xorg/SConscript
@@ -38,7 +38,7 @@ if env['platform'] == 'linux':
 		st_xorg,
 		svgadrm,
 		svga,
-                auxiliaries,
+                gallium,
 	])
 
 	sources = [
diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript
index 74f6b2fd47..bded865785 100644
--- a/src/gallium/winsys/gdi/SConscript
+++ b/src/gallium/winsys/gdi/SConscript
@@ -45,5 +45,5 @@ if env['platform'] == 'windows':
     env.SharedLibrary(
         target ='opengl32',
         source = sources,
-        LIBS = wgl + glapi + mesa + drivers + auxiliaries + glsl + env['LIBS'],
+        LIBS = wgl + glapi + mesa + drivers + gallium + glsl + env['LIBS'],
     )
diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript
index ccec2566b1..713841aeb1 100644
--- a/src/gallium/winsys/xlib/SConscript
+++ b/src/gallium/winsys/xlib/SConscript
@@ -46,7 +46,7 @@ if env['platform'] == 'linux' \
     libgl = env.SharedLibrary(
         target ='GL',
         source = sources,
-        LIBS = st_xlib + glapi + mesa + glsl + drivers + auxiliaries + env['LIBS'],
+        LIBS = st_xlib + glapi + mesa + glsl + drivers + gallium + env['LIBS'],
     )
 
     env.InstallSharedLibrary(libgl, version=(1, 5))
-- 
cgit v1.2.3


From cf9be46c01151c46954316a4cbc693dde06a77af Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 1 Jan 2010 00:20:52 +0000
Subject: llvmpipe: Add missing include path.

The absence was being masked previously.
---
 src/gallium/drivers/llvmpipe/SConscript | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 78331b44fc..3ca676647c 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -9,6 +9,8 @@ if not env.has_key('LLVM_VERSION'):
 
 env.Tool('udis86')
 
+env.Append(CPPPATH = ['.'])
+
 env.CodeGenerate(
 	target = 'lp_tile_soa.c',
 	script = 'lp_tile_soa.py',
-- 
cgit v1.2.3


From 486ccb55a89f229ffbd181f79f9372cf0b4cbdb6 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 1 Jan 2010 00:35:43 +0000
Subject: r300: Fix scons build.

---
 .gitignore                             | 1 +
 src/gallium/drivers/r300/SConscript    | 7 ++++++-
 src/gallium/drivers/r300/r300_render.c | 7 ++++---
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/.gitignore b/.gitignore
index 1c3d44665e..f43ff3766c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 *.ilk
 *.o
 *.obj
+*.os
 *.pc
 *.pdb
 *.pyc
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
index 0d2de17be9..183aa17f9b 100644
--- a/src/gallium/drivers/r300/SConscript
+++ b/src/gallium/drivers/r300/SConscript
@@ -4,7 +4,12 @@ r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
 
 env = env.Clone()
 # add the paths for r300compiler
-env.Append(CPPPATH = ['#/src/mesa/drivers/dri/r300/compiler', '#/include', '#/src/mesa'])
+env.Append(CPPPATH = [
+    '#/src/mesa/drivers/dri/r300/compiler', 
+    '#/src/gallium/winsys/drm/radeon/core',
+    '#/include', 
+    '#/src/mesa',
+])
 
 r300 = env.ConvenienceLibrary(
     target = 'r300',
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 82089f9161..a89cb633e0 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -362,6 +362,7 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
 {
     struct r300_context* r300 = r300_context(pipe);
     int i;
+    void* indices;
 
     if (!u_trim_pipe_prim(mode, &count)) {
         return FALSE;
@@ -378,8 +379,8 @@ boolean r300_swtcl_draw_range_elements(struct pipe_context* pipe,
         draw_set_mapped_vertex_buffer(r300->draw, i, buf);
     }
 
-    void* indices = pipe_buffer_map(pipe->screen, indexBuffer,
-                                    PIPE_BUFFER_USAGE_CPU_READ);
+    indices = pipe_buffer_map(pipe->screen, indexBuffer,
+                              PIPE_BUFFER_USAGE_CPU_READ);
     draw_set_mapped_element_buffer_range(r300->draw, indexSize,
                                          minIndex, maxIndex, indices);
 
@@ -476,7 +477,7 @@ static void* r300_render_map_vertices(struct vbuf_render* render)
     r300render->vbo_ptr = pipe_buffer_map(screen, r300render->vbo,
                                           PIPE_BUFFER_USAGE_CPU_WRITE);
 
-    return (r300render->vbo_ptr + r300render->vbo_offset);
+    return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
 }
 
 static void r300_render_unmap_vertices(struct vbuf_render* render,
-- 
cgit v1.2.3


From fd237a879fdc70cb070aa8be0f01a7b31d8207c6 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 1 Jan 2010 15:38:19 -0800
Subject: llvmpipe: Use C-style comment.

---
 src/gallium/drivers/llvmpipe/lp_test_conv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_test_conv.c b/src/gallium/drivers/llvmpipe/lp_test_conv.c
index 968c7a2d4a..faddfb9677 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_conv.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_conv.c
@@ -330,7 +330,7 @@ test_one(unsigned verbose,
          fprintf(stderr, "conv.bc written\n");
          fprintf(stderr, "Invoke as \"llc -o - conv.bc\"\n");
          firsttime = FALSE;
-         //abort();
+         /* abort(); */
       }
    }
 
-- 
cgit v1.2.3


From 6f19ec64fc8878e39809e77a0022a5c819f41e4b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 1 Jan 2010 21:58:21 +0000
Subject: i965: Add missing include.

---
 src/gallium/drivers/i965/brw_disasm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_disasm.h b/src/gallium/drivers/i965/brw_disasm.h
index 77d402d35e..ba5b109c48 100644
--- a/src/gallium/drivers/i965/brw_disasm.h
+++ b/src/gallium/drivers/i965/brw_disasm.h
@@ -23,6 +23,8 @@
 #ifndef BRW_DISASM_H
 #define BRW_DISASM_H
 
+#include <stdio.h>
+
 struct brw_instruction;
 
 int brw_disasm_insn (FILE *file, const struct brw_instruction *inst);
-- 
cgit v1.2.3


From 3ae37da6116d7a703b2752fd7978caad79ecfcf2 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Fri, 1 Jan 2010 16:03:53 -0800
Subject: llvmpipe: Silence uninitialized variable warning.

---
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index a1365a045f..699394c0de 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
    const struct pipe_sampler_state *sampler = samp->sampler;
    unsigned level0, level1, j, imgFilter;
    int width, height;
-   float levelBlend;
+   float levelBlend = 0.0F;
 
    choose_mipmap_levels(tgsi_sampler, s, t, p, 
                         lodbias,
-- 
cgit v1.2.3


From da6a80301b2cfdc6902198e3fef5ec78e86e39cf Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sat, 2 Jan 2010 21:46:42 +0000
Subject: svga: Rename error labels.

Make it easier to insert/remove error branches.
---
 src/gallium/drivers/svga/svga_context.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index c3de12b4a3..3e7663d6e1 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -130,7 +130,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
 
    svga = CALLOC_STRUCT(svga_context);
    if (svga == NULL)
-      goto error1;
+      goto no_svga;
 
    svga->pipe.winsys = screen->winsys;
    svga->pipe.screen = screen;
@@ -142,7 +142,7 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
 
    svga->swc = svgascreen->sws->context_create(svgascreen->sws);
    if(!svga->swc)
-      goto error2;
+      goto no_swc;
 
    svga_init_blend_functions(svga);
    svga_init_blit_functions(svga);
@@ -165,32 +165,32 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
    svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
 
    if (!svga_init_swtnl(svga))
-      goto error3;
+      goto no_swtnl;
 
    svga->upload_ib = u_upload_create( svga->pipe.screen,
                                       32 * 1024,
                                       16,
                                       PIPE_BUFFER_USAGE_INDEX );
    if (svga->upload_ib == NULL)
-      goto error4;
+      goto no_upload_ib;
 
    svga->upload_vb = u_upload_create( svga->pipe.screen,
                                       128 * 1024,
                                       16,
                                       PIPE_BUFFER_USAGE_VERTEX );
    if (svga->upload_vb == NULL)
-      goto error5;
+      goto no_upload_vb;
 
    svga->hwtnl = svga_hwtnl_create( svga,
                                     svga->upload_ib,
                                     svga->swc );
    if (svga->hwtnl == NULL)
-      goto error6;
+      goto no_hwtnl;
 
 
    ret = svga_emit_initial_state( svga );
    if (ret)
-      goto error7;
+      goto no_state;
    
    /* Avoid shortcircuiting state with initial value of zero.
     */
@@ -209,19 +209,19 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
 
    return &svga->pipe;
 
-error7:
+no_state:
    svga_hwtnl_destroy( svga->hwtnl );
-error6:
+no_hwtnl:
    u_upload_destroy( svga->upload_vb );
-error5:
+no_upload_vb:
    u_upload_destroy( svga->upload_ib );
-error4:
+no_upload_ib:
    svga_destroy_swtnl(svga);
-error3:
+no_swtnl:
    svga->swc->destroy(svga->swc);
-error2:
+no_swc:
    FREE(svga);
-error1:
+no_svga:
    return NULL;
 }
 
-- 
cgit v1.2.3


From cdb445f3a9285e2d8f042a07021ade78b94e0156 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Sun, 3 Jan 2010 00:47:30 +0000
Subject: svga: Use a shader id as low as possible.

---
 src/gallium/drivers/svga/svga_context.c  | 16 ++++++++++++
 src/gallium/drivers/svga/svga_context.h  |  7 +++--
 src/gallium/drivers/svga/svga_pipe_fs.c  |  3 +++
 src/gallium/drivers/svga/svga_pipe_vs.c  |  3 +++
 src/gallium/drivers/svga/svga_state_fs.c | 44 ++++++++++++++++++++++----------
 src/gallium/drivers/svga/svga_state_vs.c | 27 +++++++++++++-------
 src/gallium/drivers/svga/svga_tgsi.c     |  2 ++
 7 files changed, 78 insertions(+), 24 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
index 3e7663d6e1..af99c9de37 100644
--- a/src/gallium/drivers/svga/svga_context.c
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -29,6 +29,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_screen.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 #include "util/u_upload_mgr.h"
 
 #include "svga_context.h"
@@ -61,6 +62,9 @@ static void svga_destroy( struct pipe_context *pipe )
    u_upload_destroy( svga->upload_vb );
    u_upload_destroy( svga->upload_ib );
 
+   util_bitmask_destroy( svga->vs_bm );
+   util_bitmask_destroy( svga->fs_bm );
+
    for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
       pipe_buffer_reference( &svga->curr.cb[shader], NULL );
 
@@ -167,6 +171,14 @@ struct pipe_context *svga_context_create( struct pipe_screen *screen )
    if (!svga_init_swtnl(svga))
       goto no_swtnl;
 
+   svga->fs_bm = util_bitmask_create();
+   if (svga->fs_bm == NULL)
+      goto no_fs_bm;
+
+   svga->vs_bm = util_bitmask_create();
+   if (svga->vs_bm == NULL)
+      goto no_vs_bm;
+
    svga->upload_ib = u_upload_create( svga->pipe.screen,
                                       32 * 1024,
                                       16,
@@ -216,6 +228,10 @@ no_hwtnl:
 no_upload_vb:
    u_upload_destroy( svga->upload_ib );
 no_upload_ib:
+   util_bitmask_destroy( svga->vs_bm );
+no_vs_bm:
+   util_bitmask_destroy( svga->fs_bm );
+no_fs_bm:
    svga_destroy_swtnl(svga);
 no_swtnl:
    svga->swc->destroy(svga->swc);
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index e650a251d1..eb666c2616 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -41,6 +41,7 @@
 struct draw_vertex_shader;
 struct svga_shader_result;
 struct SVGACmdMemory;
+struct util_bitmask;
 struct u_upload_mgr;
 
 
@@ -321,12 +322,14 @@ struct svga_context
       boolean new_vdecl;
    } swtnl;
 
+   /* Bitmask of used shader IDs */
+   struct util_bitmask *fs_bm;
+   struct util_bitmask *vs_bm;
+
    struct {
       unsigned dirty[4];
 
       unsigned texture_timestamp;
-      unsigned next_fs_id;
-      unsigned next_vs_id;
 
       /* Internally generated shaders:
        */
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index e3be840d92..a461a86dd3 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -26,6 +26,7 @@
 #include "pipe/p_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_text.h"
 
@@ -107,6 +108,8 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
          assert(ret == PIPE_OK);
       }
 
+      util_bitmask_clear( svga->fs_bm, result->id );
+
       svga_destroy_shader_result( result );
    }
 
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index e5ffe668c3..02709e12bd 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -27,6 +27,7 @@
 #include "pipe/p_inlines.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_text.h"
 
@@ -172,6 +173,8 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
          assert(ret == PIPE_OK);
       }
 
+      util_bitmask_clear( svga->vs_bm, result->id );
+
       svga_destroy_shader_result( result );
    }
 
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 6ec38ed3e4..1902b0106b 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -26,6 +26,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_bitmask.h"
 
 #include "svga_context.h"
 #include "svga_state.h"
@@ -74,9 +75,12 @@ static enum pipe_error compile_fs( struct svga_context *svga,
       goto fail;
    }
 
+   result->id = util_bitmask_add(svga->fs_bm);
+   if(result->id == UTIL_BITMASK_INVALID_INDEX)
+      goto fail;
 
    ret = SVGA3D_DefineShader(svga->swc, 
-                             svga->state.next_fs_id,
+                             result->id,
                              SVGA3D_SHADERTYPE_PS,
                              result->tokens, 
                              result->nr_tokens * sizeof result->tokens[0]);
@@ -84,14 +88,16 @@ static enum pipe_error compile_fs( struct svga_context *svga,
       goto fail;
 
    *out_result = result;
-   result->id = svga->state.next_fs_id++;
    result->next = fs->base.results;
    fs->base.results = result;
    return PIPE_OK;
 
 fail:
-   if (result)
+   if (result) {
+      if (result->id != UTIL_BITMASK_INVALID_INDEX)
+         util_bitmask_clear( svga->fs_bm, result->id );
       svga_destroy_shader_result( result );
+   }
    return ret;
 }
 
@@ -116,7 +122,7 @@ fail:
  */
 static int emit_white_fs( struct svga_context *svga )
 {
-   int ret;
+   int ret = PIPE_ERROR;
 
    /* ps_3_0
     * def c0, 1.000000, 0.000000, 0.000000, 1.000000
@@ -137,16 +143,26 @@ static int emit_white_fs( struct svga_context *svga )
       0x0000ffff,
    };
 
+   assert(SVGA3D_INVALID_ID == UTIL_BITMASK_INVALID_INDEX);
+   svga->state.white_fs_id = util_bitmask_add(svga->fs_bm);
+   if(svga->state.white_fs_id == SVGA3D_INVALID_ID)
+      goto no_fs_id;
+
    ret = SVGA3D_DefineShader(svga->swc, 
-                             svga->state.next_fs_id,
+                             svga->state.white_fs_id,
                              SVGA3D_SHADERTYPE_PS,
                              white_tokens, 
                              sizeof(white_tokens));
    if (ret)
-      return ret;
+      goto no_definition;
 
-   svga->state.white_fs_id = svga->state.next_fs_id++;
    return 0;
+
+no_definition:
+   util_bitmask_clear(svga->fs_bm, svga->state.white_fs_id);
+   svga->state.white_fs_id = SVGA3D_INVALID_ID;
+no_fs_id:
+   return ret;
 }
 
 
@@ -251,12 +267,14 @@ static int emit_hw_fs( struct svga_context *svga,
 
    assert(id != SVGA3D_INVALID_ID);
 
-   if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
-      ret = SVGA3D_SetShader(svga->swc, 
-                             SVGA3D_SHADERTYPE_PS, 
-                             id );
-      if (ret)
-         return ret;
+   if (result != svga->state.hw_draw.fs) {
+      if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
+         ret = SVGA3D_SetShader(svga->swc,
+                                SVGA3D_SHADERTYPE_PS,
+                                id );
+         if (ret)
+            return ret;
+      }
 
       svga->dirty |= SVGA_NEW_FS_RESULT;
       svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index a947745732..6cf51434d7 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -26,6 +26,7 @@
 #include "pipe/p_inlines.h"
 #include "pipe/p_defines.h"
 #include "util/u_math.h"
+#include "util/u_bitmask.h"
 #include "translate/translate.h"
 
 #include "svga_context.h"
@@ -77,8 +78,12 @@ static enum pipe_error compile_vs( struct svga_context *svga,
       goto fail;
    }
 
+   result->id = util_bitmask_add(svga->vs_bm);
+   if(result->id == UTIL_BITMASK_INVALID_INDEX)
+      goto fail;
+
    ret = SVGA3D_DefineShader(svga->swc, 
-                             svga->state.next_vs_id,
+                             result->id,
                              SVGA3D_SHADERTYPE_VS,
                              result->tokens, 
                              result->nr_tokens * sizeof result->tokens[0]);
@@ -86,14 +91,16 @@ static enum pipe_error compile_vs( struct svga_context *svga,
       goto fail;
 
    *out_result = result;
-   result->id = svga->state.next_vs_id++;
    result->next = vs->base.results;
    vs->base.results = result;
    return PIPE_OK;
 
 fail:
-   if (result)
+   if (result) {
+      if (result->id != UTIL_BITMASK_INVALID_INDEX)
+         util_bitmask_clear( svga->vs_bm, result->id );
       svga_destroy_shader_result( result );
+   }
    return ret;
 }
 
@@ -141,12 +148,14 @@ static int emit_hw_vs( struct svga_context *svga,
       id = result->id;
    }
 
-   if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
-      ret = SVGA3D_SetShader(svga->swc, 
-                             SVGA3D_SHADERTYPE_VS, 
-                             id );
-      if (ret)
-         return ret;
+   if (result != svga->state.hw_draw.vs) {
+      if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
+         ret = SVGA3D_SetShader(svga->swc,
+                                SVGA3D_SHADERTYPE_VS,
+                                id );
+         if (ret)
+            return ret;
+      }
 
       svga->dirty |= SVGA_NEW_VS_RESULT;
       svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id;
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
index b8ef137c01..0cd620189b 100644
--- a/src/gallium/drivers/svga/svga_tgsi.c
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -31,6 +31,7 @@
 #include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_scan.h"
 #include "util/u_memory.h"
+#include "util/u_bitmask.h"
 
 #include "svgadump/svga_shader_dump.h"
 
@@ -221,6 +222,7 @@ svga_tgsi_translate( const struct svga_shader *shader,
    result->tokens = (const unsigned *)emit.buf;
    result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
    memcpy(&result->key, &key, sizeof key);
+   result->id = UTIL_BITMASK_INVALID_INDEX;
 
    if (SVGA_DEBUG & DEBUG_TGSI) 
    {
-- 
cgit v1.2.3


From be6fb5c08350138ac85c1bf727553aaa9f4583e6 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sat, 2 Jan 2010 20:17:43 -0800
Subject: r300g: Use C-style comments.

---
 src/gallium/drivers/r300/r300_emit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index eeb97a2d37..b44c7bdbb3 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -871,7 +871,7 @@ validate:
             goto validate;
         }
     } else {
-        // debug_printf("No VBO while emitting dirty state!\n");
+        /* debug_printf("No VBO while emitting dirty state!\n"); */
     }
     if (!r300->winsys->validate(r300->winsys)) {
         r300->context.flush(&r300->context, 0, NULL);
@@ -999,7 +999,7 @@ validate:
     */
 
     /* Finally, emit the VBO. */
-    //r300_emit_vertex_buffer(r300);
+    /* r300_emit_vertex_buffer(r300); */
 
     r300->dirty_hw++;
 }
-- 
cgit v1.2.3


From 1d95544446f3b862620fe5bb33b37705a2ad8681 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 3 Jan 2010 03:06:33 -0800
Subject: trace: Remove comma at end of enumerator list.

---
 src/gallium/drivers/trace/tr_state.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h
index 1c16042ee5..e2f981d051 100644
--- a/src/gallium/drivers/trace/tr_state.h
+++ b/src/gallium/drivers/trace/tr_state.h
@@ -32,7 +32,7 @@ struct tgsi_token;
 enum trace_shader_type {
    TRACE_SHADER_FRAGMENT = 0,
    TRACE_SHADER_VERTEX   = 1,
-   TRACE_SHADER_GEOMETRY = 2,
+   TRACE_SHADER_GEOMETRY = 2
 };
 
 struct trace_shader
-- 
cgit v1.2.3


From 0bfc579bd435b4d0c258258440866840c473f681 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 3 Jan 2010 14:42:36 -0800
Subject: llvmpipe: Remove comma at end of enumerator list.

---
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_flow.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index d14f468ba9..ced7b9c11d 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -142,7 +142,7 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
 
 enum lp_build_blend_swizzle {
    LP_BUILD_BLEND_SWIZZLE_RGBA = 0,
-   LP_BUILD_BLEND_SWIZZLE_AAAA = 1,
+   LP_BUILD_BLEND_SWIZZLE_AAAA = 1
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_flow.c b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
index dcc25fbff8..25c10af29f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_flow.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_flow.c
@@ -47,7 +47,7 @@
  */
 enum lp_build_flow_construct_kind {
    lP_BUILD_FLOW_SCOPE,
-   LP_BUILD_FLOW_SKIP,
+   LP_BUILD_FLOW_SKIP
 };
 
 
-- 
cgit v1.2.3


From fe3ea299bf8abcf27c8a168138c8fd3f4e43cefa Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 3 Jan 2010 15:07:35 -0800
Subject: r300g: Remove comma at end of enumerator list.

---
 src/gallium/drivers/r300/r300_reg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 8ca785cb58..744ea6568d 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2636,7 +2636,7 @@ enum {
 	VE_COND_MUX_GTE			= 25,
 	VE_SET_GREATER_THAN		= 26,
 	VE_SET_EQUAL			= 27,
-	VE_SET_NOT_EQUAL		= 28,
+	VE_SET_NOT_EQUAL		= 28
 };
 
 enum {
@@ -2670,20 +2670,20 @@ enum {
 	ME_PRED_SET_CLR			= 25,
 	ME_PRED_SET_INV			= 26,
 	ME_PRED_SET_POP			= 27,
-	ME_PRED_SET_RESTORE		= 28,
+	ME_PRED_SET_RESTORE		= 28
 };
 
 enum {
 	/* R3XX */
 	PVS_MACRO_OP_2CLK_MADD		= 0,
-	PVS_MACRO_OP_2CLK_M2X_ADD	= 1,
+	PVS_MACRO_OP_2CLK_M2X_ADD	= 1
 };
 
 enum {
 	PVS_SRC_REG_TEMPORARY		= 0,	/* Intermediate Storage */
 	PVS_SRC_REG_INPUT		= 1,	/* Input Vertex Storage */
 	PVS_SRC_REG_CONSTANT		= 2,	/* Constant State Storage */
-	PVS_SRC_REG_ALT_TEMPORARY	= 3,	/* Alternate Intermediate Storage */
+	PVS_SRC_REG_ALT_TEMPORARY	= 3	/* Alternate Intermediate Storage */
 };
 
 enum {
@@ -2692,7 +2692,7 @@ enum {
 	PVS_DST_REG_OUT			= 2,	/* Output Memory. Used for all outputs */
 	PVS_DST_REG_OUT_REPL_X		= 3,	/* Output Memory & Replicate X to all channels */
 	PVS_DST_REG_ALT_TEMPORARY	= 4,	/* Alternate Intermediate Storage */
-	PVS_DST_REG_INPUT		= 5,	/* Output Memory & Replicate X to all channels */
+	PVS_DST_REG_INPUT		= 5	/* Output Memory & Replicate X to all channels */
 };
 
 enum {
@@ -2701,7 +2701,7 @@ enum {
 	PVS_SRC_SELECT_Z		= 2,	/* Select Z Component */
 	PVS_SRC_SELECT_W		= 3,	/* Select W Component */
 	PVS_SRC_SELECT_FORCE_0		= 4,	/* Force Component to 0.0 */
-	PVS_SRC_SELECT_FORCE_1		= 5,	/* Force Component to 1.0 */
+	PVS_SRC_SELECT_FORCE_1		= 5	/* Force Component to 1.0 */
 };
 
 /* PVS Opcode & Destination Operand Description */
@@ -2740,7 +2740,7 @@ enum {
 	PVS_DST_ADDR_SEL_MASK		= 0x3,
 	PVS_DST_ADDR_SEL_SHIFT		= 29,
 	PVS_DST_ADDR_MODE_0_MASK	= 0x1,
-	PVS_DST_ADDR_MODE_0_SHIFT	= 31,
+	PVS_DST_ADDR_MODE_0_SHIFT	= 31
 };
 
 /* PVS Source Operand Description */
@@ -2775,7 +2775,7 @@ enum {
 	PVS_SRC_ADDR_SEL_MASK		= 0x3,
 	PVS_SRC_ADDR_SEL_SHIFT		= 29,
 	PVS_SRC_ADDR_MODE_1_MASK	= 0x0,
-	PVS_SRC_ADDR_MODE_1_SHIFT	= 32,
+	PVS_SRC_ADDR_MODE_1_SHIFT	= 32
 };
 
 /*\}*/
-- 
cgit v1.2.3


From 3e196f433dc02654f1c0f7989e2e7e7a5c4b425d Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Sun, 3 Jan 2010 18:05:12 -0800
Subject: r300g: Silence 'mixed declarations and code' warning.

---
 src/gallium/drivers/r300/r300_cs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
index 86ba91db52..5342488d0d 100644
--- a/src/gallium/drivers/r300/r300_cs.h
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -51,7 +51,7 @@
 #define CS_LOCALS(context) \
     struct r300_context* const cs_context_copy = (context); \
     struct r300_winsys* cs_winsys = cs_context_copy->winsys; \
-    int cs_count = 0;
+    int cs_count = 0
 
 #define CHECK_CS(size) \
     cs_winsys->check_cs(cs_winsys, (size))
-- 
cgit v1.2.3


From acd1451393068762a46a06aee3af9e94bc09e147 Mon Sep 17 00:00:00 2001
From: Michel Dänzer <daenzer@vmware.com>
Date: Mon, 4 Jan 2010 13:05:43 +0100
Subject: svga: Fix debug build crash when rendering to depth/stencil
 renderbuffer only.

---
 src/gallium/drivers/svga/svga_draw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index 8db40d0fd5..ca73cf9d5a 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -164,7 +164,8 @@ svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
       }
 
       SVGA_DBG(DEBUG_DMA, "draw to sid %p, %d prims\n",
-               svga_surface(svga->curr.framebuffer.cbufs[0])->handle,
+               svga->curr.framebuffer.cbufs[0] ?
+               svga_surface(svga->curr.framebuffer.cbufs[0])->handle : NULL,
                hwtnl->cmd.prim_count);
 
       ret = SVGA3D_BeginDrawPrimitives(swc, 
-- 
cgit v1.2.3


From a3abb9d1cc4698e45e5d794919e8c27a3d7e5908 Mon Sep 17 00:00:00 2001
From: Alan Hourihane <alanh@vmware.com>
Date: Mon, 4 Jan 2010 12:42:51 +0000
Subject: silence compiler warning

---
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 0d01c07fb5..68520fa4f0 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1085,7 +1085,7 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
    const struct pipe_sampler_state *sampler = samp->sampler;
    unsigned level0, level1, j, imgFilter;
    int width, height;
-   float levelBlend;
+   float levelBlend = 0.0f;
 
    choose_mipmap_levels(tgsi_sampler, s, t, p, 
                         lodbias,
@@ -1241,7 +1241,7 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
    /* get/map pipe_surfaces corresponding to 3D tex slices */
    unsigned level0, level1, j, imgFilter;
    int width, height, depth;
-   float levelBlend;
+   float levelBlend = 0.0f;
    const uint face = 0;
 
    choose_mipmap_levels(tgsi_sampler, s, t, p, 
-- 
cgit v1.2.3


From bfc34c880492f15dc47db30e88f888d1c48d5e70 Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@vmware.com>
Date: Mon, 4 Jan 2010 11:28:37 -0800
Subject: i965g: Fix assert.

---
 src/gallium/drivers/i965/brw_eu_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_eu_emit.c b/src/gallium/drivers/i965/brw_eu_emit.c
index 4fe7b6acc1..00d8eaccbc 100644
--- a/src/gallium/drivers/i965/brw_eu_emit.c
+++ b/src/gallium/drivers/i965/brw_eu_emit.c
@@ -860,7 +860,7 @@ void brw_land_fwd_jump(struct brw_compile *p,
        jmpi = 2;
 
    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
-   assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
+   assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
 
    jmp_insn->bits3.ud = jmpi * ((landing - jmp_insn) - 1);
 }
-- 
cgit v1.2.3


From 2c046034dc5c95dd2fe84d0b4fd44f25235480b9 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Wed, 23 Dec 2009 17:02:03 +0100
Subject: Remove TGSI_OPCODE_SHR, map existing usage to TGSI_OPCODE_ISHR.

This is to differentiate it from its unsigned version, TGSI_OPCODE_USHR.
---
 src/gallium/auxiliary/gallivm/tgsitollvm.cpp   | 4 ++--
 src/gallium/auxiliary/tgsi/tgsi_sse2.c         | 2 +-
 src/gallium/drivers/cell/spu/spu_exec.c        | 2 +-
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 2 +-
 src/gallium/drivers/r300/r300_tgsi_to_rc.c     | 2 +-
 src/gallium/drivers/svga/svga_tgsi_insn.c      | 2 +-
 src/gallium/include/pipe/p_shader_tokens.h     | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 5cafe8c3f0..8f7d3b7100 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -552,7 +552,7 @@ translate_instruction(llvm::Module *module,
       break;
    case TGSI_OPCODE_SHL:
       break;
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
       break;
    case TGSI_OPCODE_AND:
       break;
@@ -919,7 +919,7 @@ translate_instructionir(llvm::Module *module,
       break;
    case TGSI_OPCODE_SHL:
       break;
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
       break;
    case TGSI_OPCODE_AND:
       break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index d63c75dafb..118059ace9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2578,7 +2578,7 @@ emit_instruction(
       return 0;
       break;
 
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
       return 0;
       break;
 
diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c
index 5ed330aa6e..d86d8e09a5 100644
--- a/src/gallium/drivers/cell/spu/spu_exec.c
+++ b/src/gallium/drivers/cell/spu/spu_exec.c
@@ -1681,7 +1681,7 @@ exec_instruction(
       }
       break;
 
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 7cfa4cc59a..61b033c9fc 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -1315,7 +1315,7 @@ emit_instruction(
       return 0;
       break;
 
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
       /* deprecated? */
       assert(0);
       return 0;
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 096cdb20bb..a792c2cf98 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -120,7 +120,7 @@ static unsigned translate_opcode(unsigned opcode)
      /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
      /* case TGSI_OPCODE_TRUNC: return RC_OPCODE_TRUNC; */
      /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
-     /* case TGSI_OPCODE_SHR: return RC_OPCODE_SHR; */
+     /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
      /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
      /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
      /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
index 1670da8bfa..dc5eb8fc60 100644
--- a/src/gallium/drivers/svga/svga_tgsi_insn.c
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -2109,7 +2109,7 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
    case TGSI_OPCODE_I2F:
    case TGSI_OPCODE_NOT:
    case TGSI_OPCODE_SHL:
-   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_ISHR:
    case TGSI_OPCODE_XOR:
       return FALSE;
 
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index 0288e3dd72..b150725808 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -264,7 +264,7 @@ struct tgsi_property_data {
 #define TGSI_OPCODE_NOT                 85
 #define TGSI_OPCODE_TRUNC               86
 #define TGSI_OPCODE_SHL                 87
-#define TGSI_OPCODE_SHR                 88
+                                /* gap */
 #define TGSI_OPCODE_AND                 89
 #define TGSI_OPCODE_OR                  90
 #define TGSI_OPCODE_MOD                 91
-- 
cgit v1.2.3


From 8ebef37c7ba67c4449367b95821293176a0a370b Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 5 Jan 2010 17:32:19 +0000
Subject: svga: Rebind shaders when tokens change.

Rebind even if the shader ID doesn't change; otherwise the token change
is not effective.

This reverts regression from commit
cdb445f3a9285e2d8f042a07021ade78b94e0156. Regression could be seen in
Quake3's loading screen -- the upper right corner of the screen would
be black.
---
 src/gallium/drivers/svga/svga_context.h  |  2 --
 src/gallium/drivers/svga/svga_state_fs.c | 13 +++++--------
 src/gallium/drivers/svga/svga_state_vs.c | 13 +++++--------
 3 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index eb666c2616..32e9304f81 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -268,8 +268,6 @@ struct svga_hw_draw_state
    unsigned ts[16][TS_MAX];
    float cb[PIPE_SHADER_TYPES][CB_MAX][4];
 
-   unsigned shader_id[PIPE_SHADER_TYPES];
-   
    struct svga_shader_result *fs;
    struct svga_shader_result *vs;
    struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 1902b0106b..272d1dd14e 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -268,16 +268,13 @@ static int emit_hw_fs( struct svga_context *svga,
    assert(id != SVGA3D_INVALID_ID);
 
    if (result != svga->state.hw_draw.fs) {
-      if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
-         ret = SVGA3D_SetShader(svga->swc,
-                                SVGA3D_SHADERTYPE_PS,
-                                id );
-         if (ret)
-            return ret;
-      }
+      ret = SVGA3D_SetShader(svga->swc,
+                             SVGA3D_SHADERTYPE_PS,
+                             id );
+      if (ret)
+         return ret;
 
       svga->dirty |= SVGA_NEW_FS_RESULT;
-      svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
       svga->state.hw_draw.fs = result;      
    }
 
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
index 6cf51434d7..db30f2735f 100644
--- a/src/gallium/drivers/svga/svga_state_vs.c
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -149,16 +149,13 @@ static int emit_hw_vs( struct svga_context *svga,
    }
 
    if (result != svga->state.hw_draw.vs) {
-      if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
-         ret = SVGA3D_SetShader(svga->swc,
-                                SVGA3D_SHADERTYPE_VS,
-                                id );
-         if (ret)
-            return ret;
-      }
+      ret = SVGA3D_SetShader(svga->swc,
+                             SVGA3D_SHADERTYPE_VS,
+                             id );
+      if (ret)
+         return ret;
 
       svga->dirty |= SVGA_NEW_VS_RESULT;
-      svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id;
       svga->state.hw_draw.vs = result;      
    }
 
-- 
cgit v1.2.3


From 38d8b180038eef692cbc75731d340c9fcc721784 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Tue, 5 Jan 2010 17:56:26 +0000
Subject: svga: Remove stale references to deleted shader results.

To ensure that a new result that happens to have the same address
as the old one will be detected as a change.
---
 src/gallium/drivers/svga/svga_pipe_fs.c | 7 +++++++
 src/gallium/drivers/svga/svga_pipe_vs.c | 7 +++++++
 2 files changed, 14 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
index a461a86dd3..5f1213e46a 100644
--- a/src/gallium/drivers/svga/svga_pipe_fs.c
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -111,6 +111,13 @@ void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
       util_bitmask_clear( svga->fs_bm, result->id );
 
       svga_destroy_shader_result( result );
+
+      /*
+       * Remove stale references to this result to ensure a new result on the
+       * same address will be detected as a change.
+       */
+      if(result == svga->state.hw_draw.fs)
+         svga->state.hw_draw.fs = NULL;
    }
 
    FREE((void *)fs->base.tokens);
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
index 02709e12bd..fd9864c51a 100644
--- a/src/gallium/drivers/svga/svga_pipe_vs.c
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -176,6 +176,13 @@ static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
       util_bitmask_clear( svga->vs_bm, result->id );
 
       svga_destroy_shader_result( result );
+
+      /*
+       * Remove stale references to this result to ensure a new result on the
+       * same address will be detected as a change.
+       */
+      if(result == svga->state.hw_draw.vs)
+         svga->state.hw_draw.vs = NULL;
    }
 
    FREE((void *)vs->base.tokens);
-- 
cgit v1.2.3


From c306ef5e81da5456d39a6e98cfc1f5f00b9c77a7 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Sun, 20 Dec 2009 12:19:19 +0100
Subject: nv50: remove vtxbuf stateobject after a referenced vtxbuf is mapped

- This avoids problematic "reloc'ed while mapped" messages and
some associated corruption as well.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
---
 src/gallium/drivers/nouveau/nouveau_screen.c   | 21 +++++++++++++++++++++
 src/gallium/drivers/nouveau/nouveau_screen.h   |  3 +++
 src/gallium/drivers/nouveau/nouveau_stateobj.h | 13 +++++++++++++
 src/gallium/drivers/nv50/nv50_screen.c         | 23 +++++++++++++++++++++++
 src/gallium/drivers/nv50/nv50_screen.h         |  2 ++
 src/gallium/drivers/nv50/nv50_state_validate.c |  3 +++
 6 files changed, 65 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index 0437af3725..7ebc94ed6c 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -127,8 +127,18 @@ nouveau_screen_bo_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
 		      unsigned usage)
 {
 	struct nouveau_bo *bo = nouveau_bo(pb);
+	struct nouveau_screen *nscreen = nouveau_screen(pscreen);
 	int ret;
 
+	if (nscreen->pre_pipebuffer_map_callback) {
+		ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+		if (ret) {
+			debug_printf("pre_pipebuffer_map_callback failed %d\n",
+				ret);
+			return NULL;
+		}
+	}
+
 	ret = nouveau_bo_map(bo, nouveau_screen_map_flags(usage));
 	if (ret) {
 		debug_printf("map failed: %d\n", ret);
@@ -143,11 +153,22 @@ nouveau_screen_bo_map_range(struct pipe_screen *pscreen, struct pipe_buffer *pb,
 			    unsigned offset, unsigned length, unsigned usage)
 {
 	struct nouveau_bo *bo = nouveau_bo(pb);
+	struct nouveau_screen *nscreen = nouveau_screen(pscreen);
 	uint32_t flags = nouveau_screen_map_flags(usage);
 	int ret;
 
+	if (nscreen->pre_pipebuffer_map_callback) {
+		ret = nscreen->pre_pipebuffer_map_callback(pscreen, pb, usage);
+		if (ret) {
+			debug_printf("pre_pipebuffer_map_callback failed %d\n",
+				ret);
+			return NULL;
+		}
+	}
+
 	ret = nouveau_bo_map_range(bo, offset, length, flags);
 	if (ret) {
+		nouveau_bo_unmap(bo);
 		if (!(flags & NOUVEAU_BO_NOWAIT) || ret != -EBUSY)
 			debug_printf("map_range failed: %d\n", ret);
 		return NULL;
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index ebfc67ad1c..a7927d88df 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -5,6 +5,9 @@ struct nouveau_screen {
 	struct pipe_screen base;
 	struct nouveau_device *device;
 	struct nouveau_channel *channel;
+
+	int (*pre_pipebuffer_map_callback) (struct pipe_screen *pscreen,
+		struct pipe_buffer *pb, unsigned usage);
 };
 
 static inline struct nouveau_screen *
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 9aee9e4956..77ff7dcf20 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -98,6 +98,19 @@ so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
 	so_data(so, data);
 }
 
+/* Determine if this buffer object is referenced by this state object. */
+static INLINE boolean
+so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo)
+{
+	int i;
+
+	for (i = 0; i < so->cur_reloc; i++)
+		if (so->reloc[i].bo == bo)
+			return true;
+
+	return false;
+}
+
 static INLINE void
 so_dump(struct nouveau_stateobj *so)
 {
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 7e039ea82e..1778a74517 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -189,6 +189,28 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 	FREE(screen);
 }
 
+static int
+nv50_pre_pipebuffer_map(struct pipe_screen *pscreen, struct pipe_buffer *pb,
+	unsigned usage)
+{
+	struct nv50_screen *screen = nv50_screen(pscreen);
+	struct nv50_context *ctx = screen->cur_ctx;
+
+	if (!(pb->usage & PIPE_BUFFER_USAGE_VERTEX))
+		return 0;
+
+	/* Our vtxbuf got mapped, it can no longer be considered part of current
+	 * state, remove it to avoid emitting reloc markers.
+	 */
+	if (ctx && ctx->state.vtxbuf && so_bo_is_reloc(ctx->state.vtxbuf,
+			nouveau_bo(pb))) {
+		so_ref(NULL, &ctx->state.vtxbuf);
+		ctx->dirty |= NV50_NEW_ARRAYS;
+	}
+
+	return 0;
+}
+
 struct pipe_screen *
 nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 {
@@ -216,6 +238,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	pscreen->get_param = nv50_screen_get_param;
 	pscreen->get_paramf = nv50_screen_get_paramf;
 	pscreen->is_format_supported = nv50_screen_is_format_supported;
+	screen->base.pre_pipebuffer_map_callback = nv50_pre_pipebuffer_map;
 
 	nv50_screen_init_miptree_functions(pscreen);
 	nv50_transfer_init_screen_functions(pscreen);
diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h
index 61e24a5b57..a038a4e3c2 100644
--- a/src/gallium/drivers/nv50/nv50_screen.h
+++ b/src/gallium/drivers/nv50/nv50_screen.h
@@ -2,6 +2,7 @@
 #define __NV50_SCREEN_H__
 
 #include "nouveau/nouveau_screen.h"
+#include "nv50_context.h"
 
 struct nv50_screen {
 	struct nouveau_screen base;
@@ -9,6 +10,7 @@ struct nv50_screen {
 	struct nouveau_winsys *nvws;
 
 	unsigned cur_pctx;
+	struct nv50_context *cur_ctx;
 
 	struct nouveau_grobj *tesla;
 	struct nouveau_grobj *eng2d;
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index c8bdf9dc27..682786345e 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -185,6 +185,9 @@ nv50_state_emit(struct nv50_context *nv50)
 	struct nv50_screen *screen = nv50->screen;
 	struct nouveau_channel *chan = screen->base.channel;
 
+	/* I don't want to copy headers from the winsys. */
+	screen->cur_ctx = nv50;
+
 	if (nv50->pctx_id != screen->cur_pctx) {
 		if (nv50->state.fb)
 			nv50->state.dirty |= NV50_NEW_FRAMEBUFFER;
-- 
cgit v1.2.3


From 29d2ab37e65c9242d01f63cc5376cb6929f9285f Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Tue, 29 Dec 2009 00:36:17 +0100
Subject: nouveau: kill nouveau_push.h and use libdrm versions of BEGIN_RINGs,
 etc

---
 src/gallium/drivers/nouveau/nouveau_push.h |  93 -----
 src/gallium/drivers/nv04/nv04_context.c    |  44 ++-
 src/gallium/drivers/nv04/nv04_context.h    |   4 -
 src/gallium/drivers/nv04/nv04_prim_vbuf.c  |  84 +++--
 src/gallium/drivers/nv04/nv04_state_emit.c |  72 ++--
 src/gallium/drivers/nv10/nv10_context.c    | 367 ++++++++++----------
 src/gallium/drivers/nv10/nv10_context.h    |   4 -
 src/gallium/drivers/nv10/nv10_fragtex.c    |  28 +-
 src/gallium/drivers/nv10/nv10_prim_vbuf.c  |  34 +-
 src/gallium/drivers/nv10/nv10_state_emit.c | 166 +++++----
 src/gallium/drivers/nv20/nv20_context.c    | 530 +++++++++++++++--------------
 src/gallium/drivers/nv20/nv20_context.h    |   4 -
 src/gallium/drivers/nv20/nv20_fragtex.c    |  28 +-
 src/gallium/drivers/nv20/nv20_prim_vbuf.c  |  56 +--
 src/gallium/drivers/nv20/nv20_state_emit.c | 181 +++++-----
 src/gallium/drivers/nv30/nv30_context.c    |  15 +-
 src/gallium/drivers/nv30/nv30_context.h    |   4 -
 src/gallium/drivers/nv30/nv30_query.c      |  20 +-
 src/gallium/drivers/nv30/nv30_vbo.c        | 106 +++---
 src/gallium/drivers/nv30/nv30_vertprog.c   |  18 +-
 src/gallium/drivers/nv40/nv40_context.c    |  15 +-
 src/gallium/drivers/nv40/nv40_context.h    |   4 -
 src/gallium/drivers/nv40/nv40_draw.c       |  62 ++--
 src/gallium/drivers/nv40/nv40_query.c      |  20 +-
 src/gallium/drivers/nv40/nv40_state_emit.c |  11 +-
 src/gallium/drivers/nv40/nv40_vbo.c        | 106 +++---
 src/gallium/drivers/nv40/nv40_vertprog.c   |  18 +-
 27 files changed, 1089 insertions(+), 1005 deletions(-)
 delete mode 100644 src/gallium/drivers/nouveau/nouveau_push.h

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h
deleted file mode 100644
index 9c235080a5..0000000000
--- a/src/gallium/drivers/nouveau/nouveau_push.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef __NOUVEAU_PUSH_H__
-#define __NOUVEAU_PUSH_H__
-
-#include "nouveau/nouveau_winsys.h"
-
-#ifndef NOUVEAU_PUSH_CONTEXT
-#error undefined push context
-#endif
-
-#define OUT_RING(data) do {                                                    \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	(*pc->base.channel->pushbuf->cur++) = (data);                          \
-} while(0)
-
-#define OUT_RINGp(src,size) do {                                               \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	memcpy(pc->base.channel->pushbuf->cur, (src), (size) * 4);             \
-	pc->base.channel->pushbuf->cur += (size);                              \
-} while(0)
-
-#define OUT_RINGf(data) do {                                                   \
-	union { float v; uint32_t u; } c;                                      \
-	c.v = (data);                                                          \
-	OUT_RING(c.u);                                                         \
-} while(0)
-
-#define BEGIN_RING(obj,mthd,size) do {                                         \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	struct nouveau_channel *chan = pc->base.channel;                       \
-	if (chan->pushbuf->remaining < ((size) + 1))                           \
-		nouveau_pushbuf_flush(chan, ((size) + 1));                     \
-	OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd));             \
-	chan->pushbuf->remaining -= ((size) + 1);                              \
-} while(0)
-
-#define BEGIN_RING_NI(obj,mthd,size) do {                                      \
-	BEGIN_RING(obj, (mthd) | 0x40000000, (size));                          \
-} while(0)
-
-static inline void
-DO_FIRE_RING(struct nouveau_channel *chan, struct pipe_fence_handle **fence)
-{
-	nouveau_pushbuf_flush(chan, 0);
-	if (fence)
-		*fence = NULL;
-}
-
-#define FIRE_RING(fence) do {                                                  \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	DO_FIRE_RING(pc->base.channel, fence);                                 \
-} while(0)
-
-#define OUT_RELOC(bo,data,flags,vor,tor) do {                                  \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	struct nouveau_channel *chan = pc->base.channel;                       \
-	nouveau_pushbuf_emit_reloc(chan, chan->pushbuf->cur++, nouveau_bo(bo), \
-				   (data), 0, (flags), (vor), (tor));          \
-} while(0)
-
-/* Raw data + flags depending on FB/TT buffer */
-#define OUT_RELOCd(bo,data,flags,vor,tor) do {                                 \
-	OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor));        \
-} while(0)
-
-/* FB/TT object handle */
-#define OUT_RELOCo(bo,flags) do {                                              \
-	OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR,                            \
-		  pc->base.channel->vram->handle,                              \
-		  pc->base.channel->gart->handle);                             \
-} while(0)
-
-/* Low 32-bits of offset */
-#define OUT_RELOCl(bo,delta,flags) do {                                        \
-	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0);              \
-} while(0)
-
-/* High 32-bits of offset */
-#define OUT_RELOCh(bo,delta,flags) do {                                        \
-	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0);             \
-} while(0)
-
-/* A reloc which'll recombine into a NV_DMA_METHOD packet header */
-#define OUT_RELOCm(bo, flags, obj, mthd, size) do {                            \
-	NOUVEAU_PUSH_CONTEXT(pc);                                              \
-	struct nouveau_channel *chan = pc->base.channel;                       \
-	if (chan->pushbuf->remaining < ((size) + 1))                           \
-		nouveau_pushbuf_flush(chan, ((size) + 1));                     \
-	OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd),      \
-		   (flags), 0, 0);                                             \
-	chan->pushbuf->remaining -= ((size) + 1);                              \
-} while(0)
-
-#endif
diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c
index 770733a4a1..edd96859cf 100644
--- a/src/gallium/drivers/nv04/nv04_context.c
+++ b/src/gallium/drivers/nv04/nv04_context.c
@@ -10,10 +10,14 @@ nv04_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv04_context *nv04 = nv04_context(pipe);
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
 
 	draw_flush(nv04->draw);
 
-	FIRE_RING(fence);
+	FIRE_RING(chan);
+	if (fence)
+		*fence = NULL;
 }
 
 static void
@@ -30,32 +34,36 @@ nv04_destroy(struct pipe_context *pipe)
 static boolean
 nv04_init_hwctx(struct nv04_context *nv04)
 {
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
 	// requires a valid handle
-//	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
+//	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOTIFY, 1);
 //	OUT_RING(0);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
-	OUT_RING(0);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_NOP, 1);
+	OUT_RING(chan, 0);
 
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
-	OUT_RING(0x40182800);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+	OUT_RING(chan, 0x40182800);
 //	OUT_RING(1<<20/*no cull*/);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
 //	OUT_RING(0x24|(1<<6)|(1<<8));
-	OUT_RING(0x120001a4);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
-	OUT_RING(0x332213a1);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
-	OUT_RING(0x11001010);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
-	OUT_RING(0x0);
-//	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
+	OUT_RING(chan, 0x120001a4);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FORMAT, 1);
+	OUT_RING(chan, 0x332213a1);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FILTER, 1);
+	OUT_RING(chan, 0x11001010);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_COLORKEY, 1);
+	OUT_RING(chan, 0x0);
+//	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 1);
 //	OUT_RING(SCREEN_OFFSET);
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
-	OUT_RING(0xff000000);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_FOGCOLOR, 1);
+	OUT_RING(chan, 0xff000000);
 
 
-	FIRE_RING (NULL);
+	FIRE_RING (chan);
 	return TRUE;
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h
index 5951115293..fe3b527423 100644
--- a/src/gallium/drivers/nv04/nv04_context.h
+++ b/src/gallium/drivers/nv04/nv04_context.h
@@ -15,10 +15,6 @@
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_context.h"
 
-#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
-	struct nv04_screen *ctx = nv04->screen
-#include "nouveau/nouveau_push.h"
-
 #include "nv04_state.h"
 
 #define NOUVEAU_ERR(fmt, args...) \
diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
index 25395edfd7..0b795ea243 100644
--- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c
+++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c
@@ -93,33 +93,45 @@ nv04_vbuf_render_set_primitive( struct vbuf_render *render,
 
 static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5)
 {
-	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49);
-	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v4,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v5,8);
-	OUT_RING(0xFEDCBA);
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA), 49);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v4,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v5,8);
+	OUT_RING(chan, 0xFEDCBA);
 }
 
 static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2)
 {
-	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25);
-	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
-	OUT_RING(0xFED);
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD), 25);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+	OUT_RING(chan, 0xFED);
 }
 
 static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3)
 {
-	BEGIN_RING(fahrenheit,NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33);
-	OUT_RINGp(buffer + VERTEX_SIZE * v0,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v1,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v2,8);
-	OUT_RINGp(buffer + VERTEX_SIZE * v3,8);
-	OUT_RING(0xFECEDC);
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC), 33);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v0,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v1,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v2,8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * v3,8);
+	OUT_RING(chan, 0xFECEDC);
 }
 
 static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices)
@@ -156,7 +168,10 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
 {
 	const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC};
 	unsigned char* buffer = render->buffer;
-	struct nv04_context* nv04 = render->nv04;
+	struct nv04_context *nv04 = render->nv04;
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
 	int i,j;
 
 	for(i = 0; i<nr_indices; i+=14) 
@@ -166,15 +181,15 @@ static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, con
 		if (numvert<3)
 			break;
 
-		BEGIN_RING( fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 );
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8);
 		for(j = 0; j<numvert; j++)
-			OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 );
+			OUT_RINGp(chan, buffer + VERTEX_SIZE * indices [i+j], 8 );
 
-		BEGIN_RING_NI( fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
+		BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2 );
 		for(j = 0; j<numtri/2; j++ )
-			OUT_RING(striptbl[j]);
+			OUT_RING(chan, striptbl[j]);
 		if (numtri%2)
-			OUT_RING(striptbl[numtri/2]&0xFFF);
+			OUT_RING(chan, striptbl[numtri/2]&0xFFF);
 	}
 }
 
@@ -182,11 +197,14 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
 {
 	const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0};
 	unsigned char* buffer = render->buffer;
-	struct nv04_context* nv04 = render->nv04;
+	struct nv04_context *nv04 = render->nv04;
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
 	int i,j;
 
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
-	OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8);
+	OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[0], 8);
 
 	for(i = 1; i<nr_indices; i+=14)
 	{
@@ -195,16 +213,16 @@ static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const
 		if (numvert < 3)
 			break;
 
-		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8);
 
 		for(j=0;j<numvert;j++)
-			OUT_RINGp( buffer + VERTEX_SIZE * indices[ i+j ], 8 );
+			OUT_RINGp(chan, buffer + VERTEX_SIZE * indices[ i+j ], 8 );
 
-		BEGIN_RING_NI(fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
+		BEGIN_RING_NI(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_DRAWPRIMITIVE(0), (numtri+1)/2);
 		for(j = 0; j<numtri/2; j++)
-			OUT_RING(fantbl[j]);
+			OUT_RING(chan, fantbl[j]);
 		if (numtri%2)
-			OUT_RING(fantbl[numtri/2]&0xFFF);
+			OUT_RING(chan, fantbl[numtri/2]&0xFFF);
 	}
 }
 
diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c
index bd98ae091f..b8d6dc560f 100644
--- a/src/gallium/drivers/nv04/nv04_state_emit.c
+++ b/src/gallium/drivers/nv04/nv04_state_emit.c
@@ -57,13 +57,19 @@ static uint32_t nv04_blend_func(uint32_t f)
 static void nv04_emit_control(struct nv04_context* nv04)
 {
 	uint32_t control = nv04->dsa->control;
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
 
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
-	OUT_RING(control);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+	OUT_RING(chan, control);
 }
 
 static void nv04_emit_blend(struct nv04_context* nv04)
 {
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
 	uint32_t blend;
 
 	blend=0x4; // texture MODULATE_ALPHA
@@ -75,19 +81,23 @@ static void nv04_emit_blend(struct nv04_context* nv04)
 	blend|=(nv04_blend_func(nv04->blend->b_src)<<24);
 	blend|=(nv04_blend_func(nv04->blend->b_dst)<<28);
 
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
-	OUT_RING(blend);
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_BLEND, 1);
+	OUT_RING(chan, blend);
 }
 
 static void nv04_emit_sampler(struct nv04_context *nv04, int unit)
 {
 	struct nv04_miptree *nv04mt = nv04->tex_miptree[unit];
 	struct pipe_texture *pt = &nv04mt->base;
-
-	BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
-	OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-	OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
-	OUT_RING(nv04->sampler[unit]->filter);
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+	struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+
+	BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 3);
+	OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+	OUT_RING(chan, nv04->sampler[unit]->filter);
 }
 
 static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
@@ -97,6 +107,10 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
 	uint32_t rt_format, w, h;
 	int colour_format = 0, zeta_format = 0;
 	struct nv04_miptree *nv04mt = 0;
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
+	struct nouveau_bo *bo;
 
 	w = fb->cbufs[0]->width;
 	h = fb->cbufs[0]->height;
@@ -128,24 +142,29 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04)
 		assert(0);
 	}
 
-	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
-	OUT_RING(rt_format);
+	BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1);
+	OUT_RING(chan, rt_format);
 
 	nv04mt = (struct nv04_miptree *)rt->base.texture;
+	bo = nouveau_bo(nv04mt->buffer);
 	/* FIXME pitches have to be aligned ! */
-	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
-	OUT_RING(rt->pitch|(zeta->pitch<<16));
-	OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+	OUT_RING(chan, rt->pitch|(zeta->pitch<<16));
+	OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	if (fb->zsbuf) {
 		nv04mt = (struct nv04_miptree *)zeta->base.texture;
-		BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
-		OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+		OUT_RELOCl(chan, nouveau_bo(nv04mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	}
 }
 
 void
 nv04_emit_hw_state(struct nv04_context *nv04)
 {
+	struct nv04_screen *screen = nv04->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *fahrenheit = screen->fahrenheit;
+	struct nouveau_grobj *context_surfaces_3d = screen->context_surfaces_3d;
 	int i;
 
 	if (nv04->dirty & NV04_NEW_VERTPROG) {
@@ -163,8 +182,8 @@ nv04_emit_hw_state(struct nv04_context *nv04)
 	if (nv04->dirty & NV04_NEW_CONTROL) {
 		nv04->dirty &= ~NV04_NEW_CONTROL;
 
-		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
-		OUT_RING(nv04->dsa->control);
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_CONTROL, 1);
+		OUT_RING(chan, nv04->dsa->control);
 	}
 
 	if (nv04->dirty & NV04_NEW_BLEND) {
@@ -205,12 +224,12 @@ nv04_emit_hw_state(struct nv04_context *nv04)
 	unsigned rt_pitch = ((struct nv04_surface *)nv04->rt)->pitch;
 	unsigned zeta_pitch = ((struct nv04_surface *)nv04->zeta)->pitch;
 
-	BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
-	OUT_RING(rt_pitch|(zeta_pitch<<16));
-	OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2);
+	OUT_RING(chan, rt_pitch|(zeta_pitch<<16));
+	OUT_RELOCl(chan, nouveau_bo(nv04->rt), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	if (nv04->zeta) {
-		BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
-		OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(chan, context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1);
+		OUT_RELOCl(chan, nouveau_bo(nv04->zeta), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 	}
 
 	/* Texture images */
@@ -218,9 +237,10 @@ nv04_emit_hw_state(struct nv04_context *nv04)
 		if (!(nv04->fp_samplers & (1 << i)))
 			continue;
 		struct nv04_miptree *nv04mt = nv04->tex_miptree[i];
-		BEGIN_RING(fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
-		OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-		OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+		struct nouveau_bo *bo = nouveau_bo(nv04mt->buffer);
+		BEGIN_RING(chan, fahrenheit, NV04_TEXTURED_TRIANGLE_OFFSET, 2);
+		OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+		OUT_RELOCd(chan, bo, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
 	}
 }
 
diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c
index 0dadeb03dd..1ecb73d06e 100644
--- a/src/gallium/drivers/nv10/nv10_context.c
+++ b/src/gallium/drivers/nv10/nv10_context.c
@@ -10,10 +10,14 @@ nv10_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv10_context *nv10 = nv10_context(pipe);
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
 
 	draw_flush(nv10->draw);
 
-	FIRE_RING(fence);
+	FIRE_RING(chan);
+	if (fence)
+		*fence = NULL;
 }
 
 static void
@@ -31,225 +35,226 @@ static void nv10_init_hwctx(struct nv10_context *nv10)
 {
 	struct nv10_screen *screen = nv10->screen;
 	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 	int i;
 	float projectionmatrix[16];
 
-	BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1);
-	OUT_RING  (screen->sync->handle);
-	BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2);
-	OUT_RING  (chan->vram->handle);
-	OUT_RING  (chan->gart->handle);
-	BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2);
-	OUT_RING  (chan->vram->handle);
-	OUT_RING  (chan->vram->handle);
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_NOTIFY, 1);
+	OUT_RING  (chan, screen->sync->handle);
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY0, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->gart->handle);
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_IN_MEMORY2, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->vram->handle);
 
-	BEGIN_RING(celsius, NV10TCL_NOP, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
-	OUT_RING  ((0x7ff<<16)|0x800);
-	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
-	OUT_RING  ((0x7ff<<16)|0x800);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+	OUT_RING  (chan, (0x7ff<<16)|0x800);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING  (chan, (0x7ff<<16)|0x800);
 
 	for (i=1;i<8;i++) {
-		BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
-		OUT_RING  (0);
-		BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
+		OUT_RING  (chan, 0);
 	}
 
-	BEGIN_RING(celsius, 0x290, 1);
-	OUT_RING  ((0x10<<16)|1);
-	BEGIN_RING(celsius, 0x3f4, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, 0x290, 1);
+	OUT_RING  (chan, (0x10<<16)|1);
+	BEGIN_RING(chan, celsius, 0x3f4, 1);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(celsius, NV10TCL_NOP, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING  (chan, 0);
 
 	if (nv10->screen->celsius->grclass != NV10TCL) {
 		/* For nv11, nv17 */
-		BEGIN_RING(celsius, 0x120, 3);
-		OUT_RING  (0);
-		OUT_RING  (1);
-		OUT_RING  (2);
+		BEGIN_RING(chan, celsius, 0x120, 3);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 1);
+		OUT_RING  (chan, 2);
 
-		BEGIN_RING(celsius, NV10TCL_NOP, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+		OUT_RING  (chan, 0);
 	}
 
-	BEGIN_RING(celsius, NV10TCL_NOP, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING  (chan, 0);
 
 	/* Set state */
-	BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
-	OUT_RING  (0x207);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-
-	BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12);
-	OUT_RING  (0x30141010);
-	OUT_RING  (0);
-	OUT_RING  (0x20040000);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0x00000c00);
-	OUT_RING  (0);
-	OUT_RING  (0x00000c00);
-	OUT_RING  (0x18000000);
-	OUT_RING  (0x300e0300);
-	OUT_RING  (0x0c091c80);
-
-	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2);
-	OUT_RING  (1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4);
-	OUT_RING  (1);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0x8006);
-	BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8);
-	OUT_RING  (0xff);
-	OUT_RING  (0x207);
-	OUT_RING  (0);
-	OUT_RING  (0xff);
-	OUT_RING  (0x1e00);
-	OUT_RING  (0x1e00);
-	OUT_RING  (0x1e00);
-	OUT_RING  (0x1d01);
-	BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
-	OUT_RING  (0x201);
-	BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
-	OUT_RING  (8);
-	BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1);
-	OUT_RING  (8);
-	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
-	OUT_RING  (0x1b02);
-	OUT_RING  (0x1b02);
-	BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
-	OUT_RING  (0x405);
-	OUT_RING  (0x901);
-	BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8);
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 2);
+	OUT_RING  (chan, 0x207);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(0), 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+
+	BEGIN_RING(chan, celsius, NV10TCL_RC_IN_ALPHA(0), 12);
+	OUT_RING  (chan, 0x30141010);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x20040000);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0x18000000);
+	OUT_RING  (chan, 0x300e0300);
+	OUT_RING  (chan, 0x0c091c80);
+
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 2);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_SRC, 4);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x8006);
+	BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 8);
+	OUT_RING  (chan, 0xff);
+	OUT_RING  (chan, 0x207);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0xff);
+	OUT_RING  (chan, 0x1e00);
+	OUT_RING  (chan, 0x1e00);
+	OUT_RING  (chan, 0x1e00);
+	OUT_RING  (chan, 0x1d01);
+	BEGIN_RING(chan, celsius, NV10TCL_NORMALIZE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LIGHT_MODEL, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_CONTROL, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_ENABLED_LIGHTS, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+	OUT_RING  (chan, 0x201);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+	OUT_RING  (chan, 8);
+	BEGIN_RING(chan, celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_WIDTH, 1);
+	OUT_RING  (chan, 8);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (chan, 0x1b02);
+	OUT_RING  (chan, 0x1b02);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+	OUT_RING  (chan, 0x405);
+	OUT_RING  (chan, 0x901);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_TX_GEN_S(0), 8);
 	for (i=0;i<8;i++) {
-		OUT_RING  (0);
+		OUT_RING  (chan, 0);
 	}
-	BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
-	OUT_RING  (0x3fc00000);	/* -1.50 */
-	OUT_RING  (0xbdb8aa0a);	/* -0.09 */
-	OUT_RING  (0);		/*  0.00 */
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RING  (chan, 0x3fc00000);	/*  1.50 */
+	OUT_RING  (chan, 0xbdb8aa0a);	/* -0.09 */
+	OUT_RING  (chan, 0);		/*  0.00 */
 
-	BEGIN_RING(celsius, NV10TCL_NOP, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_NOP, 1);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2);
-	OUT_RING  (0x802);
-	OUT_RING  (2);
+	BEGIN_RING(chan, celsius, NV10TCL_FOG_MODE, 2);
+	OUT_RING  (chan, 0x802);
+	OUT_RING  (chan, 2);
 	/* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
 	 * using texturing, except when using the texture matrix
 	 */
-	BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
-	OUT_RING  (6);
-	BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
-	OUT_RING  (0x01010101);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
+	OUT_RING  (chan, 6);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+	OUT_RING  (chan, 0x01010101);
 
 	/* Set vertex component */
-	BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4);
-	OUT_RINGf (1.0);
-	OUT_RINGf (1.0);
-	OUT_RINGf (1.0);
-	OUT_RINGf (1.0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RINGf (1.0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (1.0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (1.0);
-	BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1);
-	OUT_RINGf (0.0);
-	BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
-	OUT_RING  (1);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL_4F_R, 4);
+	OUT_RINGf (chan, 1.0);
+	OUT_RINGf (chan, 1.0);
+	OUT_RINGf (chan, 1.0);
+	OUT_RINGf (chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_COL2_3F_R, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_NOR_3F_X, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RINGf (chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX0_4F_S, 4);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_TX1_4F_S, 4);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_FOG_1F, 1);
+	OUT_RINGf (chan, 0.0);
+	BEGIN_RING(chan, celsius, NV10TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, 1);
 
 	memset(projectionmatrix, 0, sizeof(projectionmatrix));
-	BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
+	BEGIN_RING(chan, celsius, NV10TCL_PROJECTION_MATRIX(0), 16);
 	projectionmatrix[0*4+0] = 1.0;
 	projectionmatrix[1*4+1] = 1.0;
 	projectionmatrix[2*4+2] = 1.0;
 	projectionmatrix[3*4+3] = 1.0;
 	for (i=0;i<16;i++) {
-		OUT_RINGf  (projectionmatrix[i]);
+		OUT_RINGf  (chan, projectionmatrix[i]);
 	}
 
-	BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
-	OUT_RING  (0.0);
-	OUT_RINGf  (16777216.0);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+	OUT_RING  (chan, 0.0);
+	OUT_RINGf  (chan, 16777216.0);
 
-	BEGIN_RING(celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
-	OUT_RINGf  (-2048.0);
-	OUT_RINGf  (-2048.0);
-	OUT_RINGf  (16777215.0 * 0.5);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGf  (chan, -2048.0);
+	OUT_RINGf  (chan, -2048.0);
+	OUT_RINGf  (chan, 16777215.0 * 0.5);
+	OUT_RING  (chan, 0);
 
-	FIRE_RING (NULL);
+	FIRE_RING (chan);
 }
 
 struct pipe_context *
diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h
index 3f829fd106..ab4b825487 100644
--- a/src/gallium/drivers/nv10/nv10_context.h
+++ b/src/gallium/drivers/nv10/nv10_context.h
@@ -15,10 +15,6 @@
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_context.h"
 
-#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
-	struct nv10_screen *ctx = nv10->screen
-#include "nouveau/nouveau_push.h"
-
 #include "nv10_state.h"
 
 #define NOUVEAU_ERR(fmt, args...) \
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
index 906fdfeeb9..c1f7ccb9ab 100644
--- a/src/gallium/drivers/nv10/nv10_fragtex.c
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -52,6 +52,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 	struct nv10_miptree *nv10mt = nv10->tex_miptree[unit];
 	struct pipe_texture *pt = &nv10mt->base;
 	struct nv10_texture_format *tf;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 	uint32_t txf, txs, txp;
 
 	tf = nv10_fragtex_format(pt->format);
@@ -82,15 +85,15 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 		return;
 	}
 
-	BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8);
-	OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-	OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
-	OUT_RING  (ps->wrap);
-	OUT_RING  (0x40000000); /* enable */
-	OUT_RING  (txs);
-	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width0 << 16) | pt->height0);
-	OUT_RING  (ps->bcol);
+	BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(unit), 8);
+	OUT_RELOCl(chan, nouveau_bo(nv10mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	OUT_RELOCd(chan, nouveau_bo(nv10mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+	OUT_RING  (chan, ps->wrap);
+	OUT_RING  (chan, 0x40000000); /* enable */
+	OUT_RING  (chan, txs);
+	OUT_RING  (chan, ps->filt | 0x2000 /* magic */);
+	OUT_RING  (chan, (pt->width0 << 16) | pt->height0);
+	OUT_RING  (chan, ps->bcol);
 #endif
 }
 
@@ -99,6 +102,9 @@ nv10_fragtex_bind(struct nv10_context *nv10)
 {
 #if 0
 	struct nv10_fragment_program *fp = nv10->fragprog.active;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 	unsigned samplers, unit;
 
 	samplers = nv10->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv10_fragtex_bind(struct nv10_context *nv10)
 		unit = ffs(samplers) - 1;
 		samplers &= ~(1 << unit);
 
-		BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, celsius, NV10TCL_TX_ENABLE(unit), 1);
+		OUT_RING  (chan, 0);
 	}
 
 	samplers = nv10->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
index 7ba9777a22..c5dbe43dbc 100644
--- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c
+++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c
@@ -67,12 +67,15 @@ struct nv10_vbuf_render {
 
 void nv10_vtxbuf_bind( struct nv10_context* nv10 )
 {
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 	int i;
 	for(i = 0; i < 8; i++) {
-		BEGIN_RING(celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
-		OUT_RING(0/*nv10->vtxbuf*/);
-		BEGIN_RING(celsius, NV10TCL_VTXFMT(i), 1);
-		OUT_RING(0/*XXX*/);
+		BEGIN_RING(chan, celsius, NV10TCL_VTXBUF_ADDRESS(i), 1);
+		OUT_RING(chan, 0/*nv10->vtxbuf*/);
+		BEGIN_RING(chan, celsius, NV10TCL_VTXFMT(i), 1);
+		OUT_RING(chan, 0/*XXX*/);
 	}
 }
 
@@ -163,19 +166,22 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
 {
 	struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render);
 	struct nv10_context *nv10 = nv10_render->nv10;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 	int push, i;
 
 	nv10_emit_hw_state(nv10);
 
-	BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
-	OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+	OUT_RELOCl(chan, nouveau_bo(nv10_render->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 
-	BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
-	OUT_RING(nv10_render->hwprim);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING(chan, nv10_render->hwprim);
 
 	if (nr_indices & 1) {
-		BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1);
-		OUT_RING  (indices[0]);
+		BEGIN_RING(chan, celsius, NV10TCL_VB_ELEMENT_U32, 1);
+		OUT_RING  (chan, indices[0]);
 		indices++; nr_indices--;
 	}
 
@@ -183,16 +189,16 @@ nv10_vbuf_render_draw( struct vbuf_render *render,
 		// XXX too big/small ? check the size
 		push = MIN2(nr_indices, 1200 * 2);
 
-		BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
+		BEGIN_RING_NI(chan, celsius, NV10TCL_VB_ELEMENT_U16, push >> 1);
 		for (i = 0; i < push; i+=2)
-			OUT_RING((indices[i+1] << 16) | indices[i]);
+			OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
 
 		nr_indices -= push;
 		indices  += push;
 	}
 
-	BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING  (chan, 0);
 }
 
 
diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c
index 2577ab73b5..30a596ca60 100644
--- a/src/gallium/drivers/nv10/nv10_state_emit.c
+++ b/src/gallium/drivers/nv10/nv10_state_emit.c
@@ -4,25 +4,32 @@
 static void nv10_state_emit_blend(struct nv10_context* nv10)
 {
 	struct nv10_blend_state *b = nv10->blend;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 
-	BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1);
-	OUT_RING  (b->d_enable);
+	BEGIN_RING(chan, celsius, NV10TCL_DITHER_ENABLE, 1);
+	OUT_RING  (chan, b->d_enable);
 
-	BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
-	OUT_RING  (b->b_enable);
-	OUT_RING  (b->b_srcfunc);
-	OUT_RING  (b->b_dstfunc);
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_FUNC_ENABLE, 3);
+	OUT_RING  (chan, b->b_enable);
+	OUT_RING  (chan, b->b_srcfunc);
+	OUT_RING  (chan, b->b_dstfunc);
 
-	BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1);
-	OUT_RING  (b->c_mask);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_MASK, 1);
+	OUT_RING  (chan, b->c_mask);
 }
 
 static void nv10_state_emit_blend_color(struct nv10_context* nv10)
 {
 	struct pipe_blend_color *c = nv10->blend_color;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 
-	BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1);
-	OUT_RING  ((float_to_ubyte(c->color[3]) << 24)|
+	BEGIN_RING(chan, celsius, NV10TCL_BLEND_COLOR, 1);
+	OUT_RING  (chan,
+		   (float_to_ubyte(c->color[3]) << 24)|
 		   (float_to_ubyte(c->color[0]) << 16)|
 		   (float_to_ubyte(c->color[1]) << 8) |
 		   (float_to_ubyte(c->color[2]) << 0));
@@ -31,60 +38,66 @@ static void nv10_state_emit_blend_color(struct nv10_context* nv10)
 static void nv10_state_emit_rast(struct nv10_context* nv10)
 {
 	struct nv10_rasterizer_state *r = nv10->rast;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 
-	BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2);
-	OUT_RING  (r->shade_model);
-	OUT_RING  (r->line_width);
+	BEGIN_RING(chan, celsius, NV10TCL_SHADE_MODEL, 2);
+	OUT_RING  (chan, r->shade_model);
+	OUT_RING  (chan, r->line_width);
 
 
-	BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1);
-	OUT_RING  (r->point_size);
+	BEGIN_RING(chan, celsius, NV10TCL_POINT_SIZE, 1);
+	OUT_RING  (chan, r->point_size);
 
-	BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
-	OUT_RING  (r->poly_mode_front);
-	OUT_RING  (r->poly_mode_back);
+	BEGIN_RING(chan, celsius, NV10TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (chan, r->poly_mode_front);
+	OUT_RING  (chan, r->poly_mode_back);
 
 
-	BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2);
-	OUT_RING  (r->cull_face);
-	OUT_RING  (r->front_face);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE, 2);
+	OUT_RING  (chan, r->cull_face);
+	OUT_RING  (chan, r->front_face);
 
-	BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
-	OUT_RING  (r->line_smooth_en);
-	OUT_RING  (r->poly_smooth_en);
+	BEGIN_RING(chan, celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2);
+	OUT_RING  (chan, r->line_smooth_en);
+	OUT_RING  (chan, r->poly_smooth_en);
 
-	BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1);
-	OUT_RING  (r->cull_face_en);
+	BEGIN_RING(chan, celsius, NV10TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (chan, r->cull_face_en);
 }
 
 static void nv10_state_emit_dsa(struct nv10_context* nv10)
 {
 	struct nv10_depth_stencil_alpha_state *d = nv10->dsa;
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
 
-	BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1);
-	OUT_RING (d->depth.func);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_FUNC, 1);
+	OUT_RING (chan, d->depth.func);
 
-	BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
-	OUT_RING (d->depth.write_enable);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING (chan, d->depth.write_enable);
 
-	BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
-	OUT_RING (d->depth.test_enable);
+	BEGIN_RING(chan, celsius, NV10TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING (chan, d->depth.test_enable);
 
 #if 0
-	BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1);
-	OUT_RING (d->stencil.enable);
-	BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7);
-	OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+	BEGIN_RING(chan, celsius, NV10TCL_STENCIL_ENABLE, 1);
+	OUT_RING (chan, d->stencil.enable);
+	BEGIN_RING(chan, celsius, NV10TCL_STENCIL_MASK, 7);
+	OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
 #endif
 
-	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
-	OUT_RING (d->alpha.enabled);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING (chan, d->alpha.enabled);
 
-	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
-	OUT_RING (d->alpha.func);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_FUNC, 1);
+	OUT_RING (chan, d->alpha.func);
 
-	BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1);
-	OUT_RING (d->alpha.ref);
+	BEGIN_RING(chan, celsius, NV10TCL_ALPHA_FUNC_REF, 1);
+	OUT_RING (chan, d->alpha.ref);
 }
 
 static void nv10_state_emit_viewport(struct nv10_context* nv10)
@@ -108,6 +121,10 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
 	int colour_format = 0, zeta_format = 0;
         struct nv10_miptree *nv10mt = 0;
 
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
+
 	w = fb->cbufs[0]->width;
 	h = fb->cbufs[0]->height;
 	colour_format = fb->cbufs[0]->format;
@@ -144,11 +161,11 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
 	}
 
 	if (zeta) {
-		BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
-		OUT_RING  (rt->pitch | (zeta->pitch << 16));
+		BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+		OUT_RING  (chan, rt->pitch | (zeta->pitch << 16));
 	} else {
-		BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1);
-		OUT_RING  (rt->pitch | (rt->pitch << 16));
+		BEGIN_RING(chan, celsius, NV10TCL_RT_PITCH, 1);
+		OUT_RING  (chan, rt->pitch | (rt->pitch << 16));
 	}
 
 	nv10mt = (struct nv10_miptree *)rt->base.texture;
@@ -160,13 +177,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10)
 		nv10->zeta = nv10mt->buffer;
 	}
 
-	BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3);
-	OUT_RING  ((w << 16) | 0);
-	OUT_RING  ((h << 16) | 0);
-	OUT_RING  (rt_format);
-	BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
-	OUT_RING  (((w - 1) << 16) | 0 | 0x08000800);
-	OUT_RING  (((h - 1) << 16) | 0 | 0x08000800);
+	BEGIN_RING(chan, celsius, NV10TCL_RT_HORIZ, 3);
+	OUT_RING  (chan, (w << 16) | 0);
+	OUT_RING  (chan, (h << 16) | 0);
+	OUT_RING  (chan, rt_format);
+	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	OUT_RING  (chan, ((w - 1) << 16) | 0 | 0x08000800);
+	OUT_RING  (chan, ((h - 1) << 16) | 0 | 0x08000800);
 }
 
 static void nv10_vertex_layout(struct nv10_context *nv10)
@@ -201,6 +218,10 @@ static void nv10_vertex_layout(struct nv10_context *nv10)
 void
 nv10_emit_hw_state(struct nv10_context *nv10)
 {
+	struct nv10_screen *screen = nv10->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *celsius = screen->celsius;
+	struct nouveau_bo *rt_bo;
 	int i;
 
 	if (nv10->dirty & NV10_NEW_VERTPROG) {
@@ -269,38 +290,41 @@ nv10_emit_hw_state(struct nv10_context *nv10)
 	 */
 
 	/* Render target */
+	rt_bo = nouveau_bo(nv10->rt[0]);
 // XXX figre out who's who for NV10TCL_DMA_* and fill accordingly
-//	BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1);
-//	OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-	BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
-	OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+//	BEGIN_RING(chan, celsius, NV10TCL_DMA_COLOR0, 1);
+//	OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+	OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	if (nv10->zeta) {
+		struct nouveau_bo *zeta_bo = nouveau_bo(nv10->zeta);
 // XXX
-//		BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1);
-//		OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-		BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1);
-		OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+//		BEGIN_RING(chan, celsius, NV10TCL_DMA_ZETA, 1);
+//		OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(chan, celsius, NV10TCL_ZETA_OFFSET, 1);
+		OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 		/* XXX for when we allocate LMA on nv17 */
-/*		BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
-		OUT_RELOCl(nv10->zeta + lma_offset);*/
+/*		BEGIN_RING(chan, celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+		OUT_RELOCl(chan, nouveau_bo(nv10->zeta + lma_offset));*/
 	}
 
 	/* Vertex buffer */
-	BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1);
-	OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-	BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1);
-	OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, celsius, NV10TCL_DMA_VTXBUF0, 1);
+	OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, celsius, NV10TCL_COLOR_OFFSET, 1);
+	OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	/* Texture images */
 	for (i = 0; i < 2; i++) {
 		if (!(nv10->fp_samplers & (1 << i)))
 			continue;
-		BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1);
-		OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+		struct nouveau_bo *bo = nouveau_bo(nv10->tex[i].buffer);
+		BEGIN_RING(chan, celsius, NV10TCL_TX_OFFSET(i), 1);
+		OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
 			   NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-		BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1);
-		OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format,
+		BEGIN_RING(chan, celsius, NV10TCL_TX_FORMAT(i), 1);
+		OUT_RELOCd(chan, bo, nv10->tex[i].format,
 			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
 			   NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0,
 			   NV10TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 6a147a4159..1dba724887 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -10,10 +10,14 @@ nv20_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv20_context *nv20 = nv20_context(pipe);
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
 
 	draw_flush(nv20->draw);
 
-	FIRE_RING(fence);
+	FIRE_RING(chan);
+	if (fence)
+		*fence = NULL;
 }
 
 static void
@@ -31,348 +35,352 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
 {
 	struct nv20_screen *screen = nv20->screen;
 	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	int i;
 	float projectionmatrix[16];
-	const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL);
+	const boolean is_nv25tcl = (kelvin->grclass == NV25TCL);
 
-	BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1);
-	OUT_RING  (screen->sync->handle);
-	BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2);
-	OUT_RING  (chan->vram->handle);
-	OUT_RING  (chan->gart->handle); /* TEXTURE1 */
-	BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2);
-	OUT_RING  (chan->vram->handle);
-	OUT_RING  (chan->vram->handle); /* ZETA */
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_NOTIFY, 1);
+	OUT_RING  (chan, screen->sync->handle);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_TEXTURE0, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->gart->handle); /* TEXTURE1 */
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 2);
+	OUT_RING  (chan, chan->vram->handle);
+	OUT_RING  (chan, chan->vram->handle); /* ZETA */
 
-	BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1);
-	OUT_RING  (0); /* renouveau: beef0351, unique */
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_QUERY, 1);
+	OUT_RING  (chan, 0); /* renouveau: beef0351, unique */
 
-	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
-	OUT_RING  ((0xfff << 16) | 0x0);
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
-	OUT_RING  ((0xfff << 16) | 0x0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1);
+	OUT_RING  (chan, (0xfff << 16) | 0x0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1);
+	OUT_RING  (chan, (0xfff << 16) | 0x0);
 
 	for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) {
-		BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
-		OUT_RING  (0);
-		BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1);
+		OUT_RING  (chan, 0);
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(kelvin, 0x17e0, 3);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (1.0);
+	BEGIN_RING(chan, kelvin, 0x17e0, 3);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
 
 	if (is_nv25tcl) {
-		BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
-		OUT_RING  (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+		OUT_RING  (chan, NV20TCL_TX_RCOMP_LEQUAL | 0xdb0);
 	} else {
-		BEGIN_RING(kelvin, 0x1e68, 1);
-		OUT_RING  (0x4b800000); /* 16777216.000000 */
-		BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1);
-		OUT_RING  (NV20TCL_TX_RCOMP_LEQUAL);
+		BEGIN_RING(chan, kelvin, 0x1e68, 1);
+		OUT_RING  (chan, 0x4b800000); /* 16777216.000000 */
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_RCOMP, 1);
+		OUT_RING  (chan, NV20TCL_TX_RCOMP_LEQUAL);
 	}
 
-	BEGIN_RING(kelvin, 0x290, 1);
-	OUT_RING  ((0x10 << 16) | 1);
-	BEGIN_RING(kelvin, 0x9fc, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, 0x1d80, 1);
-	OUT_RING  (1);
-	BEGIN_RING(kelvin, 0x9f8, 1);
-	OUT_RING  (4);
-	BEGIN_RING(kelvin, 0x17ec, 3);
-	OUT_RINGf (0.0);
-	OUT_RINGf (1.0);
-	OUT_RINGf (0.0);
+	BEGIN_RING(chan, kelvin, 0x290, 1);
+	OUT_RING  (chan, (0x10 << 16) | 1);
+	BEGIN_RING(chan, kelvin, 0x9fc, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, 0x1d80, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, kelvin, 0x9f8, 1);
+	OUT_RING  (chan, 4);
+	BEGIN_RING(chan, kelvin, 0x17ec, 3);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 1.0);
+	OUT_RINGf (chan, 0.0);
 
 	if (is_nv25tcl) {
-		BEGIN_RING(kelvin, 0x1d88, 1);
-		OUT_RING  (3);
+		BEGIN_RING(chan, kelvin, 0x1d88, 1);
+		OUT_RING  (chan, 3);
 
-		BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
-		OUT_RING  (chan->vram->handle);
-		BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
-		OUT_RING  (chan->vram->handle);
+		BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY9, 1);
+		OUT_RING  (chan, chan->vram->handle);
+		BEGIN_RING(chan, kelvin, NV25TCL_DMA_IN_MEMORY8, 1);
+		OUT_RING  (chan, chan->vram->handle);
 	}
-	BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1);
-	OUT_RING  (0);	/* renouveau: beef1e10 */
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_FENCE, 1);
+	OUT_RING  (chan, 0);	/* renouveau: beef1e10 */
 
-	BEGIN_RING(kelvin, 0x1e98, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, 0x1e98, 1);
+	OUT_RING  (chan, 0);
 #if 0
 	if (is_nv25tcl) {
-		BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
-		OUT_RING  (NvDmaTT);	/* renouveau: beef0202 */
-		OUT_RING  (NvDmaFB);	/* renouveau: beef0201 */
+		BEGIN_RING(chan, NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2);
+		OUT_RING  (chan, NvDmaTT);	/* renouveau: beef0202 */
+		OUT_RING  (chan, NvDmaFB);	/* renouveau: beef0201 */
 
-		BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
-		OUT_RING  (NvDmaTT);	/* renouveau: beef0202 */
+		BEGIN_RING(chan, NvSub3D, NV20TCL_DMA_TEXTURE1, 1);
+		OUT_RING  (chan, NvDmaTT);	/* renouveau: beef0202 */
 	}
 #endif
-	BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, NV20TCL_NOTIFY, 1);
+	OUT_RING  (chan, 0);
 
-	BEGIN_RING(kelvin, 0x120, 3);
-	OUT_RING  (0);
-	OUT_RING  (1);
-	OUT_RING  (2);
+	BEGIN_RING(chan, kelvin, 0x120, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 1);
+	OUT_RING  (chan, 2);
 
 /* error: ILLEGAL_MTHD, PROTECTION_FAULT
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
-	OUT_RINGf (0.0);
-	OUT_RINGf (512.0);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 512.0);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);
 */
 
 	if (is_nv25tcl) {
-		BEGIN_RING(kelvin, 0x022c, 2);
-		OUT_RING  (0x280);
-		OUT_RING  (0x07d28000);
+		BEGIN_RING(chan, kelvin, 0x022c, 2);
+		OUT_RING  (chan, 0x280);
+		OUT_RING  (chan, 0x07d28000);
 	}
 
 /* * illegal method, protection fault
-	BEGIN_RING(NvSub3D, 0x1c2c, 1);
-	OUT_RING  (0); */
+	BEGIN_RING(chan, NvSub3D, 0x1c2c, 1);
+	OUT_RING  (chan, 0); */
 
 	if (is_nv25tcl) {
-		BEGIN_RING(kelvin, 0x1da4, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, kelvin, 0x1da4, 1);
+		OUT_RING  (chan, 0);
 	}
 
 /* * crashes with illegal method, protection fault
-	BEGIN_RING(NvSub3D, 0x1c18, 1);
-	OUT_RING  (0x200); */
+	BEGIN_RING(chan, NvSub3D, 0x1c18, 1);
+	OUT_RING  (chan, 0x200); */
 
-	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2);
-	OUT_RING  ((0 << 16) | 0);
-	OUT_RING  ((0 << 16) | 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 2);
+	OUT_RING  (chan, (0 << 16) | 0);
+	OUT_RING  (chan, (0 << 16) | 0);
 
 	/* *** Set state *** */
 
-	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
-	OUT_RING  (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
-	OUT_RING  (0);			/* NV20TCL_ALPHA_FUNC_REF */
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2);
+	OUT_RING  (chan, NV20TCL_ALPHA_FUNC_FUNC_ALWAYS);
+	OUT_RING  (chan, 0);			/* NV20TCL_ALPHA_FUNC_REF */
 
 	for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) {
-		BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_ENABLE(i), 1);
+		OUT_RING  (chan, 0);
 	}
-	BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
-	OUT_RING  (0x30d410d0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4);
-	OUT_RING  (0x00000c00);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1);
-	OUT_RING  (0x00011101);
-	BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2);
-	OUT_RING  (0x130e0300);
-	OUT_RING  (0x0c091c80);
-	BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
-	OUT_RING  (0x00000c00);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4);
-	OUT_RING  (0x20c400c0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
-	OUT_RING  (0x035125a0);
-	OUT_RING  (0);
-	OUT_RING  (0x40002000);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
-	OUT_RING  (0xffff0000);
-
-	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
-	OUT_RING  (NV20TCL_BLEND_FUNC_SRC_ONE);
-	OUT_RING  (NV20TCL_BLEND_FUNC_DST_ZERO);
-	OUT_RING  (0);			/* NV20TCL_BLEND_COLOR */
-	OUT_RING  (NV20TCL_BLEND_EQUATION_FUNC_ADD);
-	BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
-	OUT_RING  (0xff);
-	OUT_RING  (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
-	OUT_RING  (0);			/* NV20TCL_STENCIL_FUNC_REF */
-	OUT_RING  (0xff);		/* NV20TCL_STENCIL_FUNC_MASK */
-	OUT_RING  (NV20TCL_STENCIL_OP_FAIL_KEEP);
-	OUT_RING  (NV20TCL_STENCIL_OP_ZFAIL_KEEP);
-	OUT_RING  (NV20TCL_STENCIL_OP_ZPASS_KEEP);
-
-	BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
-	OUT_RING  (0);
-	OUT_RING  (NV20TCL_COLOR_LOGIC_OP_OP_COPY);
-	BEGIN_RING(kelvin, 0x17cc, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_OP, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_ALPHA(0), 4);
+	OUT_RING  (chan, 0x30d410d0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_RGB(0), 4);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_ENABLE, 1);
+	OUT_RING  (chan, 0x00011101);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_FINAL0, 2);
+	OUT_RING  (chan, 0x130e0300);
+	OUT_RING  (chan, 0x0c091c80);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_OUT_ALPHA(0), 4);
+	OUT_RING  (chan, 0x00000c00);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_IN_RGB(0), 4);
+	OUT_RING  (chan, 0x20c400c0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_COLOR0, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4);
+	OUT_RING  (chan, 0x035125a0);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0x40002000);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1);
+	OUT_RING  (chan, 0xffff0000);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 4);
+	OUT_RING  (chan, NV20TCL_BLEND_FUNC_SRC_ONE);
+	OUT_RING  (chan, NV20TCL_BLEND_FUNC_DST_ZERO);
+	OUT_RING  (chan, 0);			/* NV20TCL_BLEND_COLOR */
+	OUT_RING  (chan, NV20TCL_BLEND_EQUATION_FUNC_ADD);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+	OUT_RING  (chan, 0xff);
+	OUT_RING  (chan, NV20TCL_STENCIL_FUNC_FUNC_ALWAYS);
+	OUT_RING  (chan, 0);			/* NV20TCL_STENCIL_FUNC_REF */
+	OUT_RING  (chan, 0xff);		/* NV20TCL_STENCIL_FUNC_MASK */
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_FAIL_KEEP);
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_ZFAIL_KEEP);
+	OUT_RING  (chan, NV20TCL_STENCIL_OP_ZPASS_KEEP);
+
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, NV20TCL_COLOR_LOGIC_OP_OP_COPY);
+	BEGIN_RING(chan, kelvin, 0x17cc, 1);
+	OUT_RING  (chan, 0);
 	if (is_nv25tcl) {
-		BEGIN_RING(kelvin, 0x1d84, 1);
-		OUT_RING  (1);
+		BEGIN_RING(chan, kelvin, 0x1d84, 1);
+		OUT_RING  (chan, 1);
 	}
-	BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1);
-	OUT_RING  (0x00020000);
-	BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHTING_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_CONTROL, 1);
+	OUT_RING  (chan, 0x00020000);
+	BEGIN_RING(chan, kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_ENABLED_LIGHTS, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_NORMALIZE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_PATTERN(0),
 					NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE);
 	for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) {
-		OUT_RING(0xffffffff);
+		OUT_RING(chan, 0xffffffff);
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
-	OUT_RING  (0);
-	OUT_RING  (0);		/* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
-	OUT_RING  (0);		/* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
-	OUT_RING  (NV20TCL_DEPTH_FUNC_LESS);
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
-	OUT_RINGf (0.0);
-	OUT_RINGf (0.0);	/* NV20TCL.POLYGON_OFFSET_UNITS */
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
-	OUT_RING  (1);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);		/* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */
+	OUT_RING  (chan, 0);		/* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+	OUT_RING  (chan, NV20TCL_DEPTH_FUNC_LESS);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 0.0);	/* NV20TCL.POLYGON_OFFSET_UNITS */
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+	OUT_RING  (chan, 1);
 	if (!is_nv25tcl) {
-		BEGIN_RING(kelvin, 0x1d84, 1);
-		OUT_RING  (3);
+		BEGIN_RING(chan, kelvin, 0x1d84, 1);
+		OUT_RING  (chan, 3);
 	}
-	BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
+	BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
 	if (!is_nv25tcl) {
-		OUT_RING  (8);
+		OUT_RING  (chan, 8);
 	} else {
-		OUT_RINGf (1.0);
+		OUT_RINGf (chan, 1.0);
 	}
 	if (!is_nv25tcl) {
-		BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
-		OUT_RING  (0);
-		OUT_RING  (0);		/* NV20TCL.POINT_SMOOTH_ENABLE */
+		BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2);
+		OUT_RING  (chan, 0);
+		OUT_RING  (chan, 0);		/* NV20TCL.POINT_SMOOTH_ENABLE */
 	} else {
-		BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
-		OUT_RING  (0);
-		BEGIN_RING(kelvin, 0x0a1c, 1);
-		OUT_RING  (0x800);
+		BEGIN_RING(chan, kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_RING(chan, kelvin, 0x0a1c, 1);
+		OUT_RING  (chan, 0x800);
 	}
-	BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1);
-	OUT_RING  (8);
-	BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
-	OUT_RING  (NV20TCL_POLYGON_MODE_FRONT_FILL);
-	OUT_RING  (NV20TCL_POLYGON_MODE_BACK_FILL);
-	BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
-	OUT_RING  (NV20TCL_CULL_FACE_BACK);
-	OUT_RING  (NV20TCL_FRONT_FACE_CCW);
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1);
-	OUT_RING  (NV20TCL_SHADE_MODEL_SMOOTH);
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
-	OUT_RING  (0);
-	BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
+	BEGIN_RING(chan, kelvin, NV20TCL_LINE_WIDTH, 1);
+	OUT_RING  (chan, 8);
+	BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (chan, NV20TCL_POLYGON_MODE_FRONT_FILL);
+	OUT_RING  (chan, NV20TCL_POLYGON_MODE_BACK_FILL);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+	OUT_RING  (chan, NV20TCL_CULL_FACE_BACK);
+	OUT_RING  (chan, NV20TCL_FRONT_FACE_CCW);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 1);
+	OUT_RING  (chan, NV20TCL_SHADE_MODEL_SMOOTH);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1);
+	OUT_RING  (chan, 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_TX_GEN_S(0), 4 * NV20TCL_TX_GEN_S__SIZE);
 	for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) {
-		OUT_RING(0);
+		OUT_RING(chan, 0);
 	}
-	BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
-	OUT_RINGf (1.5);
-	OUT_RINGf (-0.090168);		/* NV20TCL.FOG_EQUATION_LINEAR */
-	OUT_RINGf (0.0);		/* NV20TCL.FOG_EQUATION_QUADRATIC */
-	BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2);
-	OUT_RING  (NV20TCL_FOG_MODE_EXP_2);
-	OUT_RING  (NV20TCL_FOG_COORD_DIST_COORD_FOG);
-	BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2);
-	OUT_RING  (0);
-	OUT_RING  (0);			/* NV20TCL.FOG_COLOR */
-	BEGIN_RING(kelvin, NV20TCL_ENGINE, 1);
-	OUT_RING  (NV20TCL_ENGINE_FIXED);
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3);
+	OUT_RINGf (chan, 1.5);
+	OUT_RINGf (chan, -0.090168);		/* NV20TCL.FOG_EQUATION_LINEAR */
+	OUT_RINGf (chan, 0.0);		/* NV20TCL.FOG_EQUATION_QUADRATIC */
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2);
+	OUT_RING  (chan, NV20TCL_FOG_MODE_EXP_2);
+	OUT_RING  (chan, NV20TCL_FOG_COORD_DIST_COORD_FOG);
+	BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2);
+	OUT_RING  (chan, 0);
+	OUT_RING  (chan, 0);			/* NV20TCL.FOG_COLOR */
+	BEGIN_RING(chan, kelvin, NV20TCL_ENGINE, 1);
+	OUT_RING  (chan, NV20TCL_ENGINE_FIXED);
 
 	for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) {
-		BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1);
+		OUT_RING  (chan, 0);
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
-	OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0);
-	OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
-	OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15);
+	OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 0.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
+	OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0); OUT_RINGf(chan, 1.0);
 	for (i = 4; i < 16; ++i) {
-		OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0);	OUT_RINGf(1.0);
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 0.0);
+		OUT_RINGf(chan, 1.0);
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
-	OUT_RING  (1);
-	BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
-	OUT_RING (0x00010101);
-	BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1);
-	OUT_RING (0);
+	BEGIN_RING(chan, kelvin, NV20TCL_EDGEFLAG_ENABLE, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+	OUT_RING (chan, 0x00010101);
+	BEGIN_RING(chan, kelvin, NV20TCL_CLEAR_VALUE, 1);
+	OUT_RING (chan, 0);
 
 	memset(projectionmatrix, 0, sizeof(projectionmatrix));
 	projectionmatrix[0*4+0] = 1.0;
 	projectionmatrix[1*4+1] = 1.0;
 	projectionmatrix[2*4+2] = 16777215.0;
 	projectionmatrix[3*4+3] = 1.0;
-	BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
+	BEGIN_RING(chan, kelvin, NV20TCL_PROJECTION_MATRIX(0), 16);
 	for (i = 0; i < 16; i++) {
-		OUT_RINGf  (projectionmatrix[i]);
+		OUT_RINGf  (chan, projectionmatrix[i]);
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
-	OUT_RINGf (0.0);
-	OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2);
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 16777216.0); /* [0, 1] scaled approx to [0, 2^24] */
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
-	OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */
-	OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */
-	OUT_RINGf (0.0);
-	OUT_RINGf (16777215.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4);
+	OUT_RINGf (chan, 0.0); /* x-offset, w/2 + 1.031250 */
+	OUT_RINGf (chan, 0.0); /* y-offset, h/2 + 0.030762 */
+	OUT_RINGf (chan, 0.0);
+	OUT_RINGf (chan, 16777215.0);
 
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
-	OUT_RINGf (0.0); /* no effect?, w/2 */
-	OUT_RINGf (0.0); /* no effect?, h/2 */
-	OUT_RINGf (16777215.0 * 0.5);
-	OUT_RINGf (65535.0);
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_SCALE_X, 4);
+	OUT_RINGf (chan, 0.0); /* no effect?, w/2 */
+	OUT_RINGf (chan, 0.0); /* no effect?, h/2 */
+	OUT_RINGf (chan, 16777215.0 * 0.5);
+	OUT_RINGf (chan, 65535.0);
 
-	FIRE_RING (NULL);
+	FIRE_RING (chan);
 }
 
 struct pipe_context *
diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h
index c88a1bd9bd..c7dfadaa31 100644
--- a/src/gallium/drivers/nv20/nv20_context.h
+++ b/src/gallium/drivers/nv20/nv20_context.h
@@ -15,10 +15,6 @@
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_context.h"
 
-#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
-	struct nv20_screen *ctx = nv20->screen
-#include "nouveau/nouveau_push.h"
-
 #include "nv20_state.h"
 
 #define NOUVEAU_ERR(fmt, args...) \
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
index 2db4a4015a..dedbec73f3 100644
--- a/src/gallium/drivers/nv20/nv20_fragtex.c
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -52,6 +52,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 	struct nv20_miptree *nv20mt = nv20->tex_miptree[unit];
 	struct pipe_texture *pt = &nv20mt->base;
 	struct nv20_texture_format *tf;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	uint32_t txf, txs, txp;
 
 	tf = nv20_fragtex_format(pt->format);
@@ -82,15 +85,15 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 		return;
 	}
 
-	BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8);
-	OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-	OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
-	OUT_RING  (ps->wrap);
-	OUT_RING  (0x40000000); /* enable */
-	OUT_RING  (txs);
-	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width0 << 16) | pt->height0);
-	OUT_RING  (ps->bcol);
+	BEGIN_RING(chan, kelvin, NV10TCL_TX_OFFSET(unit), 8);
+	OUT_RELOCl(chan, nouveau_bo(nv20mt->buffer), 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+	OUT_RELOCd(chan, nouveau_bo(nv20mt->buffer),txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/);
+	OUT_RING  (chan, ps->wrap);
+	OUT_RING  (chan, 0x40000000); /* enable */
+	OUT_RING  (chan, txs);
+	OUT_RING  (chan, ps->filt | 0x2000 /* magic */);
+	OUT_RING  (chan, (pt->width0 << 16) | pt->height0);
+	OUT_RING  (chan, ps->bcol);
 #endif
 }
 
@@ -99,6 +102,9 @@ nv20_fragtex_bind(struct nv20_context *nv20)
 {
 #if 0
 	struct nv20_fragment_program *fp = nv20->fragprog.active;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	unsigned samplers, unit;
 
 	samplers = nv20->fp_samplers & ~fp->samplers;
@@ -106,8 +112,8 @@ nv20_fragtex_bind(struct nv20_context *nv20)
 		unit = ffs(samplers) - 1;
 		samplers &= ~(1 << unit);
 
-		BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, kelvin, NV10TCL_TX_ENABLE(unit), 1);
+		OUT_RING  (chan, 0);
 	}
 
 	samplers = nv20->dirty_samplers & fp->samplers;
diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
index ddfcdb8057..2e145672da 100644
--- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c
+++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c
@@ -81,12 +81,15 @@ nv20_vbuf_render(struct vbuf_render *render)
 void nv20_vtxbuf_bind( struct nv20_context* nv20 )
 {
 #if 0
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	int i;
 	for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) {
-		BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
-		OUT_RING(0/*nv20->vtxbuf*/);
-		BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1);
-		OUT_RING(0/*XXX*/);
+		BEGIN_RING(chan, kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1);
+		OUT_RING(chan, 0/*nv20->vtxbuf*/);
+		BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(i) ,1);
+		OUT_RING(chan, 0/*XXX*/);
 	}
 #endif
 }
@@ -202,6 +205,9 @@ nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type)
 static unsigned
 nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
 {
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	uint32_t hwfmt = 0;
 	unsigned fields;
 
@@ -231,8 +237,8 @@ nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr)
 		return 0;
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_VTXFMT(hwattr), 1);
-	OUT_RING(hwfmt);
+	BEGIN_RING(chan, kelvin, NV20TCL_VTXFMT(hwattr), 1);
+	OUT_RING(chan, hwfmt);
 	return fields;
 }
 
@@ -262,6 +268,9 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
 		uint nr_indices)
 {
 	struct nv20_context *nv20 = nv20_render->nv20;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	struct vertex_info *vinfo = &nv20->vertex_info;
 	unsigned nr_fields;
 	int max_push;
@@ -270,29 +279,29 @@ nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render,
 
 	nr_fields = nv20__emit_vertex_array_format(nv20);
 
-	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
-	OUT_RING(nv20_render->hwprim);
+	BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+	OUT_RING(chan, nv20_render->hwprim);
 
 	max_push = 1200 / nr_fields;
 	while (nr_indices) {
 		int i;
 		int push = MIN2(nr_indices, max_push);
 
-		BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
+		BEGIN_RING_NI(chan, kelvin, NV20TCL_VERTEX_DATA, push * nr_fields);
 		for (i = 0; i < push; i++) {
 			/* XXX: fixme to handle other than floats? */
 			int f = nr_fields;
 			float *attrv = (float*)&data[indices[i] * vsz];
 			while (f-- > 0)
-				OUT_RINGf(*attrv++);
+				OUT_RINGf(chan, *attrv++);
 		}
 
 		nr_indices -= push;
 		indices += push;
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
-	OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP);
+	BEGIN_RING(chan, kelvin, NV20TCL_VERTEX_BEGIN_END, 1);
+	OUT_RING(chan, NV20TCL_VERTEX_BEGIN_END_STOP);
 }
 
 static void
@@ -301,20 +310,23 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
 		uint nr_indices)
 {
 	struct nv20_context *nv20 = nv20_render->nv20;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 	int push, i;
 
 	NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n");
 
-	BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
-	OUT_RELOCl(nv20_render->pbuffer, 0,
+	BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1);
+	OUT_RELOCl(chan, nouveau_bo(nv20_render->pbuffer), 0,
 			NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
 
-	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
-	OUT_RING(nv20_render->hwprim);
+	BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING(chan, nv20_render->hwprim);
 
 	if (nr_indices & 1) {
-		BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1);
-		OUT_RING  (indices[0]);
+		BEGIN_RING(chan, kelvin, NV10TCL_VB_ELEMENT_U32, 1);
+		OUT_RING  (chan, indices[0]);
 		indices++; nr_indices--;
 	}
 
@@ -322,16 +334,16 @@ nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render,
 		// XXX too big/small ? check the size
 		push = MIN2(nr_indices, 1200 * 2);
 
-		BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
+		BEGIN_RING_NI(chan, kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1);
 		for (i = 0; i < push; i+=2)
-			OUT_RING((indices[i+1] << 16) | indices[i]);
+			OUT_RING(chan, (indices[i+1] << 16) | indices[i]);
 
 		nr_indices -= push;
 		indices  += push;
 	}
 
-	BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
-	OUT_RING  (0);
+	BEGIN_RING(chan, kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1);
+	OUT_RING  (chan, 0);
 }
 
 static void
diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c
index 63cba1f412..6bbd1fdae9 100644
--- a/src/gallium/drivers/nv20/nv20_state_emit.c
+++ b/src/gallium/drivers/nv20/nv20_state_emit.c
@@ -5,27 +5,34 @@
 static void nv20_state_emit_blend(struct nv20_context* nv20)
 {
 	struct nv20_blend_state *b = nv20->blend;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 
-	BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1);
-	OUT_RING  (b->d_enable);
+	BEGIN_RING(chan, kelvin, NV20TCL_DITHER_ENABLE, 1);
+	OUT_RING  (chan, b->d_enable);
 
-	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
-	OUT_RING  (b->b_enable);
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1);
+	OUT_RING  (chan, b->b_enable);
 
-	BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
-	OUT_RING  (b->b_srcfunc);
-	OUT_RING  (b->b_dstfunc);
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_FUNC_SRC, 2);
+	OUT_RING  (chan, b->b_srcfunc);
+	OUT_RING  (chan, b->b_dstfunc);
 
-	BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1);
-	OUT_RING  (b->c_mask);
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_MASK, 1);
+	OUT_RING  (chan, b->c_mask);
 }
 
 static void nv20_state_emit_blend_color(struct nv20_context* nv20)
 {
 	struct pipe_blend_color *c = nv20->blend_color;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 
-	BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1);
-	OUT_RING  ((float_to_ubyte(c->color[3]) << 24)|
+	BEGIN_RING(chan, kelvin, NV20TCL_BLEND_COLOR, 1);
+	OUT_RING  (chan,
+		   (float_to_ubyte(c->color[3]) << 24)|
 		   (float_to_ubyte(c->color[0]) << 16)|
 		   (float_to_ubyte(c->color[1]) << 8) |
 		   (float_to_ubyte(c->color[2]) << 0));
@@ -34,63 +41,69 @@ static void nv20_state_emit_blend_color(struct nv20_context* nv20)
 static void nv20_state_emit_rast(struct nv20_context* nv20)
 {
 	struct nv20_rasterizer_state *r = nv20->rast;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 
-	BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2);
-	OUT_RING  (r->shade_model);
-	OUT_RING  (r->line_width);
+	BEGIN_RING(chan, kelvin, NV20TCL_SHADE_MODEL, 2);
+	OUT_RING  (chan, r->shade_model);
+	OUT_RING  (chan, r->line_width);
 
 
-	BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1);
-	OUT_RING  (r->point_size);
+	BEGIN_RING(chan, kelvin, NV20TCL_POINT_SIZE, 1);
+	OUT_RING  (chan, r->point_size);
 
-	BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
-	OUT_RING  (r->poly_mode_front);
-	OUT_RING  (r->poly_mode_back);
+	BEGIN_RING(chan, kelvin, NV20TCL_POLYGON_MODE_FRONT, 2);
+	OUT_RING  (chan, r->poly_mode_front);
+	OUT_RING  (chan, r->poly_mode_back);
 
 
-	BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2);
-	OUT_RING  (r->cull_face);
-	OUT_RING  (r->front_face);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE, 2);
+	OUT_RING  (chan, r->cull_face);
+	OUT_RING  (chan, r->front_face);
 
-	BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
-	OUT_RING  (r->line_smooth_en);
-	OUT_RING  (r->poly_smooth_en);
+	BEGIN_RING(chan, kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2);
+	OUT_RING  (chan, r->line_smooth_en);
+	OUT_RING  (chan, r->poly_smooth_en);
 
-	BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
-	OUT_RING  (r->cull_face_en);
+	BEGIN_RING(chan, kelvin, NV20TCL_CULL_FACE_ENABLE, 1);
+	OUT_RING  (chan, r->cull_face_en);
 }
 
 static void nv20_state_emit_dsa(struct nv20_context* nv20)
 {
 	struct nv20_depth_stencil_alpha_state *d = nv20->dsa;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1);
-	OUT_RING (d->depth.func);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_FUNC, 1);
+	OUT_RING (chan, d->depth.func);
 
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
-	OUT_RING (d->depth.write_enable);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1);
+	OUT_RING (chan, d->depth.write_enable);
 
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
-	OUT_RING (d->depth.test_enable);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1);
+	OUT_RING (chan, d->depth.test_enable);
 
-	BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1);
-	OUT_RING (1);
+	BEGIN_RING(chan, kelvin, NV20TCL_DEPTH_UNK17D8, 1);
+	OUT_RING (chan, 1);
 
 #if 0
-	BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1);
-	OUT_RING (d->stencil.enable);
-	BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7);
-	OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_ENABLE, 1);
+	OUT_RING (chan, d->stencil.enable);
+	BEGIN_RING(chan, kelvin, NV20TCL_STENCIL_MASK, 7);
+	OUT_RINGp (chan, (uint32_t *)&(d->stencil.wmask), 7);
 #endif
 
-	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
-	OUT_RING (d->alpha.enabled);
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1);
+	OUT_RING (chan, d->alpha.enabled);
 
-	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
-	OUT_RING (d->alpha.func);
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1);
+	OUT_RING (chan, d->alpha.func);
 
-	BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
-	OUT_RING (d->alpha.ref);
+	BEGIN_RING(chan, kelvin, NV20TCL_ALPHA_FUNC_REF, 1);
+	OUT_RING (chan, d->alpha.ref);
 }
 
 static void nv20_state_emit_viewport(struct nv20_context* nv20)
@@ -101,9 +114,13 @@ static void nv20_state_emit_scissor(struct nv20_context* nv20)
 {
 	/* NV20TCL_SCISSOR_* is probably a software method */
 /*	struct pipe_scissor_state *s = nv20->scissor;
-	BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2);
-	OUT_RING  (((s->maxx - s->minx) << 16) | s->minx);
-	OUT_RING  (((s->maxy - s->miny) << 16) | s->miny);*/
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
+
+	BEGIN_RING(chan, kelvin, NV20TCL_SCISSOR_HORIZ, 2);
+	OUT_RING  (chan, ((s->maxx - s->minx) << 16) | s->minx);
+	OUT_RING  (chan, ((s->maxy - s->miny) << 16) | s->miny);*/
 }
 
 static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
@@ -113,6 +130,9 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
 	uint32_t rt_format, w, h;
 	int colour_format = 0, zeta_format = 0;
 	struct nv20_miptree *nv20mt = 0;
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
 
 	w = fb->cbufs[0]->width;
 	h = fb->cbufs[0]->height;
@@ -150,11 +170,11 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
 	}
 
 	if (zeta) {
-		BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
-		OUT_RING  (rt->pitch | (zeta->pitch << 16));
+		BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+		OUT_RING  (chan, rt->pitch | (zeta->pitch << 16));
 	} else {
-		BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1);
-		OUT_RING  (rt->pitch | (rt->pitch << 16));
+		BEGIN_RING(chan, kelvin, NV20TCL_RT_PITCH, 1);
+		OUT_RING  (chan, rt->pitch | (rt->pitch << 16));
 	}
 
 	nv20mt = (struct nv20_miptree *)rt->base.texture;
@@ -166,13 +186,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20)
 		nv20->zeta = nv20mt->buffer;
 	}
 
-	BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3);
-	OUT_RING  ((w << 16) | 0);
-	OUT_RING  ((h << 16) | 0); /*NV20TCL_RT_VERT */
-	OUT_RING  (rt_format); /* NV20TCL_RT_FORMAT */
-	BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
-	OUT_RING  (((w - 1) << 16) | 0);
-	OUT_RING  (((h - 1) << 16) | 0);
+	BEGIN_RING(chan, kelvin, NV20TCL_RT_HORIZ, 3);
+	OUT_RING  (chan, (w << 16) | 0);
+	OUT_RING  (chan, (h << 16) | 0); /*NV20TCL_RT_VERT */
+	OUT_RING  (chan, rt_format); /* NV20TCL_RT_FORMAT */
+	BEGIN_RING(chan, kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	OUT_RING  (chan, ((w - 1) << 16) | 0);
+	OUT_RING  (chan, ((h - 1) << 16) | 0);
 }
 
 static void nv20_vertex_layout(struct nv20_context *nv20)
@@ -293,6 +313,10 @@ static void nv20_vertex_layout(struct nv20_context *nv20)
 void
 nv20_emit_hw_state(struct nv20_context *nv20)
 {
+	struct nv20_screen *screen = nv20->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *kelvin = screen->kelvin;
+	struct nouveau_bo *rt_bo;
 	int i;
 
 	if (nv20->dirty & NV20_NEW_VERTPROG) {
@@ -361,36 +385,39 @@ nv20_emit_hw_state(struct nv20_context *nv20)
 	 */
 
 	/* Render target */
-	BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1);
-	OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-	BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
-	OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	rt_bo = nouveau_bo(nv20->rt[0]);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_COLOR, 1);
+	OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+	OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	if (nv20->zeta) {
-		BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1);
-		OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-		BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1);
-		OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		struct nouveau_bo *zeta_bo = nouveau_bo(nv20->zeta);
+		BEGIN_RING(chan, kelvin, NV20TCL_DMA_ZETA, 1);
+		OUT_RELOCo(chan, zeta_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+		BEGIN_RING(chan, kelvin, NV20TCL_ZETA_OFFSET, 1);
+		OUT_RELOCl(chan, zeta_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 		/* XXX for when we allocate LMA on nv17 */
-/*		BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
-		OUT_RELOCl(nv20->zeta + lma_offset);*/
+/*		BEGIN_RING(chan, kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1);
+		OUT_RELOCl(chan, nouveau_bo(nv20->zeta + lma_offset));*/
 	}
 
 	/* Vertex buffer */
-	BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1);
-	OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
-	BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1);
-	OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, kelvin, NV20TCL_DMA_VTXBUF0, 1);
+	OUT_RELOCo(chan, rt_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, kelvin, NV20TCL_COLOR_OFFSET, 1);
+	OUT_RELOCl(chan, rt_bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
 
 	/* Texture images */
 	for (i = 0; i < 2; i++) {
 		if (!(nv20->fp_samplers & (1 << i)))
 			continue;
-		BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1);
-		OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM |
+		struct nouveau_bo *bo = nouveau_bo(nv20->tex[i].buffer);
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_OFFSET(i), 1);
+		OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM |
 			   NOUVEAU_BO_GART | NOUVEAU_BO_RD);
-		BEGIN_RING(kelvin, NV20TCL_TX_FORMAT(i), 1);
-		OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format,
+		BEGIN_RING(chan, kelvin, NV20TCL_TX_FORMAT(i), 1);
+		OUT_RELOCd(chan, bo, nv20->tex[i].format,
 			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD |
 			   NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0,
 			   NV20TCL_TX_FORMAT_DMA1);
diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c
index 38b39159f1..54572e9ab3 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -10,15 +10,20 @@ nv30_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 
 	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
-		BEGIN_RING(rankine, 0x1fd8, 1);
-		OUT_RING  (2);
-		BEGIN_RING(rankine, 0x1fd8, 1);
-		OUT_RING  (1);
+		BEGIN_RING(chan, rankine, 0x1fd8, 1);
+		OUT_RING  (chan, 2);
+		BEGIN_RING(chan, rankine, 0x1fd8, 1);
+		OUT_RING  (chan, 1);
 	}
 
-	FIRE_RING(fence);
+	FIRE_RING(chan);
+	if (fence)
+		*fence = NULL;
 }
 
 static void
diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h
index e175dfa0c4..e59449287b 100644
--- a/src/gallium/drivers/nv30/nv30_context.h
+++ b/src/gallium/drivers/nv30/nv30_context.h
@@ -14,10 +14,6 @@
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_context.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
-	struct nv30_screen *ctx = nv30->screen
-#include "nouveau/nouveau_push.h"
 #include "nouveau/nouveau_stateobj.h"
 
 #include "nv30_state.h"
diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c
index 1d1c8a484e..e27e9ccbf6 100644
--- a/src/gallium/drivers/nv30/nv30_query.c
+++ b/src/gallium/drivers/nv30/nv30_query.c
@@ -41,6 +41,9 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
 	struct nv30_query *q = nv30_query(pq);
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 
 	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
 
@@ -57,10 +60,10 @@ nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 		assert(0);
 	nouveau_notifier_reset(nv30->screen->query, q->object->start);
 
-	BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1);
-	OUT_RING  (1);
-	BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1);
-	OUT_RING  (1);
+	BEGIN_RING(chan, rankine, NV34TCL_QUERY_RESET, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, rankine, NV34TCL_QUERY_UNK17CC, 1);
+	OUT_RING  (chan, 1);
 
 	q->ready = FALSE;
 }
@@ -69,12 +72,15 @@ static void
 nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 	struct nv30_query *q = nv30_query(pq);
 
-	BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1);
-	OUT_RING  ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
+	BEGIN_RING(chan, rankine, NV34TCL_QUERY_GET, 1);
+	OUT_RING  (chan, (0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) |
 		   ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT));
-	FIRE_RING(NULL);
+	FIRE_RING(chan);
 }
 
 static boolean
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index 0e620b67a2..bccc805324 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -168,7 +168,9 @@ nv30_draw_arrays(struct pipe_context *pipe,
 		 unsigned mode, unsigned start, unsigned count)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
-	struct nouveau_channel *chan = nv30->screen->base.channel;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 	unsigned restart = 0;
 
 	nv30_vbo_set_idxbuf(nv30, NULL, 0);
@@ -186,17 +188,17 @@ nv30_draw_arrays(struct pipe_context *pipe,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
 					mode, start, count, &restart);
 		if (!vc) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		nr = (vc & 0xff);
 		if (nr) {
-			BEGIN_RING(rankine, NV34TCL_VB_VERTEX_BATCH, 1);
-			OUT_RING  (((nr - 1) << 24) | start);
+			BEGIN_RING(chan, rankine, NV34TCL_VB_VERTEX_BATCH, 1);
+			OUT_RING  (chan, ((nr - 1) << 24) | start);
 			start += nr;
 		}
 
@@ -206,15 +208,15 @@ nv30_draw_arrays(struct pipe_context *pipe,
 
 			nr -= push;
 
-			BEGIN_RING_NI(rankine, NV34TCL_VB_VERTEX_BATCH, push);
+			BEGIN_RING_NI(chan, rankine, NV34TCL_VB_VERTEX_BATCH, push);
 			while (push--) {
-				OUT_RING(((0x100 - 1) << 24) | start);
+				OUT_RING(chan, ((0x100 - 1) << 24) | start);
 				start += 0x100;
 			}
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		count -= vc;
 		start = restart;
@@ -228,7 +230,9 @@ static INLINE void
 nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv30->screen->base.channel;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 
 	while (count) {
 		uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		if (vc & 1) {
-			BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
-			OUT_RING  (elts[0]);
+			BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+			OUT_RING  (chan, elts[0]);
 			elts++; vc--;
 		}
 
@@ -258,16 +262,16 @@ nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
 
 			push = MIN2(vc, 2047 * 2);
 
-			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+			BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
 			for (i = 0; i < push; i+=2)
-				OUT_RING((elts[i+1] << 16) | elts[i]);
+				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -277,7 +281,9 @@ static INLINE void
 nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv30->screen->base.channel;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 
 	while (count) {
 		uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		if (vc & 1) {
-			BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
-			OUT_RING  (elts[0]);
+			BEGIN_RING(chan, rankine, NV34TCL_VB_ELEMENT_U32, 1);
+			OUT_RING  (chan, elts[0]);
 			elts++; vc--;
 		}
 
@@ -307,16 +313,16 @@ nv30_draw_elements_u16(struct nv30_context *nv30, void *ib,
 
 			push = MIN2(vc, 2047 * 2);
 
-			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
+			BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
 			for (i = 0; i < push; i+=2)
-				OUT_RING((elts[i+1] << 16) | elts[i]);
+				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -326,7 +332,9 @@ static INLINE void
 nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv30->screen->base.channel;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 
 	while (count) {
 		uint32_t *elts = (uint32_t *)ib + start;
@@ -337,26 +345,26 @@ nv30_draw_elements_u32(struct nv30_context *nv30, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		while (vc) {
 			push = MIN2(vc, 2047);
 
-			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push);
-			OUT_RINGp    (elts, push);
+			BEGIN_RING_NI(chan, rankine, NV34TCL_VB_ELEMENT_U32, push);
+			OUT_RINGp    (chan, elts, push);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -400,7 +408,9 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
 		       unsigned mode, unsigned start, unsigned count)
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
-	struct nouveau_channel *chan = nv30->screen->base.channel;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 	unsigned restart = 0;
 
 	while (count) {
@@ -411,17 +421,17 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
 					mode, start, count, &restart);
 		if (!vc) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		nr = (vc & 0xff);
 		if (nr) {
-			BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1);
-			OUT_RING  (((nr - 1) << 24) | start);
+			BEGIN_RING(chan, rankine, NV34TCL_VB_INDEX_BATCH, 1);
+			OUT_RING  (chan, ((nr - 1) << 24) | start);
 			start += nr;
 		}
 
@@ -431,15 +441,15 @@ nv30_draw_elements_vbo(struct pipe_context *pipe,
 
 			nr -= push;
 
-			BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push);
+			BEGIN_RING_NI(chan, rankine, NV34TCL_VB_INDEX_BATCH, push);
 			while (push--) {
-				OUT_RING(((0x100 - 1) << 24) | start);
+				OUT_RING(chan, ((0x100 - 1) << 24) | start);
 				start += 0x100;
 			}
 		}
 
-		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, rankine, NV34TCL_VERTEX_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		count -= vc;
 		start = restart;
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 5d60984622..4e6d3d01c7 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -650,7 +650,9 @@ static boolean
 nv30_vertprog_validate(struct nv30_context *nv30)
 { 
 	struct pipe_screen *pscreen = nv30->pipe.screen;
-	struct nouveau_grobj *rankine = nv30->screen->rankine;
+	struct nv30_screen *screen = nv30->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *rankine = screen->rankine;
 	struct nv30_vertex_program *vp;
 	struct pipe_buffer *constbuf;
 	boolean upload_code = FALSE, upload_data = FALSE;
@@ -770,9 +772,9 @@ nv30_vertprog_validate(struct nv30_context *nv30)
 				       4 * sizeof(float));
 			}
 
-			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
-			OUT_RING  (i + vp->data->start);
-			OUT_RINGp ((uint32_t *)vpd->value, 4);
+			BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5);
+			OUT_RING  (chan, i + vp->data->start);
+			OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
 		}
 
 		if (constbuf)
@@ -788,11 +790,11 @@ nv30_vertprog_validate(struct nv30_context *nv30)
 				vp->insns[i].data[2], vp->insns[i].data[3]);
 		}
 #endif
-		BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
-		OUT_RING  (vp->exec->start);
+		BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1);
+		OUT_RING  (chan, vp->exec->start);
 		for (i = 0; i < vp->nr_insns; i++) {
-			BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
-			OUT_RINGp (vp->insns[i].data, 4);
+			BEGIN_RING(chan, rankine, NV34TCL_VP_UPLOAD_INST(0), 4);
+			OUT_RINGp (chan, vp->insns[i].data, 4);
 		}
 	}
 
diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c
index d56c7a6b49..f79ae4db84 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -10,15 +10,20 @@ nv40_flush(struct pipe_context *pipe, unsigned flags,
 	   struct pipe_fence_handle **fence)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	if (flags & PIPE_FLUSH_TEXTURE_CACHE) {
-		BEGIN_RING(curie, 0x1fd8, 1);
-		OUT_RING  (2);
-		BEGIN_RING(curie, 0x1fd8, 1);
-		OUT_RING  (1);
+		BEGIN_RING(chan, curie, 0x1fd8, 1);
+		OUT_RING  (chan, 2);
+		BEGIN_RING(chan, curie, 0x1fd8, 1);
+		OUT_RING  (chan, 1);
 	}
 
-	FIRE_RING(fence);
+	FIRE_RING(chan);
+	if (fence)
+		*fence = NULL;
 }
 
 static void
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h
index 112f017e8a..e219bb537a 100644
--- a/src/gallium/drivers/nv40/nv40_context.h
+++ b/src/gallium/drivers/nv40/nv40_context.h
@@ -14,10 +14,6 @@
 #include "nouveau/nouveau_winsys.h"
 #include "nouveau/nouveau_gldefs.h"
 #include "nouveau/nouveau_context.h"
-
-#define NOUVEAU_PUSH_CONTEXT(ctx)                                              \
-	struct nv40_screen *ctx = nv40->screen
-#include "nouveau/nouveau_push.h"
 #include "nouveau/nouveau_stateobj.h"
 
 #include "nv40_state.h"
diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c
index e5b9f4a5c8..d826f8c2f5 100644
--- a/src/gallium/drivers/nv40/nv40_draw.c
+++ b/src/gallium/drivers/nv40/nv40_draw.c
@@ -31,6 +31,9 @@ nv40_render_stage(struct draw_stage *stage)
 static INLINE void
 nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
 {
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 	unsigned i;
 
 	for (i = 0; i < nv40->swtnl.nr_attribs; i++) {
@@ -41,30 +44,30 @@ nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v)
 		case EMIT_OMIT:
 			break;
 		case EMIT_1F:
-			BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1);
-			OUT_RING  (fui(v->data[idx][0]));
+			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_1F(hw), 1);
+			OUT_RING  (chan, fui(v->data[idx][0]));
 			break;
 		case EMIT_2F:
-			BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
-			OUT_RING  (fui(v->data[idx][0]));
-			OUT_RING  (fui(v->data[idx][1]));
+			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(hw), 2);
+			OUT_RING  (chan, fui(v->data[idx][0]));
+			OUT_RING  (chan, fui(v->data[idx][1]));
 			break;
 		case EMIT_3F:
-			BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
-			OUT_RING  (fui(v->data[idx][0]));
-			OUT_RING  (fui(v->data[idx][1]));
-			OUT_RING  (fui(v->data[idx][2]));
+			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_3F_X(hw), 3);
+			OUT_RING  (chan, fui(v->data[idx][0]));
+			OUT_RING  (chan, fui(v->data[idx][1]));
+			OUT_RING  (chan, fui(v->data[idx][2]));
 			break;
 		case EMIT_4F:
-			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
-			OUT_RING  (fui(v->data[idx][0]));
-			OUT_RING  (fui(v->data[idx][1]));
-			OUT_RING  (fui(v->data[idx][2]));
-			OUT_RING  (fui(v->data[idx][3]));
+			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4F_X(hw), 4);
+			OUT_RING  (chan, fui(v->data[idx][0]));
+			OUT_RING  (chan, fui(v->data[idx][1]));
+			OUT_RING  (chan, fui(v->data[idx][2]));
+			OUT_RING  (chan, fui(v->data[idx][3]));
 			break;
 		case EMIT_4UB:
-			BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
-			OUT_RING  (pack_ub4(float_to_ubyte(v->data[idx][0]),
+			BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_4UB(hw), 1);
+			OUT_RING  (chan, pack_ub4(float_to_ubyte(v->data[idx][0]),
 					    float_to_ubyte(v->data[idx][1]),
 					    float_to_ubyte(v->data[idx][2]),
 					    float_to_ubyte(v->data[idx][3])));
@@ -82,7 +85,11 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
 {
 	struct nv40_render_stage *rs = nv40_render_stage(stage);
 	struct nv40_context *nv40 = rs->nv40;
-	struct nouveau_pushbuf *pb = nv40->screen->base.channel->pushbuf;
+
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_pushbuf *pb = chan->pushbuf;
+	struct nouveau_grobj *curie = screen->curie;
 	unsigned i;
 
 	/* Ensure there's room for 4xfloat32 + potentially 3 begin/end */
@@ -91,19 +98,19 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
 			NOUVEAU_ERR("AIII, missed flush\n");
 			assert(0);
 		}
-		FIRE_RING(NULL);
+		FIRE_RING(chan);
 		nv40_state_emit(nv40);
 	}
 
 	/* Switch primitive modes if necessary */
 	if (rs->prim != mode) {
 		if (rs->prim != NV40TCL_BEGIN_END_STOP) {
-			BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-			OUT_RING  (NV40TCL_BEGIN_END_STOP);	
+			BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+			OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (mode);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, mode);
 		rs->prim = mode;
 	}
 
@@ -115,8 +122,8 @@ nv40_render_prim(struct draw_stage *stage, struct prim_header *prim,
 	 * off the primitive now.
 	 */
 	if (pb->remaining < ((count * 20) + 6)) {
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (NV40TCL_BEGIN_END_STOP);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
 		rs->prim = NV40TCL_BEGIN_END_STOP;
 	}
 }
@@ -144,10 +151,13 @@ nv40_render_flush(struct draw_stage *draw, unsigned flags)
 {
 	struct nv40_render_stage *rs = nv40_render_stage(draw);
 	struct nv40_context *nv40 = rs->nv40;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	if (rs->prim != NV40TCL_BEGIN_END_STOP) {
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (NV40TCL_BEGIN_END_STOP);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
 		rs->prim = NV40TCL_BEGIN_END_STOP;
 	}
 }
diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c
index 7874aedd42..8ed4a67dd0 100644
--- a/src/gallium/drivers/nv40/nv40_query.c
+++ b/src/gallium/drivers/nv40/nv40_query.c
@@ -41,6 +41,9 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
 	struct nv40_query *q = nv40_query(pq);
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER);
 
@@ -57,10 +60,10 @@ nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
 		assert(0);
 	nouveau_notifier_reset(nv40->screen->query, q->object->start);
 
-	BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1);
-	OUT_RING  (1);
-	BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1);
-	OUT_RING  (1);
+	BEGIN_RING(chan, curie, NV40TCL_QUERY_RESET, 1);
+	OUT_RING  (chan, 1);
+	BEGIN_RING(chan, curie, NV40TCL_QUERY_UNK17CC, 1);
+	OUT_RING  (chan, 1);
 
 	q->ready = FALSE;
 }
@@ -70,11 +73,14 @@ nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
 	struct nv40_query *q = nv40_query(pq);
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
-	BEGIN_RING(curie, NV40TCL_QUERY_GET, 1);
-	OUT_RING  ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
+	BEGIN_RING(chan, curie, NV40TCL_QUERY_GET, 1);
+	OUT_RING  (chan, (0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) |
 		   ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT));
-	FIRE_RING(NULL);
+	FIRE_RING(chan);
 }
 
 static boolean
diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c
index 789ed16126..13fe854915 100644
--- a/src/gallium/drivers/nv40/nv40_state_emit.c
+++ b/src/gallium/drivers/nv40/nv40_state_emit.c
@@ -54,9 +54,10 @@ nv40_state_do_validate(struct nv40_context *nv40,
 void
 nv40_state_emit(struct nv40_context *nv40)
 {
-	struct nouveau_channel *chan = nv40->screen->base.channel;
 	struct nv40_state *state = &nv40->state;
 	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 	unsigned i;
 	uint64_t states;
 
@@ -80,10 +81,10 @@ nv40_state_emit(struct nv40_context *nv40)
 
 	if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) |
 			    (1ULL << NV40_STATE_FRAGTEX0))) {
-		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
-		OUT_RING  (2);
-		BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1);
-		OUT_RING  (1);
+		BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+		OUT_RING  (chan, 2);
+		BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
+		OUT_RING  (chan, 1);
 	}
 
 	state->dirty = 0;
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index d45de15000..90087f0bee 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -169,7 +169,9 @@ nv40_draw_arrays(struct pipe_context *pipe,
 		 unsigned mode, unsigned start, unsigned count)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
-	struct nouveau_channel *chan = nv40->screen->base.channel;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 	unsigned restart;
 
 	nv40_vbo_set_idxbuf(nv40, NULL, 0);
@@ -187,17 +189,17 @@ nv40_draw_arrays(struct pipe_context *pipe,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
 					mode, start, count, &restart);
 		if (!vc) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		nr = (vc & 0xff);
 		if (nr) {
-			BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1);
-			OUT_RING  (((nr - 1) << 24) | start);
+			BEGIN_RING(chan, curie, NV40TCL_VB_VERTEX_BATCH, 1);
+			OUT_RING  (chan, ((nr - 1) << 24) | start);
 			start += nr;
 		}
 
@@ -207,15 +209,15 @@ nv40_draw_arrays(struct pipe_context *pipe,
 
 			nr -= push;
 
-			BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push);
+			BEGIN_RING_NI(chan, curie, NV40TCL_VB_VERTEX_BATCH, push);
 			while (push--) {
-				OUT_RING(((0x100 - 1) << 24) | start);
+				OUT_RING(chan, ((0x100 - 1) << 24) | start);
 				start += 0x100;
 			}
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		count -= vc;
 		start = restart;
@@ -228,7 +230,9 @@ static INLINE void
 nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv40->screen->base.channel;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	while (count) {
 		uint8_t *elts = (uint8_t *)ib + start;
@@ -239,17 +243,17 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		if (vc & 1) {
-			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
-			OUT_RING  (elts[0]);
+			BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+			OUT_RING  (chan, elts[0]);
 			elts++; vc--;
 		}
 
@@ -258,16 +262,16 @@ nv40_draw_elements_u08(struct nv40_context *nv40, void *ib,
 
 			push = MIN2(vc, 2047 * 2);
 
-			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+			BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
 			for (i = 0; i < push; i+=2)
-				OUT_RING((elts[i+1] << 16) | elts[i]);
+				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -277,7 +281,9 @@ static INLINE void
 nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv40->screen->base.channel;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	while (count) {
 		uint16_t *elts = (uint16_t *)ib + start;
@@ -288,17 +294,17 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		if (vc & 1) {
-			BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1);
-			OUT_RING  (elts[0]);
+			BEGIN_RING(chan, curie, NV40TCL_VB_ELEMENT_U32, 1);
+			OUT_RING  (chan, elts[0]);
 			elts++; vc--;
 		}
 
@@ -307,16 +313,16 @@ nv40_draw_elements_u16(struct nv40_context *nv40, void *ib,
 
 			push = MIN2(vc, 2047 * 2);
 
-			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
+			BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U16, push >> 1);
 			for (i = 0; i < push; i+=2)
-				OUT_RING((elts[i+1] << 16) | elts[i]);
+				OUT_RING(chan, (elts[i+1] << 16) | elts[i]);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -326,7 +332,9 @@ static INLINE void
 nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
 		       unsigned mode, unsigned start, unsigned count)
 {
-	struct nouveau_channel *chan = nv40->screen->base.channel;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 
 	while (count) {
 		uint32_t *elts = (uint32_t *)ib + start;
@@ -337,26 +345,26 @@ nv40_draw_elements_u32(struct nv40_context *nv40, void *ib,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1,
 					mode, start, count, &restart);
 		if (vc == 0) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		count -= vc;
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		while (vc) {
 			push = MIN2(vc, 2047);
 
-			BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push);
-			OUT_RINGp    (elts, push);
+			BEGIN_RING_NI(chan, curie, NV40TCL_VB_ELEMENT_U32, push);
+			OUT_RINGp    (chan, elts, push);
 
 			vc -= push;
 			elts += push;
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		start = restart;
 	}
@@ -400,7 +408,9 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
 		       unsigned mode, unsigned start, unsigned count)
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
-	struct nouveau_channel *chan = nv40->screen->base.channel;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 	unsigned restart;
 
 	while (count) {
@@ -411,17 +421,17 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
 		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256,
 					mode, start, count, &restart);
 		if (!vc) {
-			FIRE_RING(NULL);
+			FIRE_RING(chan);
 			continue;
 		}
 		
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (nvgl_primitive(mode));
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, nvgl_primitive(mode));
 
 		nr = (vc & 0xff);
 		if (nr) {
-			BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1);
-			OUT_RING  (((nr - 1) << 24) | start);
+			BEGIN_RING(chan, curie, NV40TCL_VB_INDEX_BATCH, 1);
+			OUT_RING  (chan, ((nr - 1) << 24) | start);
 			start += nr;
 		}
 
@@ -431,15 +441,15 @@ nv40_draw_elements_vbo(struct pipe_context *pipe,
 
 			nr -= push;
 
-			BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push);
+			BEGIN_RING_NI(chan, curie, NV40TCL_VB_INDEX_BATCH, push);
 			while (push--) {
-				OUT_RING(((0x100 - 1) << 24) | start);
+				OUT_RING(chan, ((0x100 - 1) << 24) | start);
 				start += 0x100;
 			}
 		}
 
-		BEGIN_RING(curie, NV40TCL_BEGIN_END, 1);
-		OUT_RING  (0);
+		BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
+		OUT_RING  (chan, 0);
 
 		count -= vc;
 		start = restart;
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index d9fc31006f..afbb2cb47b 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -834,7 +834,9 @@ static boolean
 nv40_vertprog_validate(struct nv40_context *nv40)
 { 
 	struct pipe_screen *pscreen = nv40->pipe.screen;
-	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct nv40_screen *screen = nv40->screen;
+	struct nouveau_channel *chan = screen->base.channel;
+	struct nouveau_grobj *curie = screen->curie;
 	struct nv40_vertex_program *vp;
 	struct pipe_buffer *constbuf;
 	boolean upload_code = FALSE, upload_data = FALSE;
@@ -974,9 +976,9 @@ check_gpu_resources:
 				       4 * sizeof(float));
 			}
 
-			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
-			OUT_RING  (i + vp->data->start);
-			OUT_RINGp ((uint32_t *)vpd->value, 4);
+			BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_CONST_ID, 5);
+			OUT_RING  (chan, i + vp->data->start);
+			OUT_RINGp (chan, (uint32_t *)vpd->value, 4);
 		}
 
 		if (constbuf)
@@ -993,11 +995,11 @@ check_gpu_resources:
 			NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]);
 		}
 #endif
-		BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
-		OUT_RING  (vp->exec->start);
+		BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
+		OUT_RING  (chan, vp->exec->start);
 		for (i = 0; i < vp->nr_insns; i++) {
-			BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4);
-			OUT_RINGp (vp->insns[i].data, 4);
+			BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
+			OUT_RINGp (chan, vp->insns[i].data, 4);
 		}
 	}
 
-- 
cgit v1.2.3


From c77ade8fed2be933af3f493932cedee7ca868b04 Mon Sep 17 00:00:00 2001
From: Maarten Maathuis <madman2003@gmail.com>
Date: Tue, 29 Dec 2009 23:59:08 +0100
Subject: nouveau: rewrite nouveau_stateobj to use BEGIN_RING properly

- The previous solution was hacky and didn't do subchannel autobinding.
- The behaviour should match what libdrm_nouveau does closely.
- The solution remains statically sized, but when debugging is on it will check
for abuse.

Signed-off-by: Maarten Maathuis <madman2003@gmail.com>
---
 src/gallium/drivers/nouveau/nouveau_stateobj.h | 288 ++++++++++++++++++-------
 src/gallium/drivers/nv10/nv10_screen.c         |   1 -
 src/gallium/drivers/nv20/nv20_screen.c         |   1 -
 src/gallium/drivers/nv30/nv30_fragprog.c       |   2 +-
 src/gallium/drivers/nv30/nv30_fragtex.c        |   4 +-
 src/gallium/drivers/nv30/nv30_screen.c         |   3 +-
 src/gallium/drivers/nv30/nv30_state.c          |   6 +-
 src/gallium/drivers/nv30/nv30_state_blend.c    |   2 +-
 src/gallium/drivers/nv30/nv30_state_fb.c       |   2 +-
 src/gallium/drivers/nv30/nv30_state_scissor.c  |   2 +-
 src/gallium/drivers/nv30/nv30_state_stipple.c  |   4 +-
 src/gallium/drivers/nv30/nv30_state_viewport.c |   2 +-
 src/gallium/drivers/nv30/nv30_vbo.c            |   6 +-
 src/gallium/drivers/nv30/nv30_vertprog.c       |   2 +-
 src/gallium/drivers/nv40/nv40_fragprog.c       |   2 +-
 src/gallium/drivers/nv40/nv40_fragtex.c        |   4 +-
 src/gallium/drivers/nv40/nv40_screen.c         |   3 +-
 src/gallium/drivers/nv40/nv40_state.c          |   6 +-
 src/gallium/drivers/nv40/nv40_state_blend.c    |   2 +-
 src/gallium/drivers/nv40/nv40_state_fb.c       |   2 +-
 src/gallium/drivers/nv40/nv40_state_scissor.c  |   2 +-
 src/gallium/drivers/nv40/nv40_state_stipple.c  |   4 +-
 src/gallium/drivers/nv40/nv40_state_viewport.c |   2 +-
 src/gallium/drivers/nv40/nv40_vbo.c            |   6 +-
 src/gallium/drivers/nv40/nv40_vertprog.c       |   2 +-
 src/gallium/drivers/nv50/nv50_program.c        |   6 +-
 src/gallium/drivers/nv50/nv50_screen.c         |   9 +-
 src/gallium/drivers/nv50/nv50_state.c          |   6 +-
 src/gallium/drivers/nv50/nv50_state_validate.c |  13 +-
 src/gallium/drivers/nv50/nv50_tex.c            |  10 +-
 src/gallium/drivers/nv50/nv50_vbo.c            |   6 +-
 31 files changed, 265 insertions(+), 145 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
index 77ff7dcf20..e844f6abb3 100644
--- a/src/gallium/drivers/nouveau/nouveau_stateobj.h
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -3,41 +3,95 @@
 
 #include "util/u_debug.h"
 
+#ifdef DEBUG
+#define DEBUG_NOUVEAU_STATEOBJ
+#endif /* DEBUG */
+
 struct nouveau_stateobj_reloc {
 	struct nouveau_bo *bo;
 
-	unsigned offset;
-	unsigned packet;
+	struct nouveau_grobj *gr;
+	uint32_t push_offset;
+	uint32_t mthd;
 
-	unsigned data;
+	uint32_t data;
 	unsigned flags;
 	unsigned vor;
 	unsigned tor;
 };
 
+struct nouveau_stateobj_start {
+	struct nouveau_grobj *gr;
+	uint32_t mthd;
+	uint32_t size;
+	unsigned offset;
+};
+
 struct nouveau_stateobj {
 	struct pipe_reference reference;
 
-	unsigned *push;
+	struct nouveau_stateobj_start *start;
 	struct nouveau_stateobj_reloc *reloc;
 
-	unsigned *cur;
-	unsigned cur_packet;
+	/* Common memory pool for data. */
+	uint32_t *pool;
+	unsigned pool_cur;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	unsigned start_alloc;
+	unsigned reloc_alloc;
+	unsigned pool_alloc;
+#endif  /* DEBUG_NOUVEAU_STATEOBJ */
+
+	unsigned total; /* includes begin_ring */
+	unsigned cur; /* excludes begin_ring, offset from "cur_start" */
+	unsigned cur_start;
 	unsigned cur_reloc;
 };
 
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+	unsigned i, nr, total = 0;
+
+	for (i = 0; i < so->cur_start; i++) {
+		if (so->start[i].gr->subc > -1)
+			debug_printf("+0x%04x: 0x%08x\n", total++,
+				(so->start[i].size << 18) | (so->start[i].gr->subc << 13)
+				| so->start[i].mthd);
+		else
+			debug_printf("+0x%04x: 0x%08x\n", total++,
+				(so->start[i].size << 18) | so->start[i].mthd);
+		for (nr = 0; nr < so->start[i].size; nr++, total++)
+			debug_printf("+0x%04x: 0x%08x\n", total,
+				so->pool[so->start[i].offset + nr]);
+	}
+}
+
 static INLINE struct nouveau_stateobj *
-so_new(unsigned push, unsigned reloc)
+so_new(unsigned start, unsigned push, unsigned reloc)
 {
 	struct nouveau_stateobj *so;
 
 	so = MALLOC(sizeof(struct nouveau_stateobj));
 	pipe_reference_init(&so->reference, 1);
-	so->push = MALLOC(sizeof(unsigned) * push);
-	so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
-
-	so->cur = so->push;
-	so->cur_reloc = so->cur_packet = 0;
+	so->total = so->cur = so->cur_start = so->cur_reloc = 0;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	so->start_alloc = start;
+	so->reloc_alloc = reloc;
+	so->pool_alloc = push;
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+	so->start = MALLOC(start * sizeof(struct nouveau_stateobj_start));
+	so->reloc = MALLOC(reloc * sizeof(struct nouveau_stateobj_reloc));
+	so->pool = MALLOC(push * sizeof(uint32_t));
+	so->pool_cur = 0;
+
+	if (!so->start || !so->reloc || !so->pool) {
+		debug_printf("malloc failed\n");
+		assert(0);
+	}
 
 	return so;
 }
@@ -48,54 +102,115 @@ so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
 	struct nouveau_stateobj *so = *pso;
 	int i;
 
-        if (pipe_reference(&(*pso)->reference, &ref->reference)) {
-		free(so->push);
+	if (pipe_reference(&(*pso)->reference, &ref->reference)) {
+		FREE(so->start);
 		for (i = 0; i < so->cur_reloc; i++)
 			nouveau_bo_ref(NULL, &so->reloc[i].bo);
-		free(so->reloc);
-		free(so);
+		FREE(so->reloc);
+		FREE(so->pool);
+		FREE(so);
 	}
 	*pso = ref;
 }
 
 static INLINE void
-so_data(struct nouveau_stateobj *so, unsigned data)
+so_data(struct nouveau_stateobj *so, uint32_t data)
 {
-	(*so->cur++) = (data);
-	so->cur_packet += 4;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->cur >= so->start[so->cur_start - 1].size) {
+		debug_printf("exceeding specified size\n");
+		assert(0);
+	}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+	so->pool[so->start[so->cur_start - 1].offset + so->cur++] = data;
 }
 
 static INLINE void
-so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+so_datap(struct nouveau_stateobj *so, uint32_t *data, unsigned size)
 {
-	so->cur_packet += (4 * size);
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if ((so->cur + size) > so->start[so->cur_start - 1].size) {
+		debug_printf("exceeding specified size\n");
+		assert(0);
+	}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
 	while (size--)
-		(*so->cur++) = (*data++);
+		so->pool[so->start[so->cur_start - 1].offset + so->cur++] =
+			*data++;
 }
 
 static INLINE void
 so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
 	  unsigned mthd, unsigned size)
 {
-	so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
-	so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+	struct nouveau_stateobj_start *start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->start_alloc <= so->cur_start) {
+		debug_printf("exceeding num_start size\n");
+		assert(0);
+	} else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+		start = so->start;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->cur_start > 0 && start[so->cur_start - 1].size > so->cur) {
+		debug_printf("previous so_method was not filled\n");
+		assert(0);
+	}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+	so->start = start;
+	start[so->cur_start].gr = gr;
+	start[so->cur_start].mthd = mthd;
+	start[so->cur_start].size = size;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->pool_alloc < (size + so->pool_cur)) {
+		debug_printf("exceeding num_pool size\n");
+		assert(0);
+	}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+	start[so->cur_start].offset = so->pool_cur;
+	so->pool_cur += size;
+
+	so->cur_start++;
+	/* The 1 is for *this* begin_ring. */
+	so->total += so->cur + 1;
+	so->cur = 0;
 }
 
 static INLINE void
 so_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo,
 	 unsigned data, unsigned flags, unsigned vor, unsigned tor)
 {
-	struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
-	
-	r->bo = NULL;
-	nouveau_bo_ref(bo, &r->bo);
-	r->offset = so->cur - so->push;
-	r->packet = so->cur_packet;
-	r->data = data;
-	r->flags = flags;
-	r->vor = vor;
-	r->tor = tor;
+	struct nouveau_stateobj_reloc *r;
+
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->reloc_alloc <= so->cur_reloc) {
+		debug_printf("exceeding num_reloc size\n");
+		assert(0);
+	} else
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+		r = so->reloc;
+
+	so->reloc = r;
+	r[so->cur_reloc].bo = NULL;
+	nouveau_bo_ref(bo, &(r[so->cur_reloc].bo));
+	r[so->cur_reloc].gr = so->start[so->cur_start-1].gr;
+	r[so->cur_reloc].push_offset = so->total + so->cur;
+	r[so->cur_reloc].data = data;
+	r[so->cur_reloc].flags = flags;
+	r[so->cur_reloc].mthd = so->start[so->cur_start-1].mthd +
+							(so->cur << 2);
+	r[so->cur_reloc].vor = vor;
+	r[so->cur_reloc].tor = tor;
+
 	so_data(so, data);
+	so->cur_reloc++;
 }
 
 /* Determine if this buffer object is referenced by this state object. */
@@ -111,15 +226,6 @@ so_bo_is_reloc(struct nouveau_stateobj *so, struct nouveau_bo *bo)
 	return false;
 }
 
-static INLINE void
-so_dump(struct nouveau_stateobj *so)
-{
-	unsigned i, nr = so->cur - so->push;
-
-	for (i = 0; i < nr; i++)
-		debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
-}
-
 static INLINE void
 so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
@@ -127,75 +233,93 @@ so_emit(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 	unsigned nr, i;
 	int ret = 0;
 
-	nr = so->cur - so->push;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+	if (so->start[so->cur_start - 1].size > so->cur) {
+		debug_printf("emit: previous so_method was not filled\n");
+		assert(0);
+	}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+	/* We cannot update total in case we so_emit again. */
+	nr = so->total + so->cur;
+
 	/* This will flush if we need space.
 	 * We don't actually need the marker.
 	 */
 	if ((ret = nouveau_pushbuf_marker_emit(chan, nr, so->cur_reloc))) {
 		debug_printf("so_emit failed marker emit with error %d\n", ret);
-		return;
+		assert(0);
+	}
+
+	/* Submit data. This will ensure proper binding of objects. */
+	for (i = 0; i < so->cur_start; i++) {
+		BEGIN_RING(chan, so->start[i].gr, so->start[i].mthd, so->start[i].size);
+		OUT_RINGp(chan, &(so->pool[so->start[i].offset]), so->start[i].size);
 	}
-	pb->remaining -= nr;
 
-	memcpy(pb->cur, so->push, nr * 4);
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur + r->offset,
-					   r->bo, r->data, 0, r->flags,
-					   r->vor, r->tor))) {
+		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur - nr +
+						r->push_offset, r->bo, r->data,
+						0, r->flags, r->vor, r->tor))) {
 			debug_printf("so_emit failed reloc with error %d\n", ret);
-			goto out;
+			assert(0);
 		}
 	}
-out:
-	pb->cur += nr;
 }
 
 static INLINE void
 so_emit_reloc_markers(struct nouveau_channel *chan, struct nouveau_stateobj *so)
 {
 	struct nouveau_pushbuf *pb = chan->pushbuf;
+	struct nouveau_grobj *gr = NULL;
 	unsigned i;
 	int ret = 0;
 
 	if (!so)
 		return;
 
-	i = so->cur_reloc << 1;
-	/* This will flush if we need space.
-	 * We don't actually need the marker.
-	 */
-	if ((ret = nouveau_pushbuf_marker_emit(chan, i, i))) {
-		debug_printf("so_emit_reloc_markers failed marker emit with" \
-			"error %d\n", ret);
-		return;
-	}
-	pb->remaining -= i;
-
+	/* If we need to flush in flush notify, then we have a problem anyway. */
 	for (i = 0; i < so->cur_reloc; i++) {
 		struct nouveau_stateobj_reloc *r = &so->reloc[i];
 
-		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
-					   r->packet, 0,
-					   (r->flags & (NOUVEAU_BO_VRAM |
-							NOUVEAU_BO_GART |
-							NOUVEAU_BO_RDWR)) |
-					   NOUVEAU_BO_DUMMY, 0, 0))) {
-			debug_printf("so_emit_reloc_markers failed reloc" \
-						"with error %d\n", ret);
-			pb->remaining += ((so->cur_reloc - i) << 1);
-			return;
+#ifdef DEBUG_NOUVEAU_STATEOBJ
+		if (r->mthd & 0x40000000) {
+			debug_printf("error: NI mthd 0x%08X\n", r->mthd);
+			continue;
+		}
+#endif /* DEBUG_NOUVEAU_STATEOBJ */
+
+		/* The object needs to be bound and the system must know the
+		 * subchannel is being used. Otherwise it will discard it.
+		 */
+		if (gr != r->gr) {
+			BEGIN_RING(chan, r->gr, 0x100, 1);
+			OUT_RING(chan, 0);
+			gr = r->gr;
+		}
+
+		/* Some relocs really don't like to be hammered,
+		 * NOUVEAU_BO_DUMMY makes sure it only
+		 * happens when needed.
+		 */
+		ret = OUT_RELOC(chan, r->bo, (r->gr->subc << 13) | (1<< 18) |
+			r->mthd, (r->flags & (NOUVEAU_BO_VRAM | NOUVEAU_BO_GART
+				| NOUVEAU_BO_RDWR)) | NOUVEAU_BO_DUMMY, 0, 0);
+		if (ret) {
+			debug_printf("OUT_RELOC failed %d\n", ret);
+			assert(0);
 		}
-		if ((ret = nouveau_pushbuf_emit_reloc(chan, pb->cur++, r->bo,
-					   r->data, 0,
-					   r->flags | NOUVEAU_BO_DUMMY,
-					   r->vor, r->tor))) {
-			debug_printf("so_emit_reloc_markers failed reloc" \
-						"with error %d\n", ret);
-			pb->remaining += ((so->cur_reloc - i) << 1) - 1;
-			return;
+
+		ret = OUT_RELOC(chan, r->bo, r->data, r->flags |
+			NOUVEAU_BO_DUMMY, r->vor, r->tor);
+		if (ret) {
+			debug_printf("OUT_RELOC failed %d\n", ret);
+			assert(0);
 		}
+
+		pb->remaining -= 2;
 	}
 }
 
diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c
index 6a39ddeaac..69a6dab866 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -180,7 +180,6 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
 		return FALSE;
 	}
-	BIND_RING(chan, screen->celsius, 7);
 
 	/* 2D engine setup */
 	screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c
index a0973f1ebd..d091335063 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -176,7 +176,6 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
 		return FALSE;
 	}
-	BIND_RING(chan, screen->kelvin, 7);
 
 	/* 2D engine setup */
 	screen->eng2d = nv04_surface_2d_init(&screen->base);
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index d1ff18e2df..2d565cb631 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -837,7 +837,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
 	fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
 	nv30_fragprog_upload(nv30, fp);
 
-	so = so_new(8, 1);
+	so = so_new(4, 4, 1);
 	so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
 	so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
 		      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index b3293ee700..9893567891 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -106,7 +106,7 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 
 	txs = tf->swizzle;
 
-	so = so_new(16, 2);
+	so = so_new(1, 8, 2);
 	so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8);
 	so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
 	so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -135,7 +135,7 @@ nv30_fragtex_validate(struct nv30_context *nv30)
 		unit = ffs(samplers) - 1;
 		samplers &= ~(1 << unit);
 
-		so = so_new(2, 0);
+		so = so_new(1, 1, 0);
 		so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1);
 		so_data  (so, 0);
 		so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 760467f736..9ed48178dc 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -233,7 +233,6 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
 		return FALSE;
 	}
-	BIND_RING(chan, screen->rankine, 7);
 
 	/* 2D engine setup */
 	screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -270,7 +269,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	/* Static rankine initialisation */
-	so = so_new(128, 0);
+	so = so_new(36, 60, 0);
 	so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1);
 	so_data  (so, screen->sync->handle);
 	so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c
index e6321b480f..a80dfb0488 100644
--- a/src/gallium/drivers/nv30/nv30_state.c
+++ b/src/gallium/drivers/nv30/nv30_state.c
@@ -14,7 +14,7 @@ nv30_blend_state_create(struct pipe_context *pipe,
 	struct nv30_context *nv30 = nv30_context(pipe);
 	struct nouveau_grobj *rankine = nv30->screen->rankine;
 	struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso));
-	struct nouveau_stateobj *so = so_new(16, 0);
+	struct nouveau_stateobj *so = so_new(5, 8, 0);
 
 	if (cso->blend_enable) {
 		so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3);
@@ -300,7 +300,7 @@ nv30_rasterizer_state_create(struct pipe_context *pipe,
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
 	struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
-	struct nouveau_stateobj *so = so_new(32, 0);
+	struct nouveau_stateobj *so = so_new(9, 19, 0);
 	struct nouveau_grobj *rankine = nv30->screen->rankine;
 
 	/*XXX: ignored:
@@ -435,7 +435,7 @@ nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 {
 	struct nv30_context *nv30 = nv30_context(pipe);
 	struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
-	struct nouveau_stateobj *so = so_new(32, 0);
+	struct nouveau_stateobj *so = so_new(5, 21, 0);
 	struct nouveau_grobj *rankine = nv30->screen->rankine;
 
 	so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3);
diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c
index 64cf9ae93a..c36d58c040 100644
--- a/src/gallium/drivers/nv30/nv30_state_blend.c
+++ b/src/gallium/drivers/nv30/nv30_state_blend.c
@@ -18,7 +18,7 @@ struct nv30_state_entry nv30_state_blend = {
 static boolean
 nv30_state_blend_colour_validate(struct nv30_context *nv30)
 {
-	struct nouveau_stateobj *so = so_new(2, 0);
+	struct nouveau_stateobj *so = so_new(1, 1, 0);
 	struct pipe_blend_color *bcol = &nv30->blend_colour;
 
 	so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c
index 6f6d1740d6..2ed2ea55e8 100644
--- a/src/gallium/drivers/nv30/nv30_state_fb.c
+++ b/src/gallium/drivers/nv30/nv30_state_fb.c
@@ -10,7 +10,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30)
 	struct nv04_surface *rt[2], *zeta = NULL;
 	uint32_t rt_enable = 0, rt_format = 0;
 	int i, colour_format = 0, zeta_format = 0, depth_only = 0;
-	struct nouveau_stateobj *so = so_new(64, 10);
+	struct nouveau_stateobj *so = so_new(12, 18, 10);
 	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
 	unsigned w = fb->width;
 	unsigned h = fb->height;
diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c
index 3ac7a8471e..ba61a9e24a 100644
--- a/src/gallium/drivers/nv30/nv30_state_scissor.c
+++ b/src/gallium/drivers/nv30/nv30_state_scissor.c
@@ -12,7 +12,7 @@ nv30_state_scissor_validate(struct nv30_context *nv30)
 		return FALSE;
 	nv30->state.scissor_enabled = rast->scissor;
 
-	so = so_new(3, 0);
+	so = so_new(1, 2, 0);
 	so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2);
 	if (nv30->state.scissor_enabled) {
 		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c
index d0c791ac08..ed520a4f43 100644
--- a/src/gallium/drivers/nv30/nv30_state_stipple.c
+++ b/src/gallium/drivers/nv30/nv30_state_stipple.c
@@ -14,14 +14,14 @@ nv30_state_stipple_validate(struct nv30_context *nv30)
 	if (rast->poly_stipple_enable) {
 		unsigned i;
 
-		so = so_new(35, 0);
+		so = so_new(2, 33, 0);
 		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
 		so_data  (so, 1);
 		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32);
 		for (i = 0; i < 32; i++)
 			so_data(so, nv30->stipple[i]);
 	} else {
-		so = so_new(2, 0);
+		so = so_new(1, 1, 0);
 		so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1);
 		so_data  (so, 0);
 	}
diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c
index c3eb413dac..2d7781292b 100644
--- a/src/gallium/drivers/nv30/nv30_state_viewport.c
+++ b/src/gallium/drivers/nv30/nv30_state_viewport.c
@@ -19,7 +19,7 @@ nv30_state_viewport_validate(struct nv30_context *nv30)
 		return FALSE;
 	nv30->state.viewport_bypass = bypass;
 
-	so = so_new(11, 0);
+	so = so_new(3, 10, 0);
 	if (!bypass) {
 		so_method(so, nv30->screen->rankine,
 			  NV34TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c
index bccc805324..1c5db03ea2 100644
--- a/src/gallium/drivers/nv30/nv30_vbo.c
+++ b/src/gallium/drivers/nv30/nv30_vbo.c
@@ -491,9 +491,9 @@ nv30_vbo_validate(struct nv30_context *nv30)
 	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 	int hw;
 
-	vtxbuf = so_new(20, 18);
+	vtxbuf = so_new(3, 17, 18);
 	so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr);
-	vtxfmt = so_new(17, 0);
+	vtxfmt = so_new(1, 16, 0);
 	so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr);
 
 	for (hw = 0; hw < nv30->vtxelt_nr; hw++) {
@@ -506,7 +506,7 @@ nv30_vbo_validate(struct nv30_context *nv30)
 
 		if (!vb->stride) {
 			if (!sattr)
-				sattr = so_new(16 * 5, 0);
+				sattr = so_new(16, 16 * 4, 0);
 
 			if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) {
 				so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c
index 4e6d3d01c7..e77a5be3f2 100644
--- a/src/gallium/drivers/nv30/nv30_vertprog.c
+++ b/src/gallium/drivers/nv30/nv30_vertprog.c
@@ -686,7 +686,7 @@ nv30_vertprog_validate(struct nv30_context *nv30)
 				assert(0);
 		}
 
-		so = so_new(2, 0);
+		so = so_new(1, 1, 0);
 		so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1);
 		so_data  (so, vp->exec->start);
 		so_ref(so, &vp->so);
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c
index bb9c85cc43..1237066c39 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -919,7 +919,7 @@ nv40_fragprog_validate(struct nv40_context *nv40)
 	fp->buffer = pscreen->buffer_create(pscreen, 0x100, 0, fp->insn_len * 4);
 	nv40_fragprog_upload(nv40, fp);
 
-	so = so_new(4, 1);
+	so = so_new(2, 2, 1);
 	so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1);
 	so_reloc (so, nouveau_bo(fp->buffer), 0, NOUVEAU_BO_VRAM |
 		      NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index 44abc84596..aad9198210 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -108,7 +108,7 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
 
 	txs = tf->swizzle;
 
-	so = so_new(16, 2);
+	so = so_new(2, 9, 2);
 	so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8);
 	so_reloc (so, bo, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0);
 	so_reloc (so, bo, txf, tex_flags | NOUVEAU_BO_OR,
@@ -139,7 +139,7 @@ nv40_fragtex_validate(struct nv40_context *nv40)
 		unit = ffs(samplers) - 1;
 		samplers &= ~(1 << unit);
 
-		so = so_new(2, 0);
+		so = so_new(1, 1, 0);
 		so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1);
 		so_data  (so, 0);
 		so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]);
diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c
index d01e712805..9e55e5a089 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -215,7 +215,6 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		NOUVEAU_ERR("Error creating 3D object: %d\n", ret);
 		return FALSE;
 	}
-	BIND_RING(chan, screen->curie, 7);
 
 	/* 2D engine setup */
 	screen->eng2d = nv04_surface_2d_init(&screen->base);
@@ -252,7 +251,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	/* Static curie initialisation */
-	so = so_new(128, 0);
+	so = so_new(16, 25, 0);
 	so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1);
 	so_data  (so, screen->sync->handle);
 	so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2);
diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c
index ed55d29aff..ed0ca9e02c 100644
--- a/src/gallium/drivers/nv40/nv40_state.c
+++ b/src/gallium/drivers/nv40/nv40_state.c
@@ -16,7 +16,7 @@ nv40_blend_state_create(struct pipe_context *pipe,
 	struct nv40_context *nv40 = nv40_context(pipe);
 	struct nouveau_grobj *curie = nv40->screen->curie;
 	struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso));
-	struct nouveau_stateobj *so = so_new(16, 0);
+	struct nouveau_stateobj *so = so_new(5, 8, 0);
 
 	if (cso->blend_enable) {
 		so_method(so, curie, NV40TCL_BLEND_ENABLE, 3);
@@ -310,7 +310,7 @@ nv40_rasterizer_state_create(struct pipe_context *pipe,
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
 	struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso));
-	struct nouveau_stateobj *so = so_new(32, 0);
+	struct nouveau_stateobj *so = so_new(8, 18, 0);
 	struct nouveau_grobj *curie = nv40->screen->curie;
 
 	/*XXX: ignored:
@@ -445,7 +445,7 @@ nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 {
 	struct nv40_context *nv40 = nv40_context(pipe);
 	struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso));
-	struct nouveau_stateobj *so = so_new(32, 0);
+	struct nouveau_stateobj *so = so_new(4, 21, 0);
 	struct nouveau_grobj *curie = nv40->screen->curie;
 
 	so_method(so, curie, NV40TCL_DEPTH_FUNC, 3);
diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c
index 8cd05ce66e..3ff00a37f6 100644
--- a/src/gallium/drivers/nv40/nv40_state_blend.c
+++ b/src/gallium/drivers/nv40/nv40_state_blend.c
@@ -18,7 +18,7 @@ struct nv40_state_entry nv40_state_blend = {
 static boolean
 nv40_state_blend_colour_validate(struct nv40_context *nv40)
 {
-	struct nouveau_stateobj *so = so_new(2, 0);
+	struct nouveau_stateobj *so = so_new(1, 1, 0);
 	struct pipe_blend_color *bcol = &nv40->blend_colour;
 
 	so_method(so, nv40->screen->curie, NV40TCL_BLEND_COLOR, 1);
diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c
index 1c7a7cd64f..a58fe9ddb1 100644
--- a/src/gallium/drivers/nv40/nv40_state_fb.c
+++ b/src/gallium/drivers/nv40/nv40_state_fb.c
@@ -19,7 +19,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40)
 	struct nv04_surface *rt[4], *zeta;
 	uint32_t rt_enable, rt_format;
 	int i, colour_format = 0, zeta_format = 0;
-	struct nouveau_stateobj *so = so_new(64, 10);
+	struct nouveau_stateobj *so = so_new(18, 24, 10);
 	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
 	unsigned w = fb->width;
 	unsigned h = fb->height;
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
index cf58d33906..753a505e93 100644
--- a/src/gallium/drivers/nv40/nv40_state_scissor.c
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -12,7 +12,7 @@ nv40_state_scissor_validate(struct nv40_context *nv40)
 		return FALSE;
 	nv40->state.scissor_enabled = rast->scissor;
 
-	so = so_new(3, 0);
+	so = so_new(1, 2, 0);
 	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
 	if (nv40->state.scissor_enabled) {
 		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx);
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
index b51024ad9b..2b371ebfec 100644
--- a/src/gallium/drivers/nv40/nv40_state_stipple.c
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -14,14 +14,14 @@ nv40_state_stipple_validate(struct nv40_context *nv40)
 	if (rast->poly_stipple_enable) {
 		unsigned i;
 
-		so = so_new(35, 0);
+		so = so_new(2, 33, 0);
 		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
 		so_data  (so, 1);
 		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
 		for (i = 0; i < 32; i++)
 			so_data(so, nv40->stipple[i]);
 	} else {
-		so = so_new(2, 0);
+		so = so_new(1, 1, 0);
 		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
 		so_data  (so, 0);
 	}
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
index 665d2d5fca..9919ba1d0b 100644
--- a/src/gallium/drivers/nv40/nv40_state_viewport.c
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -19,7 +19,7 @@ nv40_state_viewport_validate(struct nv40_context *nv40)
 		return FALSE;
 	nv40->state.viewport_bypass = bypass;
 
-	so = so_new(11, 0);
+	so = so_new(2, 9, 0);
 	if (!bypass) {
 		so_method(so, nv40->screen->curie,
 			  NV40TCL_VIEWPORT_TRANSLATE_X, 8);
diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c
index 90087f0bee..a777898f68 100644
--- a/src/gallium/drivers/nv40/nv40_vbo.c
+++ b/src/gallium/drivers/nv40/nv40_vbo.c
@@ -491,9 +491,9 @@ nv40_vbo_validate(struct nv40_context *nv40)
 	unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD;
 	int hw;
 
-	vtxbuf = so_new(20, 18);
+	vtxbuf = so_new(3, 17, 18);
 	so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr);
-	vtxfmt = so_new(17, 0);
+	vtxfmt = so_new(1, 16, 0);
 	so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr);
 
 	for (hw = 0; hw < nv40->vtxelt_nr; hw++) {
@@ -506,7 +506,7 @@ nv40_vbo_validate(struct nv40_context *nv40)
 
 		if (!vb->stride) {
 			if (!sattr)
-				sattr = so_new(16 * 5, 0);
+				sattr = so_new(16, 16 * 4, 0);
 
 			if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) {
 				so_data(vtxbuf, 0);
diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c
index afbb2cb47b..8d80fcad38 100644
--- a/src/gallium/drivers/nv40/nv40_vertprog.c
+++ b/src/gallium/drivers/nv40/nv40_vertprog.c
@@ -886,7 +886,7 @@ check_gpu_resources:
 				assert(0);
 		}
 
-		so = so_new(7, 0);
+		so = so_new(3, 4, 0);
 		so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1);
 		so_data  (so, vp->exec->start);
 		so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2);
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 2d0b1818ef..af0759e503 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -3579,7 +3579,7 @@ nv50_vertprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(13, 2);
+	so = so_new(5, 8, 2);
 	so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2);
 	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
 		      NOUVEAU_BO_HIGH, 0, 0);
@@ -3615,7 +3615,7 @@ nv50_fragprog_validate(struct nv50_context *nv50)
 	nv50_program_validate_data(nv50, p);
 	nv50_program_validate_code(nv50, p);
 
-	so = so_new(64, 2);
+	so = so_new(6, 7, 2);
 	so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2);
 	so_reloc (so, p->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD |
 		      NOUVEAU_BO_HIGH, 0, 0);
@@ -3783,7 +3783,7 @@ nv50_linkage_validate(struct nv50_context *nv50)
 	}
 
 	/* now fill the stateobj */
-	so = so_new(64, 0);
+	so = so_new(6, 58, 0);
 
 	n = (m + 3) / 4;
 	so_method(so, tesla, NV50TCL_VP_RESULT_MAP_SIZE, 1);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index 1778a74517..28e2b35dea 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -251,7 +251,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		nv50_screen_destroy(pscreen);
 		return NULL;
 	}
-	BIND_RING(chan, screen->m2mf, 1);
 
 	/* 2D object */
 	ret = nouveau_grobj_alloc(chan, 0xbeef502d, NV50_2D, &screen->eng2d);
@@ -260,7 +259,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		nv50_screen_destroy(pscreen);
 		return NULL;
 	}
-	BIND_RING(chan, screen->eng2d, 2);
 
 	/* 3D object */
 	switch (chipset & 0xf0) {
@@ -296,7 +294,6 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 		nv50_screen_destroy(pscreen);
 		return NULL;
 	}
-	BIND_RING(chan, screen->tesla, 3);
 
 	/* Sync notifier */
 	ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
@@ -307,7 +304,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	/* Static M2MF init */
-	so = so_new(32, 0);
+	so = so_new(1, 3, 0);
 	so_method(so, screen->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 3);
 	so_data  (so, screen->sync->handle);
 	so_data  (so, chan->vram->handle);
@@ -316,7 +313,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_ref (NULL, &so);
 
 	/* Static 2D init */
-	so = so_new(64, 0);
+	so = so_new(4, 7, 0);
 	so_method(so, screen->eng2d, NV50_2D_DMA_NOTIFY, 4);
 	so_data  (so, screen->sync->handle);
 	so_data  (so, chan->vram->handle);
@@ -332,7 +329,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	so_ref(NULL, &so);
 
 	/* Static tesla init */
-	so = so_new(256, 20);
+	so = so_new(40, 84, 20);
 
 	so_method(so, screen->tesla, NV50TCL_COND_MODE, 1);
 	so_data  (so, NV50TCL_COND_MODE_ALWAYS);
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index 30b2b0f91b..fd0a33d7f7 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -35,7 +35,7 @@ static void *
 nv50_blend_state_create(struct pipe_context *pipe,
 			const struct pipe_blend_state *cso)
 {
-	struct nouveau_stateobj *so = so_new(64, 0);
+	struct nouveau_stateobj *so = so_new(5, 24, 0);
 	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
 	struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj);
 	unsigned cmask = 0, i;
@@ -280,7 +280,7 @@ static void *
 nv50_rasterizer_state_create(struct pipe_context *pipe,
 			     const struct pipe_rasterizer_state *cso)
 {
-	struct nouveau_stateobj *so = so_new(64, 0);
+	struct nouveau_stateobj *so = so_new(15, 21, 0);
 	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
 	struct nv50_rasterizer_stateobj *rso =
 		CALLOC_STRUCT(nv50_rasterizer_stateobj);
@@ -425,7 +425,7 @@ nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe,
 {
 	struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla;
 	struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj);
-	struct nouveau_stateobj *so = so_new(64, 0);
+	struct nouveau_stateobj *so = so_new(8, 22, 0);
 
 	so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1);
 	so_data  (so, cso->depth.writemask ? 1 : 0);
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 682786345e..f83232f43c 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -33,7 +33,7 @@ static void
 nv50_state_validate_fb(struct nv50_context *nv50)
 {
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	struct nouveau_stateobj *so = so_new(128, 18);
+	struct nouveau_stateobj *so = so_new(32, 79, 18);
 	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
 	unsigned i, w, h, gw = 0;
 
@@ -299,7 +299,7 @@ nv50_state_validate(struct nv50_context *nv50)
 		so_ref(nv50->rasterizer->so, &nv50->state.rast);
 
 	if (nv50->dirty & NV50_NEW_BLEND_COLOUR) {
-		so = so_new(5, 0);
+		so = so_new(1, 4, 0);
 		so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4);
 		so_data  (so, fui(nv50->blend_colour.color[0]));
 		so_data  (so, fui(nv50->blend_colour.color[1]));
@@ -310,7 +310,7 @@ nv50_state_validate(struct nv50_context *nv50)
 	}
 
 	if (nv50->dirty & NV50_NEW_STIPPLE) {
-		so = so_new(33, 0);
+		so = so_new(1, 32, 0);
 		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
 		for (i = 0; i < 32; i++)
 			so_data(so, util_bswap32(nv50->stipple.stipple[i]));
@@ -327,7 +327,7 @@ nv50_state_validate(struct nv50_context *nv50)
 			goto scissor_uptodate;
 		nv50->state.scissor_enabled = rast->scissor;
 
-		so = so_new(3, 0);
+		so = so_new(1, 2, 0);
 		so_method(so, tesla, NV50TCL_SCISSOR_HORIZ(0), 2);
 		if (nv50->state.scissor_enabled) {
 			so_data(so, (s->maxx << 16) | s->minx);
@@ -356,7 +356,7 @@ scissor_uptodate:
 			goto viewport_uptodate;
 		nv50->state.viewport_bypass = bypass;
 
-		so = so_new(14, 0);
+		so = so_new(5, 9, 0);
 		if (!bypass) {
 			so_method(so, tesla, NV50TCL_VIEWPORT_TRANSLATE_X(0), 3);
 			so_data  (so, fui(nv50->viewport.translate[0]));
@@ -400,7 +400,8 @@ viewport_uptodate:
 		for (i = 0; i < PIPE_SHADER_TYPES; ++i)
 			nr += nv50->sampler_nr[i];
 
-		so = so_new(nr * 8 + 24 * PIPE_SHADER_TYPES + 2, 4);
+		so = so_new(1+ 5 * PIPE_SHADER_TYPES, 1+ 19 * PIPE_SHADER_TYPES
+					+ nr * 8, PIPE_SHADER_TYPES * 2);
 
 		nv50_validate_samplers(nv50, so, PIPE_SHADER_VERTEX);
 		nv50_validate_samplers(nv50, so, PIPE_SHADER_FRAGMENT);
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index c4ca096d6a..bef548b728 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -199,16 +199,18 @@ nv50_tex_validate(struct nv50_context *nv50)
 {
 	struct nouveau_stateobj *so;
 	struct nouveau_grobj *tesla = nv50->screen->tesla;
-	unsigned p, push, nrlc;
+	unsigned p, start, push, nrlc;
 
-	for (nrlc = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+	for (nrlc = 0, start = 0, push = 0, p = 0; p < PIPE_SHADER_TYPES; ++p) {
+		start += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
 		push += MAX2(nv50->miptree_nr[p], nv50->state.miptree_nr[p]);
 		nrlc += nv50->miptree_nr[p];
 	}
-	push = push * 11 + 23 * PIPE_SHADER_TYPES + 4;
+	start = start * 2 + 4 * PIPE_SHADER_TYPES + 2;
+	push = push * 9 + 19 * PIPE_SHADER_TYPES + 2;
 	nrlc = nrlc * 2 + 2 * PIPE_SHADER_TYPES;
 
-	so = so_new(push, nrlc);
+	so = so_new(start, push, nrlc);
 
 	if (nv50_validate_textures(nv50, so, PIPE_SHADER_VERTEX) == FALSE ||
 	    nv50_validate_textures(nv50, so, PIPE_SHADER_FRAGMENT) == FALSE) {
diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c
index df18c2dd20..f2e510fba6 100644
--- a/src/gallium/drivers/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nv50/nv50_vbo.c
@@ -354,7 +354,7 @@ nv50_vbo_static_attrib(struct nv50_context *nv50, unsigned attrib,
 
 	so = *pso;
 	if (!so)
-		*pso = so = so_new(nv50->vtxelt_nr * 5, 0);
+		*pso = so = so_new(nv50->vtxelt_nr, nv50->vtxelt_nr * 4, 0);
 
 	switch (ve->nr_components) {
 	case 4:
@@ -415,8 +415,8 @@ nv50_vbo_validate(struct nv50_context *nv50)
 	n_ve = MAX2(nv50->vtxelt_nr, nv50->state.vtxelt_nr);
 
 	vtxattr = NULL;
-	vtxbuf = so_new(n_ve * 7, nv50->vtxelt_nr * 4);
-	vtxfmt = so_new(n_ve + 1, 0);
+	vtxbuf = so_new(n_ve * 2, n_ve * 5, nv50->vtxelt_nr * 4);
+	vtxfmt = so_new(1, n_ve, 0);
 	so_method(vtxfmt, tesla, NV50TCL_VERTEX_ARRAY_ATTRIB(0), n_ve);
 
 	for (i = 0; i < nv50->vtxelt_nr; i++) {
-- 
cgit v1.2.3


From 15b05c54b7937eca57f50405c4bafb848b87edfa Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Tue, 5 Jan 2010 15:53:46 -0700
Subject: i965g: fix invalid assertion in emit_xpd()

Invalid assertion found by Roel Kluin <roel.kluin@gmail.com>
---
 src/gallium/drivers/i965/brw_wm_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 7e57d0306b..3d162a67e7 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p,
 {
    GLuint i;
 
-   assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_X);
+   assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_W);
    
    for (i = 0 ; i < 3; i++) {
       if (mask & (1<<i)) {
-- 
cgit v1.2.3


From f02347374819a9b145a3e26c625709aea0c6f61b Mon Sep 17 00:00:00 2001
From: Luca Barbieri <luca@luca-barbieri.com>
Date: Wed, 6 Jan 2010 10:35:47 +0000
Subject: gallium: remove PIPE_TEX_FILTER_ANISO

This patch removes PIPE_TEX_FILTER_ANISO.

Anisotropic filtering is enabled if and only if max_anisotropy > 1.0.
Values of max_anisotropy between 0.0 and 1.0, inclusive, are to be
considered equivalent, and mean that anisotropic filtering is turned off.

This approach has the small drawback of eliminating the possibility of
enabling anisotropic filter on either minification or magnification
separately, which Radeon hardware seems to support and which is currently
supported by Gallium but not exposed to OpenGL.  If this is actually
useful it could be handled by splitting max_anisotropy in two values
and adding an appropriate OpenGL extension.

NOTE: some fiddling & reformatting by keithw to get this patch to
apply.  Hopefully nothing broken in the process.
---
 src/gallium/auxiliary/util/u_debug_dump.c        |  6 ++----
 src/gallium/drivers/cell/spu/spu_command.c       |  4 ----
 src/gallium/drivers/i915/i915_state.c            |  5 +++--
 src/gallium/drivers/i965/brw_pipe_sampler.c      |  2 --
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c |  1 -
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c   |  3 ---
 src/gallium/drivers/nv50/nv50_state.c            |  2 --
 src/gallium/drivers/r300/r300_state.c            |  3 ++-
 src/gallium/drivers/r300/r300_state_inlines.h    | 16 +++++++---------
 src/gallium/drivers/svga/svga_pipe_sampler.c     |  3 ++-
 src/gallium/include/pipe/p_defines.h             |  2 --
 src/gallium/state_trackers/vega/image.c          |  2 +-
 src/mesa/state_tracker/st_atom_sampler.c         |  4 ----
 13 files changed, 17 insertions(+), 36 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/util/u_debug_dump.c b/src/gallium/auxiliary/util/u_debug_dump.c
index 09866880ae..61624d05c0 100644
--- a/src/gallium/auxiliary/util/u_debug_dump.c
+++ b/src/gallium/auxiliary/util/u_debug_dump.c
@@ -255,15 +255,13 @@ DEFINE_DEBUG_DUMP_CONTINUOUS(tex_mipfilter)
 static const char *
 debug_dump_tex_filter_names[] = {
    "PIPE_TEX_FILTER_NEAREST",
-   "PIPE_TEX_FILTER_LINEAR",
-   "PIPE_TEX_FILTER_ANISO"
+   "PIPE_TEX_FILTER_LINEAR"
 };
 
 static const char *
 debug_dump_tex_filter_short_names[] = {
    "nearest",
-   "linear",
-   "aniso"
+   "linear"
 };
 
 DEFINE_DEBUG_DUMP_CONTINUOUS(tex_filter)
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index 5c0179d954..12b855a3db 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -405,8 +405,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
    case PIPE_TEX_FILTER_LINEAR:
       spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
       break;
-   case PIPE_TEX_FILTER_ANISO:
-      /* fall-through, for now */
    case PIPE_TEX_FILTER_NEAREST:
       spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
       break;
@@ -418,8 +416,6 @@ cmd_state_sampler(const struct cell_command_sampler *sampler)
    case PIPE_TEX_FILTER_LINEAR:
       spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
       break;
-   case PIPE_TEX_FILTER_ANISO:
-      /* fall-through, for now */
    case PIPE_TEX_FILTER_NEAREST:
       spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
       break;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 1528afc859..5f5b6f8e18 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -74,8 +74,6 @@ static unsigned translate_img_filter( unsigned filter )
       return FILTER_NEAREST;
    case PIPE_TEX_FILTER_LINEAR:
       return FILTER_LINEAR;
-   case PIPE_TEX_FILTER_ANISO:
-      return FILTER_ANISOTROPIC;
    default:
       assert(0);
       return FILTER_NEAREST;
@@ -221,6 +219,9 @@ i915_create_sampler_state(struct pipe_context *pipe,
    minFilt = translate_img_filter( sampler->min_img_filter );
    magFilt = translate_img_filter( sampler->mag_img_filter );
    
+   if (sampler->max_anisotropy > 1.0)
+      minFilt = magFilt = FILTER_ANISOTROPIC;
+
    if (sampler->max_anisotropy > 2.0) {
       cso->state[0] |= SS2_MAX_ANISO_4;
    }
diff --git a/src/gallium/drivers/i965/brw_pipe_sampler.c b/src/gallium/drivers/i965/brw_pipe_sampler.c
index 5ddc63f57e..81712798a5 100644
--- a/src/gallium/drivers/i965/brw_pipe_sampler.c
+++ b/src/gallium/drivers/i965/brw_pipe_sampler.c
@@ -48,8 +48,6 @@ static GLuint translate_img_filter( unsigned filter )
       return BRW_MAPFILTER_NEAREST;
    case PIPE_TEX_FILTER_LINEAR:
       return BRW_MAPFILTER_LINEAR;
-   case PIPE_TEX_FILTER_ANISO:
-      return BRW_MAPFILTER_ANISOTROPIC;
    default:
       assert(0);
       return BRW_MAPFILTER_NEAREST;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index 47b68b71e2..c46fef5010 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -577,7 +577,6 @@ lp_build_sample_soa(LLVMBuilderRef builder,
       lp_build_sample_2d_nearest_soa(&bld, s, t, width, height, stride, data_ptr, texel);
       break;
    case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
       if(lp_format_is_rgba8(bld.format_desc))
          lp_build_sample_2d_linear_aos(&bld, s, t, width, height, stride, data_ptr, texel);
       else
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index 68520fa4f0..eaff6d5cdf 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1131,7 +1131,6 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
       }
       break;
    case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
       {
          int x0[4], y0[4], x1[4], y1[4];
          float xw[4], yw[4]; /* weights */
@@ -1283,7 +1282,6 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
       }
       break;
    case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
       {
          int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
          float xw[4], yw[4], zw[4]; /* interpolation weights */
@@ -1414,7 +1412,6 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
       }
       break;
    case PIPE_TEX_FILTER_LINEAR:
-   case PIPE_TEX_FILTER_ANISO:
       {
          int x0[4], y0[4], x1[4], y1[4];
          float xw[4], yw[4]; /* weights */
diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c
index fd0a33d7f7..1f67df814b 100644
--- a/src/gallium/drivers/nv50/nv50_state.c
+++ b/src/gallium/drivers/nv50/nv50_state.c
@@ -146,7 +146,6 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 		  (wrap_mode(cso->wrap_r) << 6));
 
 	switch (cso->mag_img_filter) {
-	case PIPE_TEX_FILTER_ANISO:
 	case PIPE_TEX_FILTER_LINEAR:
 		tsc[1] |= NV50TSC_1_1_MAGF_LINEAR;
 		break;
@@ -157,7 +156,6 @@ nv50_sampler_state_create(struct pipe_context *pipe,
 	}
 
 	switch (cso->min_img_filter) {
-	case PIPE_TEX_FILTER_ANISO:
 	case PIPE_TEX_FILTER_LINEAR:
 		tsc[1] |= NV50TSC_1_1_MINF_LINEAR;
 		break;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 49072462ec..534c1b5935 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -556,7 +556,8 @@ static void*
 
     sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
                                                    state->mag_img_filter,
-                                                   state->min_mip_filter);
+                                                   state->min_mip_filter,
+                                                   state->max_anisotropy > 1.0);
 
     /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
     /* We must pass these to the emit function to clamp them properly. */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index dbe42edd91..02adee2701 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -260,35 +260,33 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
 static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
 {
     uint32_t retval = 0;
-    switch (min) {
+    if (is_anisotropic)
+        retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO;
+    else {
+        switch (min) {
         case PIPE_TEX_FILTER_NEAREST:
             retval |= R300_TX_MIN_FILTER_NEAREST;
             break;
         case PIPE_TEX_FILTER_LINEAR:
             retval |= R300_TX_MIN_FILTER_LINEAR;
             break;
-        case PIPE_TEX_FILTER_ANISO:
-            retval |= R300_TX_MIN_FILTER_ANISO;
-            break;
         default:
             debug_printf("r300: Unknown texture filter %d\n", min);
             assert(0);
             break;
-    }
-    switch (mag) {
+        }
+        switch (mag) {
         case PIPE_TEX_FILTER_NEAREST:
             retval |= R300_TX_MAG_FILTER_NEAREST;
             break;
         case PIPE_TEX_FILTER_LINEAR:
             retval |= R300_TX_MAG_FILTER_LINEAR;
             break;
-        case PIPE_TEX_FILTER_ANISO:
-            retval |= R300_TX_MAG_FILTER_ANISO;
-            break;
         default:
             debug_printf("r300: Unknown texture filter %d\n", mag);
             assert(0);
             break;
+        }
     }
     switch (mip) {
         case PIPE_TEX_MIPFILTER_NONE:
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
index 78053e755e..460a101f8c 100644
--- a/src/gallium/drivers/svga/svga_pipe_sampler.c
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -76,7 +76,6 @@ static INLINE unsigned translate_img_filter( unsigned filter )
    switch (filter) {
    case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
    case PIPE_TEX_FILTER_LINEAR:  return SVGA3D_TEX_FILTER_LINEAR;
-   case PIPE_TEX_FILTER_ANISO:   return SVGA3D_TEX_FILTER_ANISOTROPIC;
    default:
       assert(0);
       return SVGA3D_TEX_FILTER_NEAREST;
@@ -107,6 +106,8 @@ svga_create_sampler_state(struct pipe_context *pipe,
    cso->magfilter = translate_img_filter( sampler->mag_img_filter );
    cso->minfilter = translate_img_filter( sampler->min_img_filter );
    cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 );
+   if(cso->aniso_level != 1)
+      cso->magfilter = cso->minfilter = SVGA3D_TEX_FILTER_ANISOTROPIC;
    cso->lod_bias = sampler->lod_bias;
    cso->addressu = translate_wrap_mode(sampler->wrap_s);
    cso->addressv = translate_wrap_mode(sampler->wrap_t);
diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
index c3b1e634ff..35f3830ebc 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -171,8 +171,6 @@ enum pipe_texture_target {
  */
 #define PIPE_TEX_FILTER_NEAREST      0
 #define PIPE_TEX_FILTER_LINEAR       1
-#define PIPE_TEX_FILTER_ANISO        2 
-
 
 #define PIPE_TEX_COMPARE_NONE          0
 #define PIPE_TEX_COMPARE_R_TO_TEXTURE  1
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 278ba6d46e..1112ad9839 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -644,7 +644,7 @@ VGint image_sampler_filter(struct vg_context *ctx)
        return PIPE_TEX_FILTER_NEAREST;
        break;
     case VG_IMAGE_QUALITY_BETTER:
-       /*return PIPE_TEX_FILTER_ANISO;*/
+       /* possibly use anisotropic filtering */
        return PIPE_TEX_FILTER_LINEAR;
        break;
     default:
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
index e1d6fa9eca..7b84a86ba4 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -213,10 +213,6 @@ update_samplers(struct st_context *st)
                             sampler->border_color);
 
 	 sampler->max_anisotropy = texobj->MaxAnisotropy;
-         if (sampler->max_anisotropy > 1.0) {
-            sampler->min_img_filter = PIPE_TEX_FILTER_ANISO;
-            sampler->mag_img_filter = PIPE_TEX_FILTER_ANISO;
-         }
 
          /* only care about ARB_shadow, not SGI shadow */
          if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
-- 
cgit v1.2.3


From 1b5476a353d9772fe86d02f0a733a55f9cf0ea76 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 6 Jan 2010 07:17:48 -0700
Subject: i965g: fix invalid assertion in emit_xpd(), again

---
 src/gallium/drivers/i965/brw_wm_emit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_wm_emit.c b/src/gallium/drivers/i965/brw_wm_emit.c
index 3d162a67e7..8f983a60ae 100644
--- a/src/gallium/drivers/i965/brw_wm_emit.c
+++ b/src/gallium/drivers/i965/brw_wm_emit.c
@@ -691,7 +691,7 @@ static void emit_xpd( struct brw_compile *p,
 {
    GLuint i;
 
-   assert(!(mask & BRW_WRITEMASK_W) == BRW_WRITEMASK_W);
+   assert((mask & BRW_WRITEMASK_W) != BRW_WRITEMASK_W);
    
    for (i = 0 ; i < 3; i++) {
       if (mask & (1<<i)) {
-- 
cgit v1.2.3


From 955f51270bb60ad77dba049799587dc7c0fb4dda Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Wed, 6 Jan 2010 11:23:43 +0100
Subject: Make sure we use only signed/unsigned ints with bitfields.

Seems to be the only way to stay fully portable.
---
 src/gallium/drivers/svga/svga_tgsi.h               | 26 ++++++++++------------
 .../dri/r300/compiler/radeon_pair_regalloc.c       |  2 +-
 .../drivers/dri/r300/compiler/radeon_program.h     | 14 ++++++------
 .../dri/r300/compiler/radeon_program_pair.h        | 10 ++++-----
 4 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 896c90a89a..1309c33923 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -39,26 +39,24 @@ struct tgsi_token;
 
 struct svga_vs_compile_key
 {
-   ubyte need_prescale:1;
-   ubyte allow_psiz:1;
    unsigned zero_stride_vertex_elements;
-   ubyte num_zero_stride_vertex_elements:6;
+   unsigned need_prescale:1;
+   unsigned allow_psiz:1;
+   unsigned num_zero_stride_vertex_elements:6;
 };
 
 struct svga_fs_compile_key
 {
-   boolean light_twoside:1;
-   boolean front_cw:1;
-   ubyte num_textures;
-   ubyte num_unnormalized_coords;
+   unsigned light_twoside:1;
+   unsigned front_cw:1;
+   unsigned num_textures:8;
+   unsigned num_unnormalized_coords:8;
    struct {
-      ubyte compare_mode       : 1;
-      ubyte compare_func       : 3;
-      ubyte unnormalized       : 1;
-
-      ubyte width_height_idx   : 7;
-
-      ubyte texture_target;
+      unsigned compare_mode:1;
+      unsigned compare_func:3;
+      unsigned unnormalized:1;
+      unsigned width_height_idx:7;
+      unsigned texture_target:8;
    } tex[PIPE_MAX_SAMPLERS];
 };
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
index 828d0c8e28..b2fe7f76b2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -49,7 +49,7 @@ struct register_info {
 
 	unsigned int Used:1;
 	unsigned int Allocated:1;
-	rc_register_file File:3;
+	unsigned int File:3;
 	unsigned int Index:RC_REGISTER_INDEX_BITS;
 };
 
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
index 03592884eb..e318867696 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -39,7 +39,7 @@
 struct radeon_compiler;
 
 struct rc_src_register {
-	rc_register_file File:3;
+	unsigned int File:3;
 
 	/** Negative values may be used for relative addressing. */
 	signed int Index:(RC_REGISTER_INDEX_BITS+1);
@@ -55,7 +55,7 @@ struct rc_src_register {
 };
 
 struct rc_dst_register {
-	rc_register_file File:3;
+	unsigned int File:3;
 
 	/** Negative values may be used for relative addressing. */
 	signed int Index:(RC_REGISTER_INDEX_BITS+1);
@@ -79,20 +79,20 @@ struct rc_sub_instruction {
 	/**
 	 * Opcode of this instruction, according to \ref rc_opcode enums.
 	 */
-	rc_opcode Opcode:8;
+	unsigned int Opcode:8;
 
 	/**
 	 * Saturate each value of the result to the range [0,1] or [-1,1],
 	 * according to \ref rc_saturate_mode enums.
 	 */
-	rc_saturate_mode SaturateMode:2;
+	unsigned int SaturateMode:2;
 
 	/**
 	 * Writing to the special register RC_SPECIAL_ALU_RESULT
 	 */
 	/*@{*/
-	rc_write_aluresult WriteALUResult:2;
-	rc_compare_func ALUResultCompare:3;
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
 	/*@}*/
 
 	/**
@@ -103,7 +103,7 @@ struct rc_sub_instruction {
 	unsigned int TexSrcUnit:5;
 
 	/** Source texture target, one of the \ref rc_texture_target enums */
-	rc_texture_target TexSrcTarget:3;
+	unsigned int TexSrcTarget:3;
 
 	/** True if tex instruction should do shadow comparison */
 	unsigned int TexShadow:1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
index 1600598428..6685ade3ea 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -52,12 +52,12 @@ struct r300_fragment_program_compiler;
 
 struct radeon_pair_instruction_source {
 	unsigned int Used:1;
-	rc_register_file File:3;
+	unsigned int File:3;
 	unsigned int Index:RC_REGISTER_INDEX_BITS;
 };
 
 struct radeon_pair_instruction_rgb {
-	rc_opcode Opcode:8;
+	unsigned int Opcode:8;
 	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
 	unsigned int WriteMask:3;
 	unsigned int OutputWriteMask:3;
@@ -74,7 +74,7 @@ struct radeon_pair_instruction_rgb {
 };
 
 struct radeon_pair_instruction_alpha {
-	rc_opcode Opcode:8;
+	unsigned int Opcode:8;
 	unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
 	unsigned int WriteMask:1;
 	unsigned int OutputWriteMask:1;
@@ -95,8 +95,8 @@ struct rc_pair_instruction {
 	struct radeon_pair_instruction_rgb RGB;
 	struct radeon_pair_instruction_alpha Alpha;
 
-	rc_write_aluresult WriteALUResult:2;
-	rc_compare_func ALUResultCompare:3;
+	unsigned int WriteALUResult:2;
+	unsigned int ALUResultCompare:3;
 };
 
 
-- 
cgit v1.2.3


From 4e014c0a148ba3ac015d0e83dcf975ca6e814e1f Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Wed, 6 Jan 2010 14:12:10 +0100
Subject: pipe_sampler_state::compare_mode is not a boolean enable flag.

It's a 1-bit enum.
---
 src/gallium/drivers/llvmpipe/lp_bld_sample.c     | 4 ++--
 src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c | 2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c       | 4 ++--
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c   | 4 ++--
 src/gallium/drivers/softpipe/sp_tex_sample.c     | 2 +-
 src/gallium/drivers/trace/tr_dump_state.c        | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
index af70ddc6ab..9003e108c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -69,8 +69,8 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
    state->min_img_filter    = sampler->min_img_filter;
    state->min_mip_filter    = sampler->min_mip_filter;
    state->mag_img_filter    = sampler->mag_img_filter;
-   if(sampler->compare_mode) {
-      state->compare_mode      = sampler->compare_mode;
+   state->compare_mode      = sampler->compare_mode;
+   if(sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
       state->compare_func      = sampler->compare_func;
    }
    state->normalized_coords = sampler->normalized_coords;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
index c46fef5010..5ee8d556a6 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample_soa.c
@@ -488,7 +488,7 @@ lp_build_sample_compare(struct lp_build_sample_context *bld,
    LLVMValueRef res;
    unsigned chan;
 
-   if(!bld->static_state->compare_mode)
+   if(bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE)
       return;
 
    /* TODO: Compare before swizzling, to avoid redundant computations */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index f2b8c36264..47078fbae4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -453,8 +453,8 @@ generate_fragment(struct llvmpipe_context *lp,
                          debug_dump_tex_mipfilter(key->sampler[i].min_mip_filter, TRUE));
             debug_printf("  .mag_img_filter = %s\n",
                          debug_dump_tex_filter(key->sampler[i].mag_img_filter, TRUE));
-            if(key->sampler[i].compare_mode)
-               debug_printf("  .compare_mode = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
+            if(key->sampler[i].compare_mode != PIPE_TEX_COMPARE_NONE)
+               debug_printf("  .compare_func = %s\n", debug_dump_func(key->sampler[i].compare_func, TRUE));
             debug_printf("  .normalized_coords = %u\n", key->sampler[i].normalized_coords);
             debug_printf("  .prefilter = %u\n", key->sampler[i].prefilter);
          }
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index eaff6d5cdf..ccc8c8cec4 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -1520,7 +1520,7 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
    if (texture->target == PIPE_TEXTURE_2D &&
        sampler->min_img_filter == sampler->mag_img_filter &&
        sampler->wrap_s == sampler->wrap_t &&
-       sampler->compare_mode == FALSE &&
+       sampler->compare_mode == PIPE_TEX_COMPARE_NONE &&
        sampler->normalized_coords) 
    {
       if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
@@ -1567,7 +1567,7 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
                     sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
                     sampler->min_img_filter, sampler->mag_img_filter,
                     sampler->wrap_s, sampler->wrap_t,
-                    sampler->compare_mode, FALSE,
+                    sampler->compare_mode, PIPE_TEX_COMPARE_NONE,
                     sampler->normalized_coords, TRUE);
    }
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index e26153b1d9..903d21a021 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -1862,7 +1862,7 @@ sp_create_sampler_varient( const struct pipe_sampler_state *sampler,
       break;
    }
 
-   if (sampler->compare_mode != FALSE) {
+   if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
       samp->compare = sample_compare;
    }
    else {
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index 0102cc1876..86237e03bc 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -409,7 +409,7 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state)
    trace_dump_member(uint, state, min_img_filter);
    trace_dump_member(uint, state, min_mip_filter);
    trace_dump_member(uint, state, mag_img_filter);
-   trace_dump_member(bool, state, compare_mode);
+   trace_dump_member(uint, state, compare_mode);
    trace_dump_member(uint, state, compare_func);
    trace_dump_member(bool, state, normalized_coords);
    trace_dump_member(uint, state, prefilter);
-- 
cgit v1.2.3


From d8331aaf5b4847fd969b315045ec6f1185074722 Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 6 Jan 2010 09:54:34 -0700
Subject: svga: make texture_target a ubyte, not a bitfield

gcc won't let us take the address of a bitfield in the svga_fs_key_size()
function.
---
 src/gallium/drivers/svga/svga_tgsi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 1309c33923..043b99115f 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -56,7 +56,7 @@ struct svga_fs_compile_key
       unsigned compare_func:3;
       unsigned unnormalized:1;
       unsigned width_height_idx:7;
-      unsigned texture_target:8;
+      ubyte texture_target;
    } tex[PIPE_MAX_SAMPLERS];
 };
 
-- 
cgit v1.2.3


From 60f5608bd7027a5c26a84f1d63250353ec4ea43c Mon Sep 17 00:00:00 2001
From: Brian Paul <brianp@vmware.com>
Date: Wed, 6 Jan 2010 10:36:51 -0700
Subject: r300g: add missing is_anisotropic param

---
 src/gallium/drivers/r300/r300_state_inlines.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
index 02adee2701..35be00e1b0 100644
--- a/src/gallium/drivers/r300/r300_state_inlines.h
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -257,7 +257,8 @@ static INLINE uint32_t r300_translate_wrap(int wrap)
     }
 }
 
-static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip)
+static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+                                                  int is_anisotropic)
 {
     uint32_t retval = 0;
     if (is_anisotropic)
-- 
cgit v1.2.3


From 8e559e05a8887df6477eb5ee26c4f4461b79b303 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Wed, 6 Jan 2010 18:05:08 +0000
Subject: softpipe: fix draw return value

---
 src/gallium/drivers/softpipe/sp_draw_arrays.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c
index 3826a9e41a..03d35fb3cb 100644
--- a/src/gallium/drivers/softpipe/sp_draw_arrays.c
+++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c
@@ -124,7 +124,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe,
    unsigned i;
 
    if (!softpipe_check_render_cond(sp))
-      return TRUE;
+      return;
 
    sp->reduced_api_prim = u_reduced_prim(mode);
 
-- 
cgit v1.2.3


From 717c28055a9093e4fea37450d6db787e5b61e254 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 20 Dec 2009 04:28:30 +0100
Subject: r300g: fix blending when SRC_ALPHA_SATURATE is used

---
 src/gallium/drivers/r300/r300_state.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 534c1b5935..1dbf0bbd8c 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -66,7 +66,11 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
             ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
 
-        /* optimization: some operations do not require the destination color */
+        /* Optimization: some operations do not require the destination color.
+         *
+         * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
+         * otherwise blending gives incorrect results. It seems to be
+         * a hardware bug. */
         if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
             eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
             dstRGB != PIPE_BLENDFACTOR_ZERO ||
@@ -78,7 +82,8 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             srcA == PIPE_BLENDFACTOR_DST_COLOR ||
             srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
             srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
-            srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA)
+            srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
             blend->blend_control |= R300_READ_ENABLE;
 
         /* XXX implement the optimization with DISCARD_SRC_PIXELS*/
-- 
cgit v1.2.3


From 6b9b3213c545644d155be54fd633e8b630a22465 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sun, 20 Dec 2009 18:19:03 +0100
Subject: r300g: optimize blending by discarding pixels that don't change the
 colorbuffer

Let's get some performance out of the blender.
---
 src/gallium/drivers/r300/r300_state.c           | 161 +++++++++++++++++++++++-
 src/gallium/drivers/r300/r300_state_invariant.c |   4 +-
 2 files changed, 162 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 1dbf0bbd8c..f8cce112ee 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -41,6 +42,120 @@
 /* r300_state: Functions used to intialize state context by translating
  * Gallium state objects into semi-native r300 state objects. */
 
+static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
+                                            unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_ALPHA == 0, and the following state is set, the colorbuffer
+     * will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+            srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+            srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
+                                            unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_ALPHA == 1, and the following state is set, the colorbuffer
+     * will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+            dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
+                                            unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
+     * will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
+                                            unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
+     * will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
+                                                  unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
+     * the colorbuffer will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+            srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+            srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
+                                                  unsigned dstRGB, unsigned dstA)
+{
+    /* If the blend equation is ADD or REVERSE_SUBTRACT,
+     * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
+     * the colorbuffer will not be changed.
+     * Notice that the dst factors are the src factors inverted. */
+    return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+           (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+            srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+            srcA == PIPE_BLENDFACTOR_ZERO) &&
+           (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+            dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            dstRGB == PIPE_BLENDFACTOR_ONE) &&
+           (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+            dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+            dstA == PIPE_BLENDFACTOR_ONE);
+}
+
 /* Create a new blend state based on the CSO blend state.
  *
  * This encompasses alpha blending, logic/raster ops, and blend dithering. */
@@ -86,7 +201,51 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
             blend->blend_control |= R300_READ_ENABLE;
 
-        /* XXX implement the optimization with DISCARD_SRC_PIXELS*/
+        /* Optimization: discard pixels which don't change the colorbuffer.
+         *
+         * The code below is non-trivial and some math is involved.
+         *
+         * Discarding pixels must be disabled when FP16 AA is enabled.
+         * This is a hardware bug. Also, this implementation wouldn't work
+         * with FP blending enabled and equation clamping disabled.
+         *
+         * Equations other than ADD are rarely used and therefore won't be
+         * optimized. */
+        if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
+            (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
+            /* ADD: X+Y
+             * REVERSE_SUBTRACT: Y-X
+             *
+             * The idea is:
+             * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
+             * then CB will not be changed.
+             *
+             * Given the srcFactor and dstFactor variables, we can derive
+             * what src and dst should be equal to and discard appropriate
+             * pixels.
+             */
+            if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
+                blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+            } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
+                                                    dstRGB, dstA)) {
+                blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
+            } else if (blend_discard_if_src_color_0(srcRGB, srcA,
+                                                    dstRGB, dstA)) {
+                blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
+            } else if (blend_discard_if_src_color_1(srcRGB, srcA,
+                                                    dstRGB, dstA)) {
+                blend->blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
+            } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
+                                                          dstRGB, dstA)) {
+                blend->blend_control |=
+                    R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
+            } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
+                                                          dstRGB, dstA)) {
+                blend->blend_control |=
+                    R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
+            }
+        }
+
         /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */
 
         /* separate alpha */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index bcd4c030f9..64dc14ea76 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -118,8 +118,8 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
     OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
     if (caps->is_r500) {
-        OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
-        OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
+        OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+        OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
     }
     OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
     OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
-- 
cgit v1.2.3


From 7c902b43d84f5508764e64d95f7359897a1aad0a Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 21 Dec 2009 01:38:45 +0100
Subject: r300g: optimize blending by conditionally disabling reads from the
 colorbuffer

---
 src/gallium/drivers/r300/r300_reg.h   |  2 ++
 src/gallium/drivers/r300/r300_state.c | 31 ++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 0aa1da07f8..034bfc15cf 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -2186,6 +2186,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1     (4 << 3)
 #       define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1     (5 << 3)
 #       define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1     (6 << 3)
+#       define R500_SRC_ALPHA_0_NO_READ                (1 << 30)
+#       define R500_SRC_ALPHA_1_NO_READ                (1 << 31)
 
 /* the following are shared between CBLEND and ABLEND */
 #       define R300_FCN_MASK                         (3  << 12)
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index f8cce112ee..872a393321 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -198,9 +198,36 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
             srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
             srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
-            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
+            srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
+            /* Enable reading from the colorbuffer. */
             blend->blend_control |= R300_READ_ENABLE;
 
+            if (r300_screen(r300_context(pipe)->context.screen)->caps->is_r500) {
+                /* Optimization: Depending on incoming pixels, we can
+                 * conditionally disable the reading in hardware... */
+                if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
+                    eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
+                    /* Disable reading if SRC_ALPHA == 0. */
+                    if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+                         dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+                        (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+                         dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+                         dstA == PIPE_BLENDFACTOR_ZERO)) {
+                         blend->blend_control |= R500_SRC_ALPHA_0_NO_READ;
+                    }
+
+                    /* Disable reading if SRC_ALPHA == 1. */
+                    if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+                         dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+                        (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+                         dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+                         dstA == PIPE_BLENDFACTOR_ZERO)) {
+                         blend->blend_control |= R500_SRC_ALPHA_1_NO_READ;
+                    }
+                }
+            }
+        }
+
         /* Optimization: discard pixels which don't change the colorbuffer.
          *
          * The code below is non-trivial and some math is involved.
@@ -246,8 +273,6 @@ static void* r300_create_blend_state(struct pipe_context* pipe,
             }
         }
 
-        /* XXX implement the optimization with SRC_ALPHA_?_NO_READ */
-
         /* separate alpha */
         if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
             blend->blend_control |= R300_SEPARATE_ALPHA_ENABLE;
-- 
cgit v1.2.3


From e78648db0a91269ed83238bfa3ced5d4d7dae2e0 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 24 Dec 2009 17:40:53 +0100
Subject: r300g: fix outputting depth in the FS

---
 src/gallium/drivers/r300/r300_emit.c            | 17 +++++++++++++++++
 src/gallium/drivers/r300/r300_state_invariant.c |  4 +---
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 1dc9216a7b..596ec21bc0 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -283,6 +283,22 @@ void r300_emit_fs_constant_buffer(struct r300_context* r300,
     END_CS;
 }
 
+static void r300_emit_fragment_depth_config(struct r300_context* r300,
+                                            struct r300_fragment_shader* fs)
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(4);
+    if (r300_fragment_shader_writes_depth(fs)) {
+        OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SHADER);
+        OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W24 | R300_W_SRC_US);
+    } else {
+        OUT_CS_REG(R300_FG_DEPTH_SRC, R300_FG_DEPTH_SRC_SCAN);
+        OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0 | R300_W_SRC_US);
+    }
+    END_CS;
+}
+
 void r500_emit_fragment_program_code(struct r300_context* r300,
                                      struct rX00_fragment_program_code* generic_code)
 {
@@ -1036,6 +1052,7 @@ validate:
     }
 
     if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+        r300_emit_fragment_depth_config(r300, r300->fs);
         if (r300screen->caps->is_r500) {
             r500_emit_fragment_program_code(r300, &r300->fs->shader->code);
         } else {
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index 64dc14ea76..f25f3ca217 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -43,7 +43,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps;
     CS_LOCALS(r300);
 
-    BEGIN_CS(20 + (caps->has_tcl ? 2: 0));
+    BEGIN_CS(16 + (caps->has_tcl ? 2: 0));
 
     /*** Graphics Backend (GB) ***/
     /* Various GB enables */
@@ -66,8 +66,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
     OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
     OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
-    OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0);
-    OUT_CS_REG(R300_US_W_FMT, 0x0);
 
     /*** VAP ***/
     /* Sign/normalize control */
-- 
cgit v1.2.3


From ddfc4e31ada05148ed1eb56f89efe1decadbbde3 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Mon, 21 Dec 2009 02:10:34 +0100
Subject: r300g: consolidate params

---
 src/gallium/drivers/r300/r300_screen.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 2a8667d483..287664b1d2 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -83,6 +83,7 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
 
     switch (param) {
         case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+        case PIPE_CAP_MAX_COMBINED_SAMPLERS:
             /* XXX I'm told this goes up to 16 */
             return 8;
         case PIPE_CAP_NPOT_TEXTURES:
@@ -143,9 +144,11 @@ static int r300_get_param(struct pipe_screen* pscreen, int param)
         case PIPE_CAP_BLEND_EQUATION_SEPARATE:
             return 1;
         case PIPE_CAP_SM3:
-            return 1;
-        case PIPE_CAP_MAX_COMBINED_SAMPLERS:
-            return 8;
+            if (r300screen->caps->is_r500) {
+                return 1;
+            } else {
+                return 0;
+            }
         default:
             debug_printf("r300: Implementation error: Bad param %d\n",
                 param);
-- 
cgit v1.2.3


From bf60eb3fec844a7c3793aba0c70da56b74a17344 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 24 Dec 2009 03:10:33 +0100
Subject: r300g: add WPOS

---
 src/gallium/drivers/r300/r300_emit.c             | 22 ++++++++++
 src/gallium/drivers/r300/r300_fs.c               | 21 +++++++++
 src/gallium/drivers/r300/r300_shader_semantics.h |  2 +
 src/gallium/drivers/r300/r300_state.c            |  6 +++
 src/gallium/drivers/r300/r300_state_derived.c    | 20 +++++++++
 src/gallium/drivers/r300/r300_vs.c               | 56 ++++++++++++++++++++++--
 6 files changed, 124 insertions(+), 3 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 596ec21bc0..30a1000c29 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -165,6 +165,28 @@ static const float * get_shader_constant(
                     vec[3] = 0;
                     break;
 
+                case RC_STATE_R300_VIEWPORT_SCALE:
+                    if (r300->rs_state->enable_vte) {
+                        vec[0] = r300->viewport_state->xscale;
+                        vec[1] = r300->viewport_state->yscale;
+                        vec[2] = r300->viewport_state->zscale;
+                    } else {
+                        vec[0] = 1;
+                        vec[1] = 1;
+                        vec[2] = 1;
+                    }
+                    break;
+
+                case RC_STATE_R300_VIEWPORT_OFFSET:
+                    if (r300->rs_state->enable_vte) {
+                        vec[0] = r300->viewport_state->xoffset;
+                        vec[1] = r300->viewport_state->yoffset;
+                        vec[2] = r300->viewport_state->zoffset;
+                    } else {
+                        /* Zeros. */
+                    }
+                    break;
+
                 default:
                     debug_printf("r300: Implementation error: "
                         "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index 4e1b61ca40..60ea9c171d 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -63,6 +63,11 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
                 fs_inputs->fog = i;
                 break;
 
+            case TGSI_SEMANTIC_POSITION:
+                assert(index == 0);
+                fs_inputs->wpos = i;
+                break;
+
             default:
                 assert(0);
         }
@@ -114,6 +119,9 @@ static void allocate_hardware_inputs(
     if (inputs->fog != ATTR_UNUSED) {
         allocate(mydata, inputs->fog, reg++);
     }
+    if (inputs->wpos != ATTR_UNUSED) {
+        allocate(mydata, inputs->wpos, reg++);
+    }
 }
 
 static void get_compare_state(
@@ -144,6 +152,7 @@ static void r300_translate_fragment_shader(
     struct r300_fragment_shader* fs = r300->fs;
     struct r300_fragment_program_compiler compiler;
     struct tgsi_to_rc ttr;
+    int wpos = fs->inputs.wpos;
 
     /* Setup the compiler. */
     memset(&compiler, 0, sizeof(compiler));
@@ -171,6 +180,18 @@ static void r300_translate_fragment_shader(
 
     fs->shadow_samplers = compiler.Base.Program.ShadowSamplers;
 
+    /**
+     * Transform the program to support WPOS.
+     *
+     * Introduce a small fragment at the start of the program that will be
+     * the only code that directly reads the WPOS input.
+     * All other code pieces that reference that input will be rewritten
+     * to read from a newly allocated temporary. */
+    if (wpos != ATTR_UNUSED) {
+        /* Moving the input to some other reg is not really necessary. */
+        rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
+    }
+
     /* Invoke the compiler */
     r3xx_compile_fragment_program(&compiler);
     if (compiler.Base.Error) {
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
index 85184e2cfd..6796841b29 100644
--- a/src/gallium/drivers/r300/r300_shader_semantics.h
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -40,6 +40,7 @@ struct r300_shader_semantics {
     int bcolor[ATTR_COLOR_COUNT];
     int generic[ATTR_GENERIC_COUNT];
     int fog;
+    int wpos;
 };
 
 static INLINE void r300_shader_semantics_reset(
@@ -50,6 +51,7 @@ static INLINE void r300_shader_semantics_reset(
     info->pos = ATTR_UNUSED;
     info->psize = ATTR_UNUSED;
     info->fog = ATTR_UNUSED;
+    info->wpos = ATTR_UNUSED;
 
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
         info->color[i] = ATTR_UNUSED;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 872a393321..ad8e210dcd 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -719,6 +719,9 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
     r300->dirty_state |= R300_NEW_RS_BLOCK;
     r300->dirty_state |= R300_NEW_SCISSOR;
     r300->dirty_state |= R300_NEW_VIEWPORT;
+    if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+        r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+    }
 }
 
 /* Free rasterizer state. */
@@ -897,6 +900,9 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
     }
 
     r300->dirty_state |= R300_NEW_VIEWPORT;
+    if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
+        r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+    }
 }
 
 static void r300_set_vertex_buffers(struct pipe_context* pipe,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 727ae7ade6..242716fccc 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -410,6 +410,26 @@ static void r300_update_rs_block(struct r300_context* r300,
         }
     }
 
+    /* Rasterize WPOS. */
+    if (vs_outputs->wpos != ATTR_UNUSED) {
+        /* Always rasterize if it's written by the VS,
+         * otherwise it locks up. */
+        rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+
+        /* Write it to the FS input register if it's used by the FS. */
+        if (fs_inputs->wpos != ATTR_UNUSED) {
+            rX00_rs_tex_write(rs, tex_count, fp_offset);
+            fp_offset++;
+        }
+        tex_count++;
+    } else {
+        /* Skip the FS input register, leave it uninitialized. */
+        /* If we try to set it to (0,0,0,1), it will lock up. */
+        if (fs_inputs->wpos != ATTR_UNUSED) {
+            fp_offset++;
+        }
+    }
+
     /* Rasterize at least one color, or bad things happen. */
     if (col_count == 0 && tex_count == 0) {
         rX00_rs_col(rs, 0, 0, TRUE);
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index c4ed0d712f..57531fc554 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -33,6 +33,8 @@
 
 #include "radeon_compiler.h"
 
+#include "util/u_math.h"
+
 /* Convert info about VS output semantics into r300_shader_semantics. */
 static void r300_shader_read_vs_outputs(
     struct tgsi_shader_info* info,
@@ -144,6 +146,13 @@ static void r300_shader_vap_output_fmt(
         gen_count++;
     }
 
+    /* WPOS. */
+    if (vs_outputs->wpos != ATTR_UNUSED) {
+        hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
+        hwfmt[3] |= (4 << (3 * gen_count));
+        gen_count++;
+    }
+
     /* XXX magic */
     assert(gen_count <= 8);
 }
@@ -195,6 +204,13 @@ static void r300_stream_locations_notcl(
         gen_count++;
     }
 
+    /* WPOS. */
+    if (vs_outputs->wpos != ATTR_UNUSED) {
+        assert(tabi < 16);
+        stream_loc[tabi++] = 6 + gen_count;
+        gen_count++;
+    }
+
     /* XXX magic */
     assert(gen_count <= 8);
 
@@ -246,6 +262,33 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
     if (outputs->fog != ATTR_UNUSED) {
         c->code->outputs[outputs->fog] = reg++;
     }
+
+    /* WPOS. */
+    if (outputs->wpos != ATTR_UNUSED) {
+        c->code->outputs[outputs->wpos] = reg++;
+    }
+}
+
+static void r300_insert_wpos(struct r300_vertex_program_compiler* c,
+                             struct r300_shader_semantics* outputs)
+{
+    int i, lastOutput = 0;
+
+    /* Find the max output index. */
+    lastOutput = MAX2(lastOutput, outputs->psize);
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        lastOutput = MAX2(lastOutput, outputs->color[i]);
+        lastOutput = MAX2(lastOutput, outputs->bcolor[i]);
+    }
+    for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+        lastOutput = MAX2(lastOutput, outputs->generic[i]);
+    }
+    lastOutput = MAX2(lastOutput, outputs->fog);
+
+    /* Set WPOS after the last output. */
+    lastOutput++;
+    rc_copy_output(&c->Base, 0, lastOutput); /* out[lastOutput] = out[0]; */
+    outputs->wpos = lastOutput;
 }
 
 void r300_translate_vertex_shader(struct r300_context* r300,
@@ -253,11 +296,10 @@ void r300_translate_vertex_shader(struct r300_context* r300,
 {
     struct r300_vertex_program_compiler compiler;
     struct tgsi_to_rc ttr;
+    boolean use_wpos = TRUE;
 
     /* Initialize. */
     r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
-    r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
-    r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
 
     /* Setup the compiler */
     rc_init(&compiler.Base);
@@ -277,9 +319,17 @@ void r300_translate_vertex_shader(struct r300_context* r300,
 
     r300_tgsi_to_rc(&ttr, vs->state.tokens);
 
-    compiler.RequiredOutputs = ~(~0 << vs->info.num_outputs);
+    compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+use_wpos));
     compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
 
+    /* Insert the WPOS output. */
+    if (use_wpos) {
+        r300_insert_wpos(&compiler, &vs->outputs);
+    }
+
+    r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
+    r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
+
     /* Invoke the compiler */
     r3xx_compile_vertex_program(&compiler);
     if (compiler.Base.Error) {
-- 
cgit v1.2.3


From 26f67a272b6668258fe3552a675414502e077dd9 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 24 Dec 2009 16:05:44 +0100
Subject: r300g: disable the rasterization of WPOS if it's unused by the FS

---
 src/gallium/drivers/r300/r300_state.c         | 12 +++++-
 src/gallium/drivers/r300/r300_state_derived.c | 18 ++-------
 src/gallium/drivers/r300/r300_vs.c            | 56 ++++++++++++++++++---------
 src/gallium/drivers/r300/r300_vs.h            |  6 +++
 4 files changed, 58 insertions(+), 34 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index ad8e210dcd..affd0b4591 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -556,6 +556,10 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
     r300->fs = fs;
     r300_pick_fragment_shader(r300);
 
+    if (r300->vs && r300_vertex_shader_setup_wpos(r300)) {
+        r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
+    }
+
     r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
 }
 
@@ -974,7 +978,13 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
         }
 
         r300->vs = vs;
-        r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
+        if (r300->fs) {
+            r300_vertex_shader_setup_wpos(r300);
+        }
+
+        r300->dirty_state |=
+            R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS |
+            R300_NEW_VERTEX_FORMAT;
     } else {
         draw_flush(r300->draw);
         draw_bind_vertex_shader(r300->draw,
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 242716fccc..2a12a02e18 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -411,23 +411,13 @@ static void r300_update_rs_block(struct r300_context* r300,
     }
 
     /* Rasterize WPOS. */
-    if (vs_outputs->wpos != ATTR_UNUSED) {
-        /* Always rasterize if it's written by the VS,
-         * otherwise it locks up. */
+    /* If the FS doesn't need it, it's not written by the VS. */
+    if (fs_inputs->wpos != ATTR_UNUSED) {
         rX00_rs_tex(rs, tex_count, tex_count, FALSE);
+        rX00_rs_tex_write(rs, tex_count, fp_offset);
 
-        /* Write it to the FS input register if it's used by the FS. */
-        if (fs_inputs->wpos != ATTR_UNUSED) {
-            rX00_rs_tex_write(rs, tex_count, fp_offset);
-            fp_offset++;
-        }
+        fp_offset++;
         tex_count++;
-    } else {
-        /* Skip the FS input register, leave it uninitialized. */
-        /* If we try to set it to (0,0,0,1), it will lock up. */
-        if (fs_inputs->wpos != ATTR_UNUSED) {
-            fp_offset++;
-        }
     }
 
     /* Rasterize at least one color, or bad things happen. */
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 57531fc554..3512a6dc76 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
 #include "r300_vs.h"
+#include "r300_fs.h"
 
 #include "r300_context.h"
 #include "r300_screen.h"
@@ -90,10 +91,10 @@ static void r300_shader_read_vs_outputs(
     }
 }
 
-static void r300_shader_vap_output_fmt(
-    struct r300_shader_semantics* vs_outputs,
-    uint* hwfmt)
+static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
 {
+    struct r300_shader_semantics* vs_outputs = &vs->outputs;
+    uint32_t* hwfmt = vs->hwfmt;
     int i, gen_count;
 
     /* Do the actual vertex_info setup.
@@ -146,15 +147,11 @@ static void r300_shader_vap_output_fmt(
         gen_count++;
     }
 
-    /* WPOS. */
-    if (vs_outputs->wpos != ATTR_UNUSED) {
-        hwfmt[1] |= (R300_INPUT_CNTL_TC0 << gen_count);
-        hwfmt[3] |= (4 << (3 * gen_count));
-        gen_count++;
-    }
-
     /* XXX magic */
     assert(gen_count <= 8);
+
+    /* WPOS. */
+    vs->wpos_tex_output = gen_count;
 }
 
 /* Sets up stream mapping to equivalent VS outputs if TCL is bypassed
@@ -211,9 +208,6 @@ static void r300_stream_locations_notcl(
         gen_count++;
     }
 
-    /* XXX magic */
-    assert(gen_count <= 8);
-
     for (; tabi < 16;) {
         stream_loc[tabi++] = -1;
     }
@@ -296,7 +290,6 @@ void r300_translate_vertex_shader(struct r300_context* r300,
 {
     struct r300_vertex_program_compiler compiler;
     struct tgsi_to_rc ttr;
-    boolean use_wpos = TRUE;
 
     /* Initialize. */
     r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
@@ -319,15 +312,13 @@ void r300_translate_vertex_shader(struct r300_context* r300,
 
     r300_tgsi_to_rc(&ttr, vs->state.tokens);
 
-    compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+use_wpos));
+    compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs+1));
     compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
 
     /* Insert the WPOS output. */
-    if (use_wpos) {
-        r300_insert_wpos(&compiler, &vs->outputs);
-    }
+    r300_insert_wpos(&compiler, &vs->outputs);
 
-    r300_shader_vap_output_fmt(&vs->outputs, vs->hwfmt);
+    r300_shader_vap_output_fmt(vs);
     r300_stream_locations_notcl(&vs->outputs, vs->stream_loc_notcl);
 
     /* Invoke the compiler */
@@ -342,3 +333,30 @@ void r300_translate_vertex_shader(struct r300_context* r300,
     rc_destroy(&compiler.Base);
     vs->translated = TRUE;
 }
+
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300)
+{
+    struct r300_vertex_shader* vs = r300->vs;
+    int tex_output = r300->vs->wpos_tex_output;
+    uint32_t tex_fmt = R300_INPUT_CNTL_TC0 << tex_output;
+    uint32_t* hwfmt = vs->hwfmt;
+
+    if (r300->fs->inputs.wpos != ATTR_UNUSED) {
+        /* Enable WPOS in VAP. */
+        if (!(hwfmt[1] & tex_fmt)) {
+            hwfmt[1] |= tex_fmt;
+            hwfmt[3] |= (4 << (3 * tex_output));
+
+            assert(tex_output < 8);
+            return TRUE;
+        }
+    } else {
+        /* Disable WPOS in VAP. */
+        if (hwfmt[1] & tex_fmt) {
+            hwfmt[1] &= ~tex_fmt;
+            hwfmt[3] &= ~(4 << (3 * tex_output));
+            return TRUE;
+        }
+    }
+    return FALSE;
+}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 67e9db5366..18cfeee3cd 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -43,6 +43,9 @@ struct r300_vertex_shader {
     /* Stream locations for SWTCL or if TCL is bypassed. */
     int stream_loc_notcl[16];
 
+    /* Output stream location for WPOS. */
+    int wpos_tex_output;
+
     /* Has this shader been translated yet? */
     boolean translated;
 
@@ -53,4 +56,7 @@ struct r300_vertex_shader {
 void r300_translate_vertex_shader(struct r300_context* r300,
                                   struct r300_vertex_shader* vs);
 
+/* Return TRUE if VAP (hwfmt) needs to be re-emitted. */
+boolean r300_vertex_shader_setup_wpos(struct r300_context* r300);
+
 #endif /* R300_VS_H */
-- 
cgit v1.2.3


From 1a4f242be60237fb1f1acf346b1e641167bc6cee Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Thu, 24 Dec 2009 14:23:57 +0100
Subject: r300g: if no DS buffer is set, disable reading from and writing to it

It fixes another "Bad CS" issue.
---
 src/gallium/drivers/r300/r300_emit.c  | 11 +++++++++--
 src/gallium/drivers/r300/r300_state.c |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 30a1000c29..f8bfa714fe 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -121,8 +121,15 @@ void r300_emit_dsa_state(struct r300_context* r300,
     }*/
 
     OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
-    OUT_CS(dsa->z_buffer_control);
-    OUT_CS(dsa->z_stencil_control);
+
+    if (r300->framebuffer_state.zsbuf) {
+        OUT_CS(dsa->z_buffer_control);
+        OUT_CS(dsa->z_stencil_control);
+    } else {
+        OUT_CS(0);
+        OUT_CS(0);
+    }
+
     OUT_CS(dsa->stencil_ref_mask);
     OUT_CS_REG(R300_ZB_ZTOP, r300->ztop_state.z_buffer_top);
 
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index affd0b4591..a145a7f18a 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -522,6 +522,7 @@ static void
     }
     r300->dirty_state |= R300_NEW_FRAMEBUFFERS;
     r300->dirty_state |= R300_NEW_BLEND;
+    r300->dirty_state |= R300_NEW_DSA;
 }
 
 /* Create fragment shader state. */
-- 
cgit v1.2.3


From 7679ac2caa4fbc304c38e95b1230a6cdebdbc691 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Fri, 25 Dec 2009 17:09:21 +0100
Subject: r300g: fix SWTCL stream locations of texture coordinates

It might have caused hardlocks when TCL was bypassed, not sure.
---
 src/gallium/drivers/r300/r300_vs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 3512a6dc76..cb4b6ee71e 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -187,7 +187,7 @@ static void r300_stream_locations_notcl(
     /* Texture coordinates. */
     gen_count = 0;
     for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+        if (vs_outputs->generic[i] != ATTR_UNUSED) {
             assert(tabi < 16);
             stream_loc[tabi++] = 6 + gen_count;
             gen_count++;
-- 
cgit v1.2.3


From 6f498a7eff26dc055c0a1f75ce3102aa4a1a0141 Mon Sep 17 00:00:00 2001
From: Marek Olšák <maraeo@gmail.com>
Date: Sat, 19 Dec 2009 23:55:34 +0100
Subject: r300g: add back-face color VS outputs

Required for two-sided lighting. BTW we are approaching softpipe-level
quality. ;)
---
 src/gallium/drivers/r300/r300_state_derived.c |  4 ++-
 src/gallium/drivers/r300/r300_vs.c            | 40 +++++++++++++++++++++++----
 2 files changed, 37 insertions(+), 7 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 2a12a02e18..22660a52d9 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -333,6 +333,8 @@ static void r300_update_rs_block(struct r300_context* r300,
     void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
     void (*rX00_rs_tex)(struct r300_rs_block*, int, int, boolean);
     void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
+    boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+                              vs_outputs->bcolor[1] != ATTR_UNUSED;
 
     if (r300_screen(r300->context.screen)->caps->is_r500) {
         rX00_rs_col       = r500_rs_col;
@@ -348,7 +350,7 @@ static void r300_update_rs_block(struct r300_context* r300,
 
     /* Rasterize colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
+        if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
             /* Always rasterize if it's written by the VS,
              * otherwise it locks up. */
             rX00_rs_col(rs, col_count, i, FALSE);
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index cb4b6ee71e..68aef70872 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -96,6 +96,8 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
     struct r300_shader_semantics* vs_outputs = &vs->outputs;
     uint32_t* hwfmt = vs->hwfmt;
     int i, gen_count;
+    boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+                              vs_outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Do the actual vertex_info setup.
      *
@@ -122,13 +124,19 @@ static void r300_shader_vap_output_fmt(struct r300_vertex_shader* vs)
 
     /* Colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
+        if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
             hwfmt[1] |= R300_INPUT_CNTL_COLOR;
             hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
         }
     }
 
-    /* XXX Back-face colors. */
+    /* Back-face colors. */
+    if (any_bcolor_used) {
+        for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+            hwfmt[1] |= R300_INPUT_CNTL_COLOR;
+            hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+        }
+    }
 
     /* Texture coordinates. */
     gen_count = 0;
@@ -161,6 +169,8 @@ static void r300_stream_locations_notcl(
     int* stream_loc)
 {
     int i, tabi = 0, gen_count;
+    boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+                              vs_outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Position. */
     stream_loc[tabi++] = 0;
@@ -172,14 +182,14 @@ static void r300_stream_locations_notcl(
 
     /* Colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->color[i] != ATTR_UNUSED) {
+        if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used) {
             stream_loc[tabi++] = 2 + i;
         }
     }
 
     /* Back-face colors. */
-    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
-        if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+    if (any_bcolor_used) {
+        for (i = 0; i < ATTR_COLOR_COUNT; i++) {
             stream_loc[tabi++] = 4 + i;
         }
     }
@@ -219,6 +229,8 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
     struct r300_shader_semantics* outputs = &vs->outputs;
     struct tgsi_shader_info* info = &vs->info;
     int i, reg = 0;
+    boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED ||
+                              outputs->bcolor[1] != ATTR_UNUSED;
 
     /* Fill in the input mapping */
     for (i = 0; i < info->num_inputs; i++)
@@ -236,14 +248,30 @@ static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
         c->code->outputs[outputs->psize] = reg++;
     }
 
+    /* If we're writing back-facing colors, we need to send
+     * four colors to make front/back face color selection work.
+     * If the vertex program doesn't write all 4 colors, let's
+     * pretend it does by skipping output index reg so the colors
+     * get written into the appropriate output vectors.
+     */
+
     /* Colors. */
     for (i = 0; i < ATTR_COLOR_COUNT; i++) {
         if (outputs->color[i] != ATTR_UNUSED) {
             c->code->outputs[outputs->color[i]] = reg++;
+        } else if (any_bcolor_used) {
+            reg++;
         }
     }
 
-    /* XXX Back-face colors. */
+    /* Back-face colors. */
+    for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+        if (outputs->bcolor[i] != ATTR_UNUSED) {
+            c->code->outputs[outputs->bcolor[i]] = reg++;
+        } else if (any_bcolor_used) {
+            reg++;
+        }
+    }
 
     /* Texture coordinates. */
     for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
-- 
cgit v1.2.3


From 8d38ef455196e6ddda22536e435638a02a7b48f3 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Wed, 6 Jan 2010 18:36:45 +0100
Subject: svga: Fix fs key size computation and key comparison.

This also allows us to have texture_target
back as a bitfield and save us a few bytes.
---
 src/gallium/drivers/svga/svga_state_fs.c | 9 +++++++--
 src/gallium/drivers/svga/svga_tgsi.h     | 5 ++---
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
index 272d1dd14e..bba80a93a5 100644
--- a/src/gallium/drivers/svga/svga_state_fs.c
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -40,8 +40,13 @@
 static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
                                    const struct svga_fs_compile_key *b )
 {
-   unsigned keysize = svga_fs_key_size( a );
-   return memcmp( a, b, keysize );
+   unsigned keysize_a = svga_fs_key_size( a );
+   unsigned keysize_b = svga_fs_key_size( b );
+
+   if (keysize_a != keysize_b) {
+      return (int)(keysize_a - keysize_b);
+   }
+   return memcmp( a, b, keysize_a );
 }
 
 
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
index 043b99115f..737a2213af 100644
--- a/src/gallium/drivers/svga/svga_tgsi.h
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -56,7 +56,7 @@ struct svga_fs_compile_key
       unsigned compare_func:3;
       unsigned unnormalized:1;
       unsigned width_height_idx:7;
-      ubyte texture_target;
+      unsigned texture_target:8;
    } tex[PIPE_MAX_SAMPLERS];
 };
 
@@ -119,8 +119,7 @@ static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
 
 static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
 {
-   return (const char *)&key->tex[key->num_textures].texture_target -
-      (const char *)key;
+   return (const char *)&key->tex[key->num_textures] - (const char *)key;
 }
 
 struct svga_shader_result *
-- 
cgit v1.2.3


From 4bfe1c955fe679547c8a03119d1681e33593c768 Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 7 Jan 2010 12:48:10 +0100
Subject: gallium: Pass per-element (not per-quad) LOD bias values down to
 texture sampler.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       |  58 +++++------
 src/gallium/auxiliary/tgsi/tgsi_exec.h       |   5 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c       |   2 +-
 src/gallium/drivers/softpipe/sp_tex_sample.c | 150 ++++++++++++++++-----------
 src/gallium/drivers/softpipe/sp_tex_sample.h |   6 +-
 5 files changed, 123 insertions(+), 98 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 6beeb9a2ca..dcf3cc0baa 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1509,7 +1509,7 @@ fetch_texel( struct tgsi_sampler *sampler,
              const union tgsi_exec_channel *s,
              const union tgsi_exec_channel *t,
              const union tgsi_exec_channel *p,
-             float lodbias,  /* XXX should be float[4] */
+             const union tgsi_exec_channel *lodbias,
              union tgsi_exec_channel *r,
              union tgsi_exec_channel *g,
              union tgsi_exec_channel *b,
@@ -1518,7 +1518,7 @@ fetch_texel( struct tgsi_sampler *sampler,
    uint j;
    float rgba[NUM_CHANNELS][QUAD_SIZE];
 
-   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba);
+   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias->f, rgba);
 
    for (j = 0; j < 4; j++) {
       r->f[j] = rgba[0][j];
@@ -1542,21 +1542,23 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
    const uint unit = inst->Src[1].Register.Index;
    union tgsi_exec_channel r[4];
+   const union tgsi_exec_channel *lodBias = &ZeroVec;
    uint chan_index;
-   float lodBias = 0.0f;
+
+   if (modifier != TEX_MODIFIER_NONE) {
+      FETCH(&r[3], 0, CHAN_W);
+      if (modifier != TEX_MODIFIER_PROJECTED) {
+         lodBias = &r[3];
+      }
+   }
 
    switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
       FETCH(&r[0], 0, CHAN_X);
 
-      if (modifier != TEX_MODIFIER_NONE) {
-         FETCH(&r[1], 0, CHAN_W);
-         if (modifier == TEX_MODIFIER_PROJECTED) {
-            micro_div(&r[0], &r[0], &r[1]);
-         } else {
-            lodBias = r[1].f[0];
-         }
+      if (modifier == TEX_MODIFIER_PROJECTED) {
+         micro_div(&r[0], &r[0], &r[3]);
       }
 
       fetch_texel(mach->Samplers[unit],
@@ -1572,15 +1574,10 @@ exec_tex(struct tgsi_exec_machine *mach,
       FETCH(&r[1], 0, CHAN_Y);
       FETCH(&r[2], 0, CHAN_Z);
 
-      if (modifier != TEX_MODIFIER_NONE) {
-         FETCH(&r[3], 0, CHAN_W);
-         if (modifier == TEX_MODIFIER_PROJECTED) {
-            micro_div(&r[0], &r[0], &r[3]);
-            micro_div(&r[1], &r[1], &r[3]);
-            micro_div(&r[2], &r[2], &r[3]);
-         } else {
-            lodBias = r[3].f[0];
-         }
+      if (modifier == TEX_MODIFIER_PROJECTED) {
+         micro_div(&r[0], &r[0], &r[3]);
+         micro_div(&r[1], &r[1], &r[3]);
+         micro_div(&r[2], &r[2], &r[3]);
       }
 
       fetch_texel(mach->Samplers[unit],
@@ -1594,15 +1591,10 @@ exec_tex(struct tgsi_exec_machine *mach,
       FETCH(&r[1], 0, CHAN_Y);
       FETCH(&r[2], 0, CHAN_Z);
 
-      if (modifier != TEX_MODIFIER_NONE) {
-         FETCH(&r[3], 0, CHAN_W);
-         if (modifier == TEX_MODIFIER_PROJECTED) {
-            micro_div(&r[0], &r[0], &r[3]);
-            micro_div(&r[1], &r[1], &r[3]);
-            micro_div(&r[2], &r[2], &r[3]);
-         } else {
-            lodBias = r[3].f[0];
-         }
+      if (modifier == TEX_MODIFIER_PROJECTED) {
+         micro_div(&r[0], &r[0], &r[3]);
+         micro_div(&r[1], &r[1], &r[3]);
+         micro_div(&r[2], &r[2], &r[3]);
       }
 
       fetch_texel(mach->Samplers[unit],
@@ -1638,8 +1630,8 @@ exec_txd(struct tgsi_exec_machine *mach,
       FETCH(&r[0], 0, CHAN_X);
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &ZeroVec, &ZeroVec, 0.0f,  /* S, T, P, BIAS */
-                  &r[0], &r[1], &r[2], &r[3]);      /* R, G, B, A */
+                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
    case TGSI_TEXTURE_2D:
@@ -1652,8 +1644,8 @@ exec_txd(struct tgsi_exec_machine *mach,
       FETCH(&r[2], 0, CHAN_Z);
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], 0.0f,    /* inputs */
-                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+                  &r[0], &r[1], &r[2], &ZeroVec,   /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
    case TGSI_TEXTURE_3D:
@@ -1664,7 +1656,7 @@ exec_txd(struct tgsi_exec_machine *mach,
       FETCH(&r[2], 0, CHAN_Z);
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], 0.0f,
+                  &r[0], &r[1], &r[2], &ZeroVec,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index aa3a98d7f1..67853ed4fe 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -2,6 +2,7 @@
  * 
  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -35,11 +36,13 @@
 extern "C" {
 #endif
 
+
 #define MAX_LABELS (4 * 1024)  /**< basically, max instructions */
 
 #define NUM_CHANNELS 4  /* R,G,B,A */
 #define QUAD_SIZE    4  /* 4 pixel/quad */
 
+
 /**
   * Registers may be treated as float, signed int or unsigned int.
   */
@@ -80,7 +83,7 @@ struct tgsi_sampler
                        const float s[QUAD_SIZE],
                        const float t[QUAD_SIZE],
                        const float p[QUAD_SIZE],
-                       float lodbias,
+                       const float lodbias[QUAD_SIZE],
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
 };
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index 118059ace9..e133bc0255 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -1433,7 +1433,7 @@ fetch_texel( struct tgsi_sampler **sampler,
                               &store[0],  /* s */
                               &store[4],  /* t */
                               &store[8],  /* r */
-                              store[12],  /* lodbias */
+                              &store[12], /* lodbias */
                               rgba);      /* results */
 
       memcpy( store, rgba, 16 * sizeof(float));
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 903d21a021..9c535f82a5 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -2,7 +2,7 @@
  * 
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
- * Copyright 2008 VMware, Inc.  All rights reserved.
+ * Copyright 2008-2010 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -514,21 +514,15 @@ static float
 compute_lambda_1d(const struct sp_sampler_varient *samp,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias)
+                  const float p[QUAD_SIZE])
 {
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    float rho = MAX2(dsdx, dsdy) * texture->width0;
-   float lambda;
 
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
-   return lambda;
+   return util_fast_log2(rho);
 }
 
 
@@ -536,8 +530,7 @@ static float
 compute_lambda_2d(const struct sp_sampler_varient *samp,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias)
+                  const float p[QUAD_SIZE])
 {
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
@@ -548,13 +541,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
    float maxx = MAX2(dsdx, dsdy) * texture->width0;
    float maxy = MAX2(dtdx, dtdy) * texture->height0;
    float rho  = MAX2(maxx, maxy);
-   float lambda;
-
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
 
-   return lambda;
+   return util_fast_log2(rho);
 }
 
 
@@ -562,8 +550,7 @@ static float
 compute_lambda_3d(const struct sp_sampler_varient *samp,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias)
+                  const float p[QUAD_SIZE])
 {
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
@@ -576,31 +563,26 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
    float maxx = MAX2(dsdx, dsdy) * texture->width0;
    float maxy = MAX2(dtdx, dtdy) * texture->height0;
    float maxz = MAX2(dpdx, dpdy) * texture->depth0;
-   float rho, lambda;
+   float rho;
 
    rho = MAX2(maxx, maxy);
    rho = MAX2(rho, maxz);
 
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
-   return lambda;
+   return util_fast_log2(rho);
 }
 
 
 /**
  * Compute lambda for a vertex texture sampler.
- * Since there aren't derivatives to use, just return the LOD bias.
+ * Since there aren't derivatives to use, just return 0.
  */
 static float
 compute_lambda_vert(const struct sp_sampler_varient *samp,
                     const float s[QUAD_SIZE],
                     const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias)
+                    const float p[QUAD_SIZE])
 {
-   return lodbias;
+   return 0.0f;
 }
 
 
@@ -769,7 +751,7 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
                                 const float s[QUAD_SIZE],
                                 const float t[QUAD_SIZE],
                                 const float p[QUAD_SIZE],
-                                float lodbias,
+                                const float lodbias[QUAD_SIZE],
                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -827,7 +809,7 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
                                  const float s[QUAD_SIZE],
                                  const float t[QUAD_SIZE],
                                  const float p[QUAD_SIZE],
-                                 float lodbias,
+                                 const float lodbias[QUAD_SIZE],
                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -866,7 +848,7 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
                                 const float s[QUAD_SIZE],
                                 const float t[QUAD_SIZE],
                                 const float p[QUAD_SIZE],
-                                float lodbias,
+                                const float lodbias[QUAD_SIZE],
                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -914,7 +896,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
                         const float s[QUAD_SIZE],
                         const float t[QUAD_SIZE],
                         const float p[QUAD_SIZE],
-                        float lodbias,
+                        const float lodbias[QUAD_SIZE],
                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -949,7 +931,7 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
                       const float s[QUAD_SIZE],
                       const float t[QUAD_SIZE],
                       const float p[QUAD_SIZE],
-                      float lodbias,
+                      const float lodbias[QUAD_SIZE],
                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -996,7 +978,7 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
                         const float s[QUAD_SIZE],
                         const float t[QUAD_SIZE],
                         const float p[QUAD_SIZE],
-                        float lodbias,
+                        const float lodbias[QUAD_SIZE],
                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1035,7 +1017,7 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
                       const float s[QUAD_SIZE],
                       const float t[QUAD_SIZE],
                       const float p[QUAD_SIZE],
-                      float lodbias,
+                      const float lodbias[QUAD_SIZE],
                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1076,7 +1058,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     float lodbias,
+                     const float lodbias[QUAD_SIZE],
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1115,7 +1097,7 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     float lodbias,
+                     const float lodbias[QUAD_SIZE],
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1161,7 +1143,7 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
                        const float s[QUAD_SIZE],
                        const float t[QUAD_SIZE],
                        const float p[QUAD_SIZE],
-                       float lodbias,
+                       const float lodbias[QUAD_SIZE],
                        float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1209,7 +1191,7 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     float lodbias,
+                     const float lodbias[QUAD_SIZE],
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1261,29 +1243,54 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
 }
 
 
+/* Calculate level of detail for every fragment.
+ * Note that lambda has already been biased by global LOD bias.
+ */
+static INLINE void
+compute_lod(const struct pipe_sampler_state *sampler,
+            const float biased_lambda,
+            const float lodbias[QUAD_SIZE],
+            float lod[QUAD_SIZE])
+{
+   uint i;
+
+   for (i = 0; i < QUAD_SIZE; i++) {
+      lod[i] = biased_lambda + lodbias[i];
+      lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
+   }
+}
+
+
 static void
 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
-                  float lodbias,
+                  const float lodbias[QUAD_SIZE],
                   float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
    const struct pipe_texture *texture = samp->texture;
    int level0;
    float lambda;
+   float lod[QUAD_SIZE];
 
-   lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+
+   compute_lod(samp->sampler, lambda, lodbias, lod);
+
+   /* XXX: Take into account all lod values.
+    */
+   lambda = lod[0];
    level0 = (int)lambda;
 
    if (lambda < 0.0) { 
       samp->level = 0;
-      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
    }
    else if (level0 >= texture->last_level) {
       samp->level = texture->last_level;
-      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
    }
    else {
       float levelBlend = lambda - level0;
@@ -1292,10 +1299,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
       int c,j;
 
       samp->level = level0;
-      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba0 );
+      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba0 );
 
       samp->level = level0+1;
-      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba1 );
+      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba1 );
 
       for (j = 0; j < QUAD_SIZE; j++) {
          for (c = 0; c < 4; c++) {
@@ -1311,14 +1318,21 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
                    const float s[QUAD_SIZE],
                    const float t[QUAD_SIZE],
                    const float p[QUAD_SIZE],
-                   float lodbias,
+                   const float lodbias[QUAD_SIZE],
                    float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
    const struct pipe_texture *texture = samp->texture;
    float lambda;
+   float lod[QUAD_SIZE];
 
-   lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+
+   compute_lod(samp->sampler, lambda, lodbias, lod);
+
+   /* XXX: Take into account all lod values.
+    */
+   lambda = lod[0];
 
    if (lambda < 0.0) { 
       samp->level = 0;
@@ -1327,7 +1341,7 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
    else {
       samp->level = (int)(lambda + 0.5) ;
       samp->level = MIN2(samp->level, (int)texture->last_level);
-      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
    }
 
 #if 0
@@ -1345,17 +1359,26 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
                 const float s[QUAD_SIZE],
                 const float t[QUAD_SIZE],
                 const float p[QUAD_SIZE],
-                float lodbias,
+                const float lodbias[QUAD_SIZE],
                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
-   float lambda = samp->compute_lambda(samp, s, t, p, lodbias);
+   float lambda;
+   float lod[QUAD_SIZE];
+
+   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+
+   compute_lod(samp->sampler, lambda, lodbias, lod);
+
+   /* XXX: Take into account all lod values.
+    */
+   lambda = lod[0];
 
    if (lambda < 0.0) { 
-      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
    }
    else {
-      samp->min_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
    }
 }
 
@@ -1371,15 +1394,22 @@ mip_filter_linear_2d_linear_repeat_POT(
    const float s[QUAD_SIZE],
    const float t[QUAD_SIZE],
    const float p[QUAD_SIZE],
-   float lodbias,
+   const float lodbias[QUAD_SIZE],
    float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
    const struct pipe_texture *texture = samp->texture;
    int level0;
    float lambda;
+   float lod[QUAD_SIZE];
+
+   lambda = compute_lambda_2d(samp, s, t, p) + samp->sampler->lod_bias;
 
-   lambda = compute_lambda_2d(samp, s, t, p, lodbias);
+   compute_lod(samp->sampler, lambda, lodbias, lod);
+
+   /* XXX: Take into account all lod values.
+    */
+   lambda = lod[0];
    level0 = (int)lambda;
 
    /* Catches both negative and large values of level0:
@@ -1390,7 +1420,7 @@ mip_filter_linear_2d_linear_repeat_POT(
       else
          samp->level = texture->last_level;
 
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba );
+      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba );
    }
    else {
       float levelBlend = lambda - level0;
@@ -1399,10 +1429,10 @@ mip_filter_linear_2d_linear_repeat_POT(
       int c,j;
 
       samp->level = level0;
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba0 );
+      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba0 );
 
       samp->level = level0+1;
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, 0, rgba1 );
+      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba1 );
 
       for (j = 0; j < QUAD_SIZE; j++) {
          for (c = 0; c < 4; c++) {
@@ -1422,7 +1452,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
                const float s[QUAD_SIZE],
                const float t[QUAD_SIZE],
                const float p[QUAD_SIZE],
-               float lodbias,
+               const float lodbias[QUAD_SIZE],
                float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1508,7 +1538,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
             const float s[QUAD_SIZE],
             const float t[QUAD_SIZE],
             const float p[QUAD_SIZE],
-            float lodbias,
+            const float lodbias[QUAD_SIZE],
             float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index b0797711d3..d01b384e85 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -2,6 +2,7 @@
  * 
  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2010 VMware, Inc.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -46,14 +47,13 @@ typedef void (*wrap_linear_func)(const float s[4],
 typedef float (*compute_lambda_func)(const struct sp_sampler_varient *sampler,
                                      const float s[QUAD_SIZE],
                                      const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias);
+                                     const float p[QUAD_SIZE]);
 
 typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
                             const float s[QUAD_SIZE],
                             const float t[QUAD_SIZE],
                             const float p[QUAD_SIZE],
-                            float lodbias,
+                            const float lodbias[QUAD_SIZE],
                             float rgba[NUM_CHANNELS][QUAD_SIZE]);
 
 
-- 
cgit v1.2.3


From 4440428faa82f01b4dfb4be89618be2aaf153abd Mon Sep 17 00:00:00 2001
From: Michal Krol <michal@vmware.com>
Date: Thu, 7 Jan 2010 13:48:41 +0100
Subject: gallium: Fix texture sampling with explicit LOD in softpipe.

---
 src/gallium/auxiliary/tgsi/tgsi_exec.c       |  30 +++++--
 src/gallium/auxiliary/tgsi/tgsi_exec.h       |   8 +-
 src/gallium/auxiliary/tgsi/tgsi_sse2.c       |  12 +--
 src/gallium/drivers/softpipe/sp_tex_sample.c | 113 ++++++++++++++++++---------
 src/gallium/drivers/softpipe/sp_tex_sample.h |   3 +-
 5 files changed, 113 insertions(+), 53 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index dcf3cc0baa..b139cab8ed 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -1509,7 +1509,8 @@ fetch_texel( struct tgsi_sampler *sampler,
              const union tgsi_exec_channel *s,
              const union tgsi_exec_channel *t,
              const union tgsi_exec_channel *p,
-             const union tgsi_exec_channel *lodbias,
+             const union tgsi_exec_channel *c0,
+             enum tgsi_sampler_control control,
              union tgsi_exec_channel *r,
              union tgsi_exec_channel *g,
              union tgsi_exec_channel *b,
@@ -1518,7 +1519,7 @@ fetch_texel( struct tgsi_sampler *sampler,
    uint j;
    float rgba[NUM_CHANNELS][QUAD_SIZE];
 
-   sampler->get_samples(sampler, s->f, t->f, p->f, lodbias->f, rgba);
+   sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba);
 
    for (j = 0; j < 4; j++) {
       r->f[j] = rgba[0][j];
@@ -1542,16 +1543,23 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
    const uint unit = inst->Src[1].Register.Index;
    union tgsi_exec_channel r[4];
-   const union tgsi_exec_channel *lodBias = &ZeroVec;
+   const union tgsi_exec_channel *lod = &ZeroVec;
+   enum tgsi_sampler_control control;
    uint chan_index;
 
    if (modifier != TEX_MODIFIER_NONE) {
       FETCH(&r[3], 0, CHAN_W);
       if (modifier != TEX_MODIFIER_PROJECTED) {
-         lodBias = &r[3];
+         lod = &r[3];
       }
    }
 
+   if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
+      control = tgsi_sampler_lod_explicit;
+   } else {
+      control = tgsi_sampler_lod_bias;
+   }
+
    switch (inst->Texture.Texture) {
    case TGSI_TEXTURE_1D:
    case TGSI_TEXTURE_SHADOW1D:
@@ -1562,8 +1570,9 @@ exec_tex(struct tgsi_exec_machine *mach,
       }
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */
-                  &r[0], &r[1], &r[2], &r[3]);        /* R, G, B, A */
+                  &r[0], &ZeroVec, &ZeroVec, lod,  /* S, T, P, LOD */
+                  control,
+                  &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
       break;
 
    case TGSI_TEXTURE_2D:
@@ -1581,7 +1590,8 @@ exec_tex(struct tgsi_exec_machine *mach,
       }
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias, /* inputs */
+                  &r[0], &r[1], &r[2], lod,     /* S, T, P, LOD */
+                  control,
                   &r[0], &r[1], &r[2], &r[3]);  /* outputs */
       break;
 
@@ -1598,7 +1608,8 @@ exec_tex(struct tgsi_exec_machine *mach,
       }
 
       fetch_texel(mach->Samplers[unit],
-                  &r[0], &r[1], &r[2], lodBias,
+                  &r[0], &r[1], &r[2], lod,
+                  control,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
 
@@ -1631,6 +1642,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Samplers[unit],
                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, BIAS */
+                  tgsi_sampler_lod_bias,
                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
       break;
 
@@ -1645,6 +1657,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Samplers[unit],
                   &r[0], &r[1], &r[2], &ZeroVec,   /* inputs */
+                  tgsi_sampler_lod_bias,
                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
       break;
 
@@ -1657,6 +1670,7 @@ exec_txd(struct tgsi_exec_machine *mach,
 
       fetch_texel(mach->Samplers[unit],
                   &r[0], &r[1], &r[2], &ZeroVec,
+                  tgsi_sampler_lod_bias,
                   &r[0], &r[1], &r[2], &r[3]);
       break;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 67853ed4fe..59e3b445cc 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -72,6 +72,11 @@ struct tgsi_interp_coef
    float dady[NUM_CHANNELS];
 };
 
+enum tgsi_sampler_control {
+   tgsi_sampler_lod_bias,
+   tgsi_sampler_lod_explicit
+};
+
 /**
  * Information for sampling textures, which must be implemented
  * by code outside the TGSI executor.
@@ -83,7 +88,8 @@ struct tgsi_sampler
                        const float s[QUAD_SIZE],
                        const float t[QUAD_SIZE],
                        const float p[QUAD_SIZE],
-                       const float lodbias[QUAD_SIZE],
+                       const float c0[QUAD_SIZE],
+                       enum tgsi_sampler_control control,
                        float rgba[NUM_CHANNELS][QUAD_SIZE]);
 };
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index e133bc0255..2e13a7aaf9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -2,6 +2,7 @@
  * 
  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  * All Rights Reserved.
+ * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
  * 
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -1418,13 +1419,13 @@ fetch_texel( struct tgsi_sampler **sampler,
                 sampler, *sampler,
                 store );
 
-   debug_printf("lodbias %f\n", store[12]);
-
    for (j = 0; j < 4; j++)
-      debug_printf("sample %d texcoord %f %f\n", 
+      debug_printf("sample %d texcoord %f %f %f lodbias %f\n",
                    j, 
                    store[0+j],
-                   store[4+j]);
+                   store[4+j],
+                   store[8 + j],
+                   store[12 + j]);
 #endif
 
    {
@@ -1434,6 +1435,7 @@ fetch_texel( struct tgsi_sampler **sampler,
                               &store[4],  /* t */
                               &store[8],  /* r */
                               &store[12], /* lodbias */
+                              tgsi_sampler_lod_bias,
                               rgba);      /* results */
 
       memcpy( store, rgba, 16 * sizeof(float));
@@ -2506,7 +2508,7 @@ emit_instruction(
       break;
 
    case TGSI_OPCODE_TXL:
-      emit_tex( func, inst, TRUE, FALSE );
+      return 0;
       break;
 
    case TGSI_OPCODE_TXP:
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index 9c535f82a5..1ae8fecacf 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -751,7 +751,8 @@ img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
                                 const float s[QUAD_SIZE],
                                 const float t[QUAD_SIZE],
                                 const float p[QUAD_SIZE],
-                                const float lodbias[QUAD_SIZE],
+                                const float c0[QUAD_SIZE],
+                                enum tgsi_sampler_control control,
                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -809,7 +810,8 @@ img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
                                  const float s[QUAD_SIZE],
                                  const float t[QUAD_SIZE],
                                  const float p[QUAD_SIZE],
-                                 const float lodbias[QUAD_SIZE],
+                                 const float c0[QUAD_SIZE],
+                                 enum tgsi_sampler_control control,
                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -848,7 +850,8 @@ img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
                                 const float s[QUAD_SIZE],
                                 const float t[QUAD_SIZE],
                                 const float p[QUAD_SIZE],
-                                const float lodbias[QUAD_SIZE],
+                                const float c0[QUAD_SIZE],
+                                enum tgsi_sampler_control control,
                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -896,7 +899,8 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
                         const float s[QUAD_SIZE],
                         const float t[QUAD_SIZE],
                         const float p[QUAD_SIZE],
-                        const float lodbias[QUAD_SIZE],
+                        const float c0[QUAD_SIZE],
+                        enum tgsi_sampler_control control,
                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -931,7 +935,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
                       const float s[QUAD_SIZE],
                       const float t[QUAD_SIZE],
                       const float p[QUAD_SIZE],
-                      const float lodbias[QUAD_SIZE],
+                      const float c0[QUAD_SIZE],
+                      enum tgsi_sampler_control control,
                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -978,7 +983,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
                         const float s[QUAD_SIZE],
                         const float t[QUAD_SIZE],
                         const float p[QUAD_SIZE],
-                        const float lodbias[QUAD_SIZE],
+                        const float c0[QUAD_SIZE],
+                        enum tgsi_sampler_control control,
                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1017,7 +1023,8 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
                       const float s[QUAD_SIZE],
                       const float t[QUAD_SIZE],
                       const float p[QUAD_SIZE],
-                      const float lodbias[QUAD_SIZE],
+                      const float c0[QUAD_SIZE],
+                      enum tgsi_sampler_control control,
                       float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1058,7 +1065,8 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     const float lodbias[QUAD_SIZE],
+                     const float c0[QUAD_SIZE],
+                     enum tgsi_sampler_control control,
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1097,7 +1105,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     const float lodbias[QUAD_SIZE],
+                     const float c0[QUAD_SIZE],
+                     enum tgsi_sampler_control control,
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1143,7 +1152,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
                        const float s[QUAD_SIZE],
                        const float t[QUAD_SIZE],
                        const float p[QUAD_SIZE],
-                       const float lodbias[QUAD_SIZE],
+                       const float c0[QUAD_SIZE],
+                       enum tgsi_sampler_control control,
                        float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1191,7 +1201,8 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
                      const float s[QUAD_SIZE],
                      const float t[QUAD_SIZE],
                      const float p[QUAD_SIZE],
-                     const float lodbias[QUAD_SIZE],
+                     const float c0[QUAD_SIZE],
+                     enum tgsi_sampler_control control,
                      float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    const struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1266,7 +1277,8 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
                   const float s[QUAD_SIZE],
                   const float t[QUAD_SIZE],
                   const float p[QUAD_SIZE],
-                  const float lodbias[QUAD_SIZE],
+                  const float c0[QUAD_SIZE],
+                  enum tgsi_sampler_control control,
                   float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1275,9 +1287,14 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
    float lambda;
    float lod[QUAD_SIZE];
 
-   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+   if (control == tgsi_sampler_lod_bias) {
+      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+      compute_lod(samp->sampler, lambda, c0, lod);
+   } else {
+      assert(control == tgsi_sampler_lod_explicit);
 
-   compute_lod(samp->sampler, lambda, lodbias, lod);
+      memcpy(lod, c0, sizeof(lod));
+   }
 
    /* XXX: Take into account all lod values.
     */
@@ -1286,11 +1303,11 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
 
    if (lambda < 0.0) { 
       samp->level = 0;
-      samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
+      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
    else if (level0 >= texture->last_level) {
       samp->level = texture->last_level;
-      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
+      samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
    else {
       float levelBlend = lambda - level0;
@@ -1299,10 +1316,10 @@ mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
       int c,j;
 
       samp->level = level0;
-      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba0 );
+      samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
 
       samp->level = level0+1;
-      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba1 );
+      samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
 
       for (j = 0; j < QUAD_SIZE; j++) {
          for (c = 0; c < 4; c++) {
@@ -1318,7 +1335,8 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
                    const float s[QUAD_SIZE],
                    const float t[QUAD_SIZE],
                    const float p[QUAD_SIZE],
-                   const float lodbias[QUAD_SIZE],
+                   const float c0[QUAD_SIZE],
+                   enum tgsi_sampler_control control,
                    float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1326,9 +1344,14 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
    float lambda;
    float lod[QUAD_SIZE];
 
-   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+   if (control == tgsi_sampler_lod_bias) {
+      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+      compute_lod(samp->sampler, lambda, c0, lod);
+   } else {
+      assert(control == tgsi_sampler_lod_explicit);
 
-   compute_lod(samp->sampler, lambda, lodbias, lod);
+      memcpy(lod, c0, sizeof(lod));
+   }
 
    /* XXX: Take into account all lod values.
     */
@@ -1336,12 +1359,12 @@ mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
 
    if (lambda < 0.0) { 
       samp->level = 0;
-      samp->mag_img_filter( tgsi_sampler, s, t, p, 0, rgba );
+      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
    else {
       samp->level = (int)(lambda + 0.5) ;
       samp->level = MIN2(samp->level, (int)texture->last_level);
-      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
+      samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
 
 #if 0
@@ -1359,26 +1382,32 @@ mip_filter_none(struct tgsi_sampler *tgsi_sampler,
                 const float s[QUAD_SIZE],
                 const float t[QUAD_SIZE],
                 const float p[QUAD_SIZE],
-                const float lodbias[QUAD_SIZE],
+                const float c0[QUAD_SIZE],
+                enum tgsi_sampler_control control,
                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
    float lambda;
    float lod[QUAD_SIZE];
 
-   lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+   if (control == tgsi_sampler_lod_bias) {
+      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+      compute_lod(samp->sampler, lambda, c0, lod);
+   } else {
+      assert(control == tgsi_sampler_lod_explicit);
 
-   compute_lod(samp->sampler, lambda, lodbias, lod);
+      memcpy(lod, c0, sizeof(lod));
+   }
 
    /* XXX: Take into account all lod values.
     */
    lambda = lod[0];
 
    if (lambda < 0.0) { 
-      samp->mag_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
+      samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
    else {
-      samp->min_img_filter( tgsi_sampler, s, t, p, NULL, rgba );
+      samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
 }
 
@@ -1394,7 +1423,8 @@ mip_filter_linear_2d_linear_repeat_POT(
    const float s[QUAD_SIZE],
    const float t[QUAD_SIZE],
    const float p[QUAD_SIZE],
-   const float lodbias[QUAD_SIZE],
+   const float c0[QUAD_SIZE],
+   enum tgsi_sampler_control control,
    float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1403,9 +1433,14 @@ mip_filter_linear_2d_linear_repeat_POT(
    float lambda;
    float lod[QUAD_SIZE];
 
-   lambda = compute_lambda_2d(samp, s, t, p) + samp->sampler->lod_bias;
+   if (control == tgsi_sampler_lod_bias) {
+      lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
+      compute_lod(samp->sampler, lambda, c0, lod);
+   } else {
+      assert(control == tgsi_sampler_lod_explicit);
 
-   compute_lod(samp->sampler, lambda, lodbias, lod);
+      memcpy(lod, c0, sizeof(lod));
+   }
 
    /* XXX: Take into account all lod values.
     */
@@ -1420,7 +1455,7 @@ mip_filter_linear_2d_linear_repeat_POT(
       else
          samp->level = texture->last_level;
 
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba );
+      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
    }
    else {
       float levelBlend = lambda - level0;
@@ -1429,10 +1464,10 @@ mip_filter_linear_2d_linear_repeat_POT(
       int c,j;
 
       samp->level = level0;
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba0 );
+      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
 
       samp->level = level0+1;
-      img_filter_2d_linear_repeat_POT( tgsi_sampler, s, t, p, NULL, rgba1 );
+      img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
 
       for (j = 0; j < QUAD_SIZE; j++) {
          for (c = 0; c < 4; c++) {
@@ -1452,7 +1487,8 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
                const float s[QUAD_SIZE],
                const float t[QUAD_SIZE],
                const float p[QUAD_SIZE],
-               const float lodbias[QUAD_SIZE],
+               const float c0[QUAD_SIZE],
+               enum tgsi_sampler_control control,
                float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1460,7 +1496,7 @@ sample_compare(struct tgsi_sampler *tgsi_sampler,
    int j, k0, k1, k2, k3;
    float val;
 
-   samp->mip_filter( tgsi_sampler, s, t, p, lodbias, rgba );
+   samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
 
    /**
     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
@@ -1538,7 +1574,8 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
             const float s[QUAD_SIZE],
             const float t[QUAD_SIZE],
             const float p[QUAD_SIZE],
-            const float lodbias[QUAD_SIZE],
+            const float c0[QUAD_SIZE],
+            enum tgsi_sampler_control control,
             float rgba[NUM_CHANNELS][QUAD_SIZE])
 {
    struct sp_sampler_varient *samp = sp_sampler_varient(tgsi_sampler);
@@ -1619,7 +1656,7 @@ sample_cube(struct tgsi_sampler *tgsi_sampler,
     * is not active, this will point somewhere deeper into the
     * pipeline, eg. to mip_filter or even img_filter.
     */
-   samp->compare(tgsi_sampler, ssss, tttt, NULL, lodbias, rgba);
+   samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba);
 }
 
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
index d01b384e85..b6e66c998a 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.h
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
@@ -53,7 +53,8 @@ typedef void (*filter_func)(struct tgsi_sampler *tgsi_sampler,
                             const float s[QUAD_SIZE],
                             const float t[QUAD_SIZE],
                             const float p[QUAD_SIZE],
-                            const float lodbias[QUAD_SIZE],
+                            const float c0[QUAD_SIZE],
+                            enum tgsi_sampler_control control,
                             float rgba[NUM_CHANNELS][QUAD_SIZE]);
 
 
-- 
cgit v1.2.3


From 7bd7e2da75bfee90037dcb34e55c5a338a952c1a Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 7 Jan 2010 15:34:52 +0000
Subject: llvmpipe: Axe texture sampling code inherited from softpipe.

It was used only as a reference, since texture sampling is now code generated.
It has already been axed in the lp-binning branch too.

This fixes the llvmpipe build after recent sampling changes.
---
 src/gallium/drivers/llvmpipe/Makefile          |    1 -
 src/gallium/drivers/llvmpipe/SConscript        |    1 -
 src/gallium/drivers/llvmpipe/lp_state_fs.c     |    5 -
 src/gallium/drivers/llvmpipe/lp_tex_sample.h   |   10 -
 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c | 1710 ------------------------
 5 files changed, 1727 deletions(-)
 delete mode 100644 src/gallium/drivers/llvmpipe/lp_tex_sample_c.c

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/Makefile b/src/gallium/drivers/llvmpipe/Makefile
index e038a5229e..7c6e46006b 100644
--- a/src/gallium/drivers/llvmpipe/Makefile
+++ b/src/gallium/drivers/llvmpipe/Makefile
@@ -50,7 +50,6 @@ C_SOURCES = \
 	lp_state_vs.c \
 	lp_surface.c \
 	lp_tex_cache.c \
-	lp_tex_sample_c.c \
 	lp_tex_sample_llvm.c \
 	lp_texture.c \
 	lp_tile_cache.c \
diff --git a/src/gallium/drivers/llvmpipe/SConscript b/src/gallium/drivers/llvmpipe/SConscript
index 3ca676647c..6bb545a501 100644
--- a/src/gallium/drivers/llvmpipe/SConscript
+++ b/src/gallium/drivers/llvmpipe/SConscript
@@ -66,7 +66,6 @@ llvmpipe = env.ConvenienceLibrary(
 		'lp_state_vs.c',
 		'lp_surface.c',
 		'lp_tex_cache.c',
-		'lp_tex_sample_c.c',
 		'lp_tex_sample_llvm.c',
 		'lp_texture.c',
 		'lp_tile_cache.c',
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 47078fbae4..b73ca2d41e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -550,13 +550,8 @@ generate_fragment(struct llvmpipe_context *lp,
                             a0_ptr, dadx_ptr, dady_ptr,
                             x0, y0, 2, 0);
 
-#if 0
-   /* C texture sampling */
-   sampler = lp_c_sampler_soa_create(context_ptr);
-#else
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
-#endif
 
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 9ad1bde956..43be38eaee 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -78,16 +78,6 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
                float rgba[NUM_CHANNELS][QUAD_SIZE]);
 
 
-/**
- * Texture sampling code generator that just calls lp_get_samples C function
- * for the actual sampling computation.
- *
- * @param context_ptr LLVM value with the pointer to the struct lp_jit_context.
- */
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr);
-
-
 /**
  * Pure-LLVM texture sampling code generator.
  *
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
deleted file mode 100644
index ccc8c8cec4..0000000000
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ /dev/null
@@ -1,1710 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * Copyright 2008 VMware, Inc.  All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * Texture sampling
- *
- * Authors:
- *   Brian Paul
- */
-
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_texture.h"
-#include "lp_tex_sample.h"
-#include "lp_tex_cache.h"
-#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
-#include "util/u_math.h"
-#include "util/u_memory.h"
-
-
-
-/*
- * Note, the FRAC macro has to work perfectly.  Otherwise you'll sometimes
- * see 1-pixel bands of improperly weighted linear-filtered textures.
- * The tests/texwrap.c demo is a good test.
- * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0.
- * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x).
- */
-#define FRAC(f)  ((f) - util_ifloor(f))
-
-
-/**
- * Linear interpolation macro
- */
-static INLINE float
-lerp(float a, float v0, float v1)
-{
-   return v0 + a * (v1 - v0);
-}
-
-
-/**
- * Do 2D/biliner interpolation of float values.
- * v00, v10, v01 and v11 are typically four texture samples in a square/box.
- * a and b are the horizontal and vertical interpolants.
- * It's important that this function is inlined when compiled with
- * optimization!  If we find that's not true on some systems, convert
- * to a macro.
- */
-static INLINE float
-lerp_2d(float a, float b,
-        float v00, float v10, float v01, float v11)
-{
-   const float temp0 = lerp(a, v00, v10);
-   const float temp1 = lerp(a, v01, v11);
-   return lerp(b, temp0, temp1);
-}
-
-
-/**
- * As above, but 3D interpolation of 8 values.
- */
-static INLINE float
-lerp_3d(float a, float b, float c,
-        float v000, float v100, float v010, float v110,
-        float v001, float v101, float v011, float v111)
-{
-   const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
-   const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
-   return lerp(c, temp0, temp1);
-}
-
-
-
-/**
- * If A is a signed integer, A % B doesn't give the right value for A < 0
- * (in terms of texture repeat).  Just casting to unsigned fixes that.
- */
-#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B))
-
-
-/**
- * Apply texture coord wrapping mode and return integer texture indexes
- * for a vector of four texcoords (S or T or P).
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the incoming texcoords
- * \param size  the texture image size
- * \param icoord  returns the integer texcoords
- * \return  integer texture index
- */
-static INLINE void
-nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                   int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      /* s limited to [0,1) */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch] * size);
-         icoord[ch] = REMAINDER(i, size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP:
-      /* s limited to [0,1] */
-      /* i limited to [0,size-1] */
-      for (ch = 0; ch < 4; ch++) {
-         if (s[ch] <= 0.0F)
-            icoord[ch] = 0;
-         else if (s[ch] >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(s[ch] * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] < min)
-               icoord[ch] = 0;
-            else if (s[ch] > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [-1, size] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            if (s[ch] <= min)
-               icoord[ch] = -1;
-            else if (s[ch] >= max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(s[ch] * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      {
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const int flr = util_ifloor(s[ch]);
-            float u;
-            if (flr & 1)
-               u = 1.0F - (s[ch] - (float) flr);
-            else
-               u = s[ch] - (float) flr;
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* s limited to [0,1] */
-         /* i limited to [0,size-1] */
-         const float u = fabsf(s[ch]);
-         if (u <= 0.0F)
-            icoord[ch] = 0;
-         else if (u >= 1.0F)
-            icoord[ch] = size - 1;
-         else
-            icoord[ch] = util_ifloor(u * size);
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = 1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = 0;
-            else if (u > max)
-               icoord[ch] = size - 1;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         /* s limited to [min,max] */
-         /* i limited to [0, size-1] */
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            const float u = fabsf(s[ch]);
-            if (u < min)
-               icoord[ch] = -1;
-            else if (u > max)
-               icoord[ch] = size;
-            else
-               icoord[ch] = util_ifloor(u * size);
-         }
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Used to compute texel locations for linear sampling for four texcoords.
- * \param wrapMode  PIPE_TEX_WRAP_x
- * \param s  the texcoords
- * \param size  the texture image size
- * \param icoord0  returns first texture indexes
- * \param icoord1  returns second texture indexes (usually icoord0 + 1)
- * \param w  returns blend factor/weight between texture indexes
- * \param icoord  returns the computed integer texture coords
- */
-static INLINE void
-linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
-                  int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         float u = s[ch] * size - 0.5F;
-         icoord0[ch] = REMAINDER(util_ifloor(u), size);
-         icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.0F, 1.0F);
-         u = u * size - 0.5f;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = CLAMP(s[ch], min, max);
-            u = u * size - 0.5f;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_REPEAT:
-      for (ch = 0; ch < 4; ch++) {
-         const int flr = util_ifloor(s[ch]);
-         float u;
-         if (flr & 1)
-            u = 1.0F - (s[ch] - (float) flr);
-         else
-            u = s[ch] - (float) flr;
-         u = u * size - 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
-      for (ch = 0; ch < 4; ch++) {
-         float u = fabsf(s[ch]);
-         if (u >= 1.0F)
-            u = (float) size;
-         else
-            u *= size;
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord0[ch] < 0)
-            icoord0[ch] = 0;
-         if (icoord1[ch] >= (int) size)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;;
-   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
-      {
-         const float min = -1.0F / (2.0F * size);
-         const float max = 1.0F - min;
-         for (ch = 0; ch < 4; ch++) {
-            float u = fabsf(s[ch]);
-            if (u <= min)
-               u = min * size;
-            else if (u >= max)
-               u = max * size;
-            else
-               u *= size;
-            u -= 0.5F;
-            icoord0[ch] = util_ifloor(u);
-            icoord1[ch] = icoord0[ch] + 1;
-            w[ch] = FRAC(u);
-         }
-      }
-      break;;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                          int icoord[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         int i = util_ifloor(s[ch]);
-         icoord[ch]= CLAMP(i, 0, (int) size-1);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
-      }
-      return;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * For RECT textures / unnormalized texcoords.
- * Only a subset of wrap modes supported.
- */
-static INLINE void
-linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
-                         int icoord0[4], int icoord1[4], float w[4])
-{
-   uint ch;
-   switch (wrapMode) {
-   case PIPE_TEX_WRAP_CLAMP:
-      for (ch = 0; ch < 4; ch++) {
-         /* Not exactly what the spec says, but it matches NVIDIA output */
-         float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         w[ch] = FRAC(u);
-      }
-      return;
-   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
-      /* fall-through */
-   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
-      for (ch = 0; ch < 4; ch++) {
-         float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
-         u -= 0.5F;
-         icoord0[ch] = util_ifloor(u);
-         icoord1[ch] = icoord0[ch] + 1;
-         if (icoord1[ch] > (int) size - 1)
-            icoord1[ch] = size - 1;
-         w[ch] = FRAC(u);
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static unsigned
-choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
-{
-   /*
-      major axis
-      direction     target                             sc     tc    ma
-      ----------    -------------------------------    ---    ---   ---
-       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
-       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
-       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
-       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
-       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
-       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
-   */
-   const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
-   unsigned face;
-   float sc, tc, ma;
-
-   if (arx > ary && arx > arz) {
-      if (rx >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_X;
-         sc = -rz;
-         tc = -ry;
-         ma = arx;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_X;
-         sc = rz;
-         tc = -ry;
-         ma = arx;
-      }
-   }
-   else if (ary > arx && ary > arz) {
-      if (ry >= 0.0F) {
-         face = PIPE_TEX_FACE_POS_Y;
-         sc = rx;
-         tc = rz;
-         ma = ary;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Y;
-         sc = rx;
-         tc = -rz;
-         ma = ary;
-      }
-   }
-   else {
-      if (rz > 0.0F) {
-         face = PIPE_TEX_FACE_POS_Z;
-         sc = rx;
-         tc = -ry;
-         ma = arz;
-      }
-      else {
-         face = PIPE_TEX_FACE_NEG_Z;
-         sc = -rx;
-         tc = -ry;
-         ma = arz;
-      }
-   }
-
-   *newS = ( sc / ma + 1.0F ) * 0.5F;
-   *newT = ( tc / ma + 1.0F ) * 0.5F;
-
-   return face;
-}
-
-
-/**
- * Examine the quad's texture coordinates to compute the partial
- * derivatives w.r.t X and Y, then compute lambda (level of detail).
- *
- * This is only done for fragment shaders, not vertex shaders.
- */
-static float
-compute_lambda(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   float rho, lambda;
-
-   if (samp->processor == TGSI_PROCESSOR_VERTEX)
-      return lodbias;
-
-   assert(sampler->normalized_coords);
-
-   assert(s);
-   {
-      float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
-      float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
-      dsdx = fabsf(dsdx);
-      dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width0;
-   }
-   if (t) {
-      float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
-      float dtdy = t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT];
-      float max;
-      dtdx = fabsf(dtdx);
-      dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height0;
-      rho = MAX2(rho, max);
-   }
-   if (p) {
-      float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
-      float dpdy = p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT];
-      float max;
-      dpdx = fabsf(dpdx);
-      dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth0;
-      rho = MAX2(rho, max);
-   }
-
-   lambda = util_fast_log2(rho);
-   lambda += lodbias + sampler->lod_bias;
-   lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
-
-   return lambda;
-}
-
-
-/**
- * Do several things here:
- * 1. Compute lambda from the texcoords, if needed
- * 2. Determine if we're minifying or magnifying
- * 3. If minifying, choose mipmap levels
- * 4. Return image filter to use within mipmap images
- * \param level0  Returns first mipmap level to sample from
- * \param level1  Returns second mipmap level to sample from
- * \param levelBlend  Returns blend factor between levels, in [0,1]
- * \param imgFilter  Returns either the min or mag filter, depending on lambda
- */
-static void
-choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
-                     const float s[QUAD_SIZE],
-                     const float t[QUAD_SIZE],
-                     const float p[QUAD_SIZE],
-                     float lodbias,
-                     unsigned *level0, unsigned *level1, float *levelBlend,
-                     unsigned *imgFilter)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-      /* no mipmap selection needed */
-      *level0 = *level1 = CLAMP((int) sampler->min_lod,
-                                0, (int) texture->last_level);
-
-      if (sampler->min_img_filter != sampler->mag_img_filter) {
-         /* non-mipmapped texture, but still need to determine if doing
-          * minification or magnification.
-          */
-         float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-         if (lambda <= 0.0) {
-            *imgFilter = sampler->mag_img_filter;
-         }
-         else {
-            *imgFilter = sampler->min_img_filter;
-         }
-      }
-      else {
-         *imgFilter = sampler->mag_img_filter;
-      }
-   }
-   else {
-      float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-
-      if (lambda <= 0.0) { /* XXX threshold depends on the filter */
-         /* magnifying */
-         *imgFilter = sampler->mag_img_filter;
-         *level0 = *level1 = 0;
-      }
-      else {
-         /* minifying */
-         *imgFilter = sampler->min_img_filter;
-
-         /* choose mipmap level(s) and compute the blend factor between them */
-         if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
-            /* Nearest mipmap level */
-            const int lvl = (int) (lambda + 0.5);
-            *level0 =
-            *level1 = CLAMP(lvl, 0, (int) texture->last_level);
-         }
-         else {
-            /* Linear interpolation between mipmap levels */
-            const int lvl = (int) lambda;
-            *level0 = CLAMP(lvl,     0, (int) texture->last_level);
-            *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
-            *levelBlend = FRAC(lambda);  /* blending weight between levels */
-         }
-      }
-   }
-}
-
-
-/**
- * Get a texel from a texture, using the texture tile cache.
- *
- * \param face  the cube face in 0..5
- * \param level  the mipmap level
- * \param x  the x coord of texel within 2D image
- * \param y  the y coord of texel within 2D image
- * \param z  which slice of a 3D texture
- * \param rgba  the quad to put the texel/color into
- * \param j  which element of the rgba quad to write to
- *
- * XXX maybe move this into lp_tile_cache.c and merge with the
- * lp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
- */
-static void
-get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
-                  unsigned face, unsigned level, int x, int y, 
-                  const uint8_t *out[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-      
-   out[0] = &tile->color[y  ][x  ][0];
-   out[1] = &tile->color[y  ][x+1][0];
-   out[2] = &tile->color[y+1][x  ][0];
-   out[3] = &tile->color[y+1][x+1][0];
-}
-
-static INLINE const uint8_t *
-get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-
-   const struct llvmpipe_cached_tex_tile *tile
-      = lp_get_cached_tex_tile(samp->cache,
-                               tex_tile_address(x, y, 0, face, level));
-
-   y %= TEX_TILE_SIZE;
-   x %= TEX_TILE_SIZE;
-
-   return &tile->color[y][x][0];
-}
-
-
-static void
-get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
-                     unsigned face, unsigned level, 
-                     int x0, int y0, 
-                     int x1, int y1,
-                     const uint8_t *out[4])
-{
-   unsigned i;
-
-   for (i = 0; i < 4; i++) {
-      unsigned tx = (i & 1) ? x1 : x0;
-      unsigned ty = (i >> 1) ? y1 : y0;
-
-      out[i] = get_texel_2d_ptr( tgsi_sampler, face, level, tx, ty );
-   }
-}
-
-static void
-get_texel(const struct tgsi_sampler *tgsi_sampler,
-                 unsigned face, unsigned level, int x, int y, int z,
-                 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
-       y < 0 || y >= (int) u_minify(texture->height0, level) ||
-       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
-      rgba[0][j] = sampler->border_color[0];
-      rgba[1][j] = sampler->border_color[1];
-      rgba[2][j] = sampler->border_color[2];
-      rgba[3][j] = sampler->border_color[3];
-   }
-   else {
-      const unsigned tx = x % TEX_TILE_SIZE;
-      const unsigned ty = y % TEX_TILE_SIZE;
-      const struct llvmpipe_cached_tex_tile *tile;
-
-      tile = lp_get_cached_tex_tile(samp->cache,
-                                    tex_tile_address(x, y, z, face, level));
-
-      rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
-      rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
-      rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
-      rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
-      if (0)
-      {
-         debug_printf("Get texel %f %f %f %f from %s\n",
-                      rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
-                      pf_name(texture->format));
-      }
-   }
-}
-
-
-/**
- * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
- * When we sampled the depth texture, the depth value was put into all
- * RGBA channels.  We look at the red channel here.
- * \param rgba  quad of (depth) texel values
- * \param p  texture 'P' components for four pixels in quad
- * \param j  which pixel in the quad to test [0..3]
- */
-static INLINE void
-shadow_compare(const struct pipe_sampler_state *sampler,
-               float rgba[NUM_CHANNELS][QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               uint j)
-{
-   int k;
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k = p[j] < rgba[0][j];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k = p[j] <= rgba[0][j];
-      break;
-   case PIPE_FUNC_GREATER:
-      k = p[j] > rgba[0][j];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k = p[j] >= rgba[0][j];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k = p[j] == rgba[0][j];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k = p[j] != rgba[0][j];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k = 0;
-      break;
-   default:
-      k = 0;
-      assert(0);
-      break;
-   }
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
-   rgba[3][j] = 1.0F;
-}
-
-
-/**
- * As above, but do four z/texture comparisons.
- */
-static INLINE void
-shadow_compare4(const struct pipe_sampler_state *sampler,
-                float rgba[NUM_CHANNELS][QUAD_SIZE],
-                const float p[QUAD_SIZE])
-{
-   int j, k0, k1, k2, k3;
-   float val;
-
-   /* compare four texcoords vs. four texture samples */
-   switch (sampler->compare_func) {
-   case PIPE_FUNC_LESS:
-      k0 = p[0] < rgba[0][0];
-      k1 = p[1] < rgba[0][1];
-      k2 = p[2] < rgba[0][2];
-      k3 = p[3] < rgba[0][3];
-      break;
-   case PIPE_FUNC_LEQUAL:
-      k0 = p[0] <= rgba[0][0];
-      k1 = p[1] <= rgba[0][1];
-      k2 = p[2] <= rgba[0][2];
-      k3 = p[3] <= rgba[0][3];
-      break;
-   case PIPE_FUNC_GREATER:
-      k0 = p[0] > rgba[0][0];
-      k1 = p[1] > rgba[0][1];
-      k2 = p[2] > rgba[0][2];
-      k3 = p[3] > rgba[0][3];
-      break;
-   case PIPE_FUNC_GEQUAL:
-      k0 = p[0] >= rgba[0][0];
-      k1 = p[1] >= rgba[0][1];
-      k2 = p[2] >= rgba[0][2];
-      k3 = p[3] >= rgba[0][3];
-      break;
-   case PIPE_FUNC_EQUAL:
-      k0 = p[0] == rgba[0][0];
-      k1 = p[1] == rgba[0][1];
-      k2 = p[2] == rgba[0][2];
-      k3 = p[3] == rgba[0][3];
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      k0 = p[0] != rgba[0][0];
-      k1 = p[1] != rgba[0][1];
-      k2 = p[2] != rgba[0][2];
-      k3 = p[3] != rgba[0][3];
-      break;
-   case PIPE_FUNC_ALWAYS:
-      k0 = k1 = k2 = k3 = 1;
-      break;
-   case PIPE_FUNC_NEVER:
-      k0 = k1 = k2 = k3 = 0;
-      break;
-   default:
-      k0 = k1 = k2 = k3 = 0;
-      assert(0);
-      break;
-   }
-
-   /* convert four pass/fail values to an intensity in [0,1] */
-   val = 0.25F * (k0 + k1 + k2 + k3);
-
-   /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
-   for (j = 0; j < 4; j++) {
-      rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
-      rgba[3][j] = 1.0F;
-   }
-}
-
-
-
-static void
-lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                    const float s[QUAD_SIZE],
-                                    const float t[QUAD_SIZE],
-                                    const float p[QUAD_SIZE],
-                                    float lodbias,
-                                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-   unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
-   unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
-      
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot - 0.5F;
-      float v = t[j] * ypot - 0.5F;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      float xw = u - (float)uflr;
-      float yw = v - (float)vflr;
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *tx[4];
-      
-
-      /* Can we fetch all four at once:
-       */
-      if (x0 < xmax && y0 < ymax)
-      {
-         get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
-      }
-      else 
-      {
-         unsigned x1 = (x0 + 1) & (xpot - 1);
-         unsigned y1 = (y0 + 1) & (ypot - 1);
-         get_texel_quad_2d_mt(tgsi_sampler, 0, level, 
-                              x0, y0, x1, y1, tx);
-      }
-
-
-      /* interpolate R, G, B, A */
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = lerp_2d(xw, yw, 
-                              ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
-                              ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int uflr = util_ifloor(u);
-      int vflr = util_ifloor(v);
-
-      int x0 = uflr & (xpot - 1);
-      int y0 = vflr & (ypot - 1);
-
-      const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
-                                     const float s[QUAD_SIZE],
-                                     const float t[QUAD_SIZE],
-                                     const float p[QUAD_SIZE],
-                                     float lodbias,
-                                     float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   unsigned  j;
-   unsigned level = samp->level;
-   unsigned xpot = 1 << (samp->xpot - level);
-   unsigned ypot = 1 << (samp->ypot - level);
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      int c;
-
-      float u = s[j] * xpot;
-      float v = t[j] * ypot;
-
-      int x0, y0;
-      const uint8_t *out;
-
-      x0 = util_ifloor(u);
-      if (x0 < 0) 
-         x0 = 0;
-      else if (x0 > xpot - 1)
-         x0 = xpot - 1;
-
-      y0 = util_ifloor(v);
-      if (y0 < 0) 
-         y0 = 0;
-      else if (y0 > ypot - 1)
-         y0 = ypot - 1;
-      
-      out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
-
-      for (c = 0; c < 4; c++) {
-         rgba[c][j] = ubyte_to_float(out[c]);
-      }
-   }
-}
-
-
-static void
-lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
-                                               const float s[QUAD_SIZE],
-                                               const float t[QUAD_SIZE],
-                                               const float p[QUAD_SIZE],
-                                               float lodbias,
-                                               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   int level0;
-   float lambda;
-
-   lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
-   level0 = (int)lambda;
-
-   if (lambda < 0.0) { 
-      samp->level = 0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else if (level0 >= texture->last_level) {
-      samp->level = texture->last_level;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba );
-   }
-   else {
-      float levelBlend = lambda - level0;
-      float rgba0[4][4];
-      float rgba1[4][4];
-      int c,j;
-
-      samp->level = level0;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba0 );
-
-      samp->level = level0+1;
-      lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
-                                           s, t, p, 0, rgba1 );
-
-      for (j = 0; j < QUAD_SIZE; j++) {
-         for (c = 0; c < 4; c++) {
-            rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
-         }
-      }
-   }
-}
-
-/**
- * Common code for sampling 1D/2D/cube textures.
- * Could probably extend for 3D...
- */
-static void
-lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
-                         const float s[QUAD_SIZE],
-                         const float t[QUAD_SIZE],
-                         const float p[QUAD_SIZE],
-                         float lodbias,
-                         float rgba[NUM_CHANNELS][QUAD_SIZE],
-                         const unsigned faces[4])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend = 0.0f;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = u_minify(texture->width0, level0);
-   height = u_minify(texture->height0, level0);
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
-                         rgba2, j);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare(sampler, rgba2, p, j);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-
-         linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1],
-                                    tx[c][2], tx[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-
-               /* XXX: This is incorrect -- will often end up with (x0
-                *  == x1 && y0 == y1), meaning that we fetch the same
-                *  texel four times and linearly interpolate between
-                *  identical values.  The correct approach would be to
-                *  call linear_texcoord again for the second level.
-                */
-               x0[j] /= 2;
-               y0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
-               get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
-               get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
-               if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
-                  shadow_compare4(sampler, tx, p);
-               }
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_2d(xw[j], yw[j],
-                                        tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-               }
-
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static INLINE void
-lp_get_samples_1d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   static const float tzero[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, tzero, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_2d(struct tgsi_sampler *sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   static const unsigned faces[4] = {0, 0, 0, 0};
-   lp_get_samples_2d_common(sampler, s, t, p,
-                            lodbias, rgba, faces);
-}
-
-
-static INLINE void
-lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
-                  const float s[QUAD_SIZE],
-                  const float t[QUAD_SIZE],
-                  const float p[QUAD_SIZE],
-                  float lodbias,
-                  float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   /* get/map pipe_surfaces corresponding to 3D tex slices */
-   unsigned level0, level1, j, imgFilter;
-   int width, height, depth;
-   float levelBlend = 0.0f;
-   const uint face = 0;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   assert(sampler->normalized_coords);
-
-   width = u_minify(texture->width0, level0);
-   height = u_minify(texture->height0, level0);
-   depth = u_minify(texture->depth0, level0);
-
-   assert(width > 0);
-   assert(height > 0);
-   assert(depth > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4], z[4];
-         nearest_texcoord_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_4(sampler->wrap_t, t, height, y);
-         nearest_texcoord_4(sampler->wrap_r, p, depth, z);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               unsigned c;
-               x[j] /= 2;
-               y[j] /= 2;
-               z[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-      {
-         int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
-         float xw[4], yw[4], zw[4]; /* interpolation weights */
-         linear_texcoord_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
-         linear_texcoord_4(sampler->wrap_r, p, depth,  z0, z1, zw);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            int c;
-            float tx0[4][4], tx1[4][4];
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
-
-            /* interpolate R, G, B, A */
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                    tx0[c][0], tx0[c][1],
-                                    tx0[c][2], tx0[c][3],
-                                    tx1[c][0], tx1[c][1],
-                                    tx1[c][2], tx1[c][3]);
-            }
-
-            if (level0 != level1) {
-               /* get texels from second mipmap level and blend */
-               float rgba2[4][4];
-               x0[j] /= 2;
-               y0[j] /= 2;
-               z0[j] /= 2;
-               x1[j] /= 2;
-               y1[j] /= 2;
-               z1[j] /= 2;
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
-               get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
-               get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
-               get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
-               get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
-
-               /* interpolate R, G, B, A */
-               for (c = 0; c < 4; c++) {
-                  rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
-                                        tx0[c][0], tx0[c][1],
-                                        tx0[c][2], tx0[c][3],
-                                        tx1[c][0], tx1[c][1],
-                                        tx1[c][2], tx1[c][3]);
-               }
-
-               /* blend mipmap levels */
-               for (c = 0; c < NUM_CHANNELS; c++) {
-                  rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
-               }
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-static void
-lp_get_samples_cube(struct tgsi_sampler *sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   unsigned faces[QUAD_SIZE], j;
-   float ssss[4], tttt[4];
-   for (j = 0; j < QUAD_SIZE; j++) {
-      faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
-   }
-   lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
-                            lodbias, rgba, faces);
-}
-
-
-static void
-lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-   const uint face = 0;
-   unsigned level0, level1, j, imgFilter;
-   int width, height;
-   float levelBlend;
-
-   choose_mipmap_levels(tgsi_sampler, s, t, p, 
-                        lodbias,
-                        &level0, &level1, &levelBlend, &imgFilter);
-
-   /* texture RECTS cannot be mipmapped */
-   assert(level0 == level1);
-
-   width = u_minify(texture->width0, level0);
-   height = u_minify(texture->height0, level0);
-
-   assert(width > 0);
-
-   switch (imgFilter) {
-   case PIPE_TEX_FILTER_NEAREST:
-      {
-         int x[4], y[4];
-         nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
-         nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare(sampler, rgba, p, j);
-            }
-         }
-      }
-      break;
-   case PIPE_TEX_FILTER_LINEAR:
-      {
-         int x0[4], y0[4], x1[4], y1[4];
-         float xw[4], yw[4]; /* weights */
-         linear_texcoord_unnorm_4(sampler->wrap_s, s, width,  x0, x1, xw);
-         linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
-         for (j = 0; j < QUAD_SIZE; j++) {
-            float tx[4][4]; /* texels */
-            int c;
-            get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
-            get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
-            get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
-            get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
-            if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
-               shadow_compare4(sampler, tx, p);
-            }
-            for (c = 0; c < 4; c++) {
-               rgba[c][j] = lerp_2d(xw[j], yw[j],
-                                    tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
-            }
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-/**
- * Error condition handler
- */
-static INLINE void
-lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
-                    const float s[QUAD_SIZE],
-                    const float t[QUAD_SIZE],
-                    const float p[QUAD_SIZE],
-                    float lodbias,
-                    float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   int i,j;
-
-   for (i = 0; i < 4; i++)
-      for (j = 0; j < 4; j++)
-         rgba[i][j] = 1.0;
-}
-
-/**
- * Called via tgsi_sampler::get_samples() when using a sampler for the
- * first time.  Determine the actual sampler function, link it in and
- * call it.
- */
-void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias,
-               float rgba[NUM_CHANNELS][QUAD_SIZE])
-{
-   struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
-   const struct pipe_texture *texture = samp->texture;
-   const struct pipe_sampler_state *sampler = samp->sampler;
-
-   /* Default to the 'undefined' case:
-    */
-   tgsi_sampler->get_samples = lp_get_samples_null;
-
-   if (!texture) {
-      assert(0);                /* is this legal?? */
-      goto out;
-   }
-
-   if (!sampler->normalized_coords) {
-      assert (texture->target == PIPE_TEXTURE_2D);
-      tgsi_sampler->get_samples = lp_get_samples_rect;
-      goto out;
-   }
-
-   switch (texture->target) {
-   case PIPE_TEXTURE_1D:
-      tgsi_sampler->get_samples = lp_get_samples_1d;
-      break;
-   case PIPE_TEXTURE_2D:
-      tgsi_sampler->get_samples = lp_get_samples_2d;
-      break;
-   case PIPE_TEXTURE_3D:
-      tgsi_sampler->get_samples = lp_get_samples_3d;
-      break;
-   case PIPE_TEXTURE_CUBE:
-      tgsi_sampler->get_samples = lp_get_samples_cube;
-      break;
-   default:
-      assert(0);
-      break;
-   }
-
-   /* Do this elsewhere: 
-    */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
-
-   /* Try to hook in a faster sampler.  Ultimately we'll have to
-    * code-generate these.  Luckily most of this looks like it is
-    * orthogonal state within the sampler.
-    */
-   if (texture->target == PIPE_TEXTURE_2D &&
-       sampler->min_img_filter == sampler->mag_img_filter &&
-       sampler->wrap_s == sampler->wrap_t &&
-       sampler->compare_mode == PIPE_TEX_COMPARE_NONE &&
-       sampler->normalized_coords) 
-   {
-      if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
-         samp->level = CLAMP((int) sampler->min_lod,
-                             0, (int) texture->last_level);
-
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
-               break;
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-         else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_NEAREST:
-               tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
-               break;
-            default:
-               break;
-            }
-         }
-      }
-      else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
-         if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
-            switch (sampler->min_img_filter) {
-            case PIPE_TEX_FILTER_LINEAR:
-               tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
-               break;
-            default:
-               break;
-            }
-         } 
-      }
-   }
-   else if (0) {
-      _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
-                    texture->target, PIPE_TEXTURE_2D,
-                    sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
-                    sampler->min_img_filter, sampler->mag_img_filter,
-                    sampler->wrap_s, sampler->wrap_t,
-                    sampler->compare_mode, PIPE_TEX_COMPARE_NONE,
-                    sampler->normalized_coords, TRUE);
-   }
-
-out:
-   tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
-}
-
-
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
-                    uint32_t unit,
-                    float *store )
-{
-   struct tgsi_sampler *sampler = samplers[unit];
-
-#if 0
-   uint j;
-
-   debug_printf("%s sampler: %p (%p) store: %p\n",
-                __FUNCTION__,
-                sampler, *sampler,
-                store );
-
-   debug_printf("lodbias %f\n", store[12]);
-
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d texcoord %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j]);
-#endif
-
-   {
-      float rgba[NUM_CHANNELS][QUAD_SIZE];
-      sampler->get_samples(sampler,
-                           &store[0],
-                           &store[4],
-                           &store[8],
-                           0.0f, /*store[12],  lodbias */
-                           rgba);
-      memcpy(store, rgba, sizeof rgba);
-   }
-
-#if 0
-   for (j = 0; j < 4; j++)
-      debug_printf("sample %d result %f %f %f %f\n",
-                   j,
-                   store[0+j],
-                   store[4+j],
-                   store[8+j],
-                   store[12+j]);
-#endif
-}
-
-
-#include "lp_bld_type.h"
-#include "lp_bld_intr.h"
-#include "lp_bld_tgsi.h"
-
-
-struct lp_c_sampler_soa
-{
-   struct lp_build_sampler_soa base;
-
-   LLVMValueRef context_ptr;
-
-   LLVMValueRef samplers_ptr;
-
-   /** Coords/texels store */
-   LLVMValueRef store_ptr;
-};
-
-
-static void
-lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
-{
-   FREE(sampler);
-}
-
-
-static void
-lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
-                                  LLVMBuilderRef builder,
-                                  struct lp_type type,
-                                  unsigned unit,
-                                  unsigned num_coords,
-                                  const LLVMValueRef *coords,
-                                  LLVMValueRef lodbias,
-                                  LLVMValueRef *texel)
-{
-   struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
-   LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
-   LLVMValueRef args[3];
-   unsigned i;
-
-   if(!sampler->samplers_ptr)
-      sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
-
-   if(!sampler->store_ptr)
-      sampler->store_ptr = LLVMBuildArrayAlloca(builder,
-                                            vec_type,
-                                            LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                            "texel_store");
-
-   for (i = 0; i < num_coords; i++) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      LLVMBuildStore(builder, coords[i], coord_ptr);
-   }
-
-   args[0] = sampler->samplers_ptr;
-   args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
-   args[2] = sampler->store_ptr;
-
-   lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
-
-   for (i = 0; i < NUM_CHANNELS; ++i) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
-      LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
-      texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
-   }
-}
-
-
-struct lp_build_sampler_soa *
-lp_c_sampler_soa_create(LLVMValueRef context_ptr)
-{
-   struct lp_c_sampler_soa *sampler;
-
-   sampler = CALLOC_STRUCT(lp_c_sampler_soa);
-   if(!sampler)
-      return NULL;
-
-   sampler->base.destroy = lp_c_sampler_soa_destroy;
-   sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
-   sampler->context_ptr = context_ptr;
-
-   return &sampler->base;
-}
-
-- 
cgit v1.2.3


From 150108024daec6dfc44a28e6809fbb2a19cb2e1a Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keithw@vmware.com>
Date: Thu, 7 Jan 2010 11:19:38 +0000
Subject: i965g: updates for draw retval

---
 src/gallium/drivers/i965/brw_draw.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/i965/brw_draw.c b/src/gallium/drivers/i965/brw_draw.c
index 852fd22982..ea8d39adaf 100644
--- a/src/gallium/drivers/i965/brw_draw.c
+++ b/src/gallium/drivers/i965/brw_draw.c
@@ -176,7 +176,7 @@ try_draw_range_elements(struct brw_context *brw,
 }
 
 
-static boolean
+static void
 brw_draw_range_elements(struct pipe_context *pipe,
 			struct pipe_buffer *index_buffer,
 			unsigned index_size,
@@ -228,29 +228,27 @@ brw_draw_range_elements(struct pipe_context *pipe,
       ret = try_draw_range_elements(brw, index_buffer, hw_prim, start, count );
       assert(ret == 0);
    }
-
-   return TRUE;
 }
 
-static boolean
+static void
 brw_draw_elements(struct pipe_context *pipe,
 		  struct pipe_buffer *index_buffer,
 		  unsigned index_size,
 		  unsigned mode, 
 		  unsigned start, unsigned count)
 {
-   return brw_draw_range_elements( pipe, index_buffer,
-				   index_size,
-				   0, 0xffffffff,
-				   mode, 
-				   start, count );
+   brw_draw_range_elements( pipe, index_buffer,
+                            index_size,
+                            0, 0xffffffff,
+                            mode, 
+                            start, count );
 }
 
-static boolean
+static void
 brw_draw_arrays(struct pipe_context *pipe, unsigned mode,
                      unsigned start, unsigned count)
 {
-   return brw_draw_elements(pipe, NULL, 0, mode, start, count);
+   brw_draw_elements(pipe, NULL, 0, mode, start, count);
 }
 
 
-- 
cgit v1.2.3


From 12c6b871a3ae4a091d6f768231304f3578a9a9f1 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 7 Jan 2010 15:59:41 +0000
Subject: llvmpipe: Remove more loose ends of TGSI exec sampling.

---
 src/gallium/drivers/llvmpipe/lp_context.c       | 21 +-----------
 src/gallium/drivers/llvmpipe/lp_context.h       |  8 -----
 src/gallium/drivers/llvmpipe/lp_jit.c           | 18 -----------
 src/gallium/drivers/llvmpipe/lp_jit.h           |  6 ----
 src/gallium/drivers/llvmpipe/lp_state_derived.c | 35 ++------------------
 src/gallium/drivers/llvmpipe/lp_tex_sample.h    | 43 -------------------------
 6 files changed, 4 insertions(+), 127 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 37587d4f79..1cc3c9227c 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -256,22 +256,6 @@ llvmpipe_create( struct pipe_screen *screen )
       llvmpipe->vertex_tex_cache[i] = lp_create_tex_tile_cache(screen);
 
 
-   /* vertex shader samplers */
-   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
-      llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
-      llvmpipe->tgsi.vert_samplers[i].processor = TGSI_PROCESSOR_VERTEX;
-      llvmpipe->tgsi.vert_samplers[i].cache = llvmpipe->vertex_tex_cache[i];
-      llvmpipe->tgsi.vert_samplers_list[i] = &llvmpipe->tgsi.vert_samplers[i];
-   }
-
-   /* fragment shader samplers */
-   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
-      llvmpipe->tgsi.frag_samplers[i].processor = TGSI_PROCESSOR_FRAGMENT;
-      llvmpipe->tgsi.frag_samplers[i].cache = llvmpipe->tex_cache[i];
-      llvmpipe->tgsi.frag_samplers_list[i] = &llvmpipe->tgsi.frag_samplers[i];
-   }
-
    /*
     * Create drawing context and plug our rendering stage into it.
     */
@@ -279,10 +263,7 @@ llvmpipe_create( struct pipe_screen *screen )
    if (!llvmpipe->draw) 
       goto fail;
 
-   draw_texture_samplers(llvmpipe->draw,
-                         PIPE_MAX_VERTEX_SAMPLERS,
-                         (struct tgsi_sampler **)
-                            llvmpipe->tgsi.vert_samplers_list);
+   /* FIXME: devise alternative to draw_texture_samplers */
 
    if (debug_get_bool_option( "LP_NO_RAST", FALSE ))
       llvmpipe->no_rast = TRUE;
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index cc4d5ad5fd..6411797cf5 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -115,14 +115,6 @@ struct llvmpipe_context {
 
    unsigned line_stipple_counter;
 
-   /** TGSI exec things */
-   struct {
-      struct lp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS];
-      struct lp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS];
-      struct lp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS];
-      struct lp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS];
-   } tgsi;
-
    /** The primitive drawing context */
    struct draw_context *draw;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index bce3baec16..bc9e6ac1bd 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -109,24 +109,6 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
       screen->context_ptr_type = LLVMPointerType(context_type, 0);
    }
 
-   /* fetch_texel
-    */
-   {
-      LLVMTypeRef ret_type;
-      LLVMTypeRef arg_types[3];
-      LLVMValueRef fetch_texel;
-
-      ret_type = LLVMVoidType();
-      arg_types[0] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
-      arg_types[1] = LLVMInt32Type();                     /* unit */
-      arg_types[2] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); /* store */
-
-      fetch_texel = lp_declare_intrinsic(screen->module, "fetch_texel",
-                                         ret_type, arg_types, Elements(arg_types));
-
-      LLVMAddGlobalMapping(screen->engine, fetch_texel, lp_fetch_texel_soa);
-   }
-
 #ifdef DEBUG
    LLVMDumpModule(screen->module);
 #endif
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 58f716ede2..70fdb8b27b 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -118,12 +118,6 @@ typedef void
                     void *color,
                     void *depth);
 
-void PIPE_CDECL
-lp_fetch_texel_soa( struct tgsi_sampler **samplers,
-                    uint32_t unit,
-                    float *store );
-
-
 void
 lp_jit_screen_cleanup(struct llvmpipe_screen *screen);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index acfd7be5f7..6c1ef6bc42 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -192,36 +192,6 @@ compute_cliprect(struct llvmpipe_context *lp)
 }
 
 
-static void
-update_tgsi_samplers( struct llvmpipe_context *llvmpipe )
-{
-   unsigned i;
-
-   /* vertex shader samplers */
-   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
-      llvmpipe->tgsi.vert_samplers[i].sampler = llvmpipe->vertex_samplers[i];
-      llvmpipe->tgsi.vert_samplers[i].texture = llvmpipe->vertex_textures[i];
-      llvmpipe->tgsi.vert_samplers[i].base.get_samples = lp_get_samples;
-   }
-
-   for (i = 0; i < PIPE_MAX_VERTEX_SAMPLERS; i++) {
-      lp_tex_tile_cache_validate_texture( llvmpipe->vertex_tex_cache[i] );
-   }
-
-   /* fragment shader samplers */
-   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      llvmpipe->tgsi.frag_samplers[i].sampler = llvmpipe->sampler[i];
-      llvmpipe->tgsi.frag_samplers[i].texture = llvmpipe->texture[i];
-      llvmpipe->tgsi.frag_samplers[i].base.get_samples = lp_get_samples;
-   }
-
-   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
-      lp_tex_tile_cache_validate_texture( llvmpipe->tex_cache[i] );
-   }
-
-   llvmpipe->jit_context.samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
-}
-
 /* Hopefully this will remain quite simple, otherwise need to pull in
  * something like the state tracker mechanism.
  */
@@ -237,8 +207,9 @@ void llvmpipe_update_derived( struct llvmpipe_context *llvmpipe )
    }
       
    if (llvmpipe->dirty & (LP_NEW_SAMPLER |
-                          LP_NEW_TEXTURE))
-      update_tgsi_samplers( llvmpipe );
+                          LP_NEW_TEXTURE)) {
+      /* TODO */
+   }
 
    if (llvmpipe->dirty & (LP_NEW_RASTERIZER |
                           LP_NEW_FS |
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample.h b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
index 43be38eaee..cb59a94464 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample.h
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample.h
@@ -31,53 +31,10 @@
 
 #include <llvm-c/Core.h>
 
-#include "tgsi/tgsi_exec.h"
 
-
-struct llvmpipe_tex_tile_cache;
 struct lp_sampler_static_state;
 
 
-/**
- * Subclass of tgsi_sampler
- */
-struct lp_shader_sampler
-{
-   struct tgsi_sampler base;  /**< base class */
-
-   unsigned processor;
-
-   /* For lp_get_samples_2d_linear_POT:
-    */
-   unsigned xpot;
-   unsigned ypot;
-   unsigned level;
-
-   const struct pipe_texture *texture;
-   const struct pipe_sampler_state *sampler;
-
-   struct llvmpipe_tex_tile_cache *cache;
-};
-
-
-
-static INLINE struct lp_shader_sampler *
-lp_shader_sampler(const struct tgsi_sampler *sampler)
-{
-   return (struct lp_shader_sampler *) sampler;
-}
-
-
-
-extern void
-lp_get_samples(struct tgsi_sampler *tgsi_sampler,
-               const float s[QUAD_SIZE],
-               const float t[QUAD_SIZE],
-               const float p[QUAD_SIZE],
-               float lodbias,
-               float rgba[NUM_CHANNELS][QUAD_SIZE]);
-
-
 /**
  * Pure-LLVM texture sampling code generator.
  *
-- 
cgit v1.2.3


From 8081c1eaa56bc46a641ee9283ef9b6c416b5b4b2 Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Thu, 7 Jan 2010 16:16:45 +0000
Subject: llvmpipe: Remove TGSI sampler pointers from JIT context too.

---
 src/gallium/drivers/llvmpipe/lp_jit.c   | 15 ++++++---------
 src/gallium/drivers/llvmpipe/lp_jit.h   | 12 +++---------
 src/gallium/drivers/llvmpipe/lp_state.h |  1 -
 3 files changed, 9 insertions(+), 19 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index bc9e6ac1bd..4ef0783f3e 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -79,25 +79,22 @@ lp_jit_init_globals(struct llvmpipe_screen *screen)
 
    /* struct lp_jit_context */
    {
-      LLVMTypeRef elem_types[5];
+      LLVMTypeRef elem_types[4];
       LLVMTypeRef context_type;
 
       elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* constants */
-      elem_types[1] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
-      elem_types[2] = LLVMFloatType();                     /* alpha_ref_value */
-      elem_types[3] = LLVMPointerType(LLVMInt8Type(), 0);  /* blend_color */
-      elem_types[4] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
+      elem_types[1] = LLVMFloatType();                     /* alpha_ref_value */
+      elem_types[2] = LLVMPointerType(LLVMInt8Type(), 0);  /* blend_color */
+      elem_types[3] = LLVMArrayType(texture_type, PIPE_MAX_SAMPLERS); /* textures */
 
       context_type = LLVMStructType(elem_types, Elements(elem_types), 0);
 
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, constants,
                              screen->target, context_type, 0);
-      LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, samplers,
-                             screen->target, context_type, 1);
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, alpha_ref_value,
-                             screen->target, context_type, 2);
+                             screen->target, context_type, 1);
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, blend_color,
-                             screen->target, context_type, 3);
+                             screen->target, context_type, 2);
       LP_CHECK_MEMBER_OFFSET(struct lp_jit_context, textures,
                              screen->target, context_type,
                              LP_JIT_CONTEXT_TEXTURES_INDEX);
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 70fdb8b27b..277b690c02 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -41,7 +41,6 @@
 #include "pipe/p_state.h"
 
 
-struct tgsi_sampler;
 struct llvmpipe_screen;
 
 
@@ -78,8 +77,6 @@ struct lp_jit_context
 {
    const float *constants;
 
-   struct tgsi_sampler **samplers;
-
    float alpha_ref_value;
 
    /* FIXME: store (also?) in floats */
@@ -92,16 +89,13 @@ struct lp_jit_context
 #define lp_jit_context_constants(_builder, _ptr) \
    lp_build_struct_get(_builder, _ptr, 0, "constants")
 
-#define lp_jit_context_samplers(_builder, _ptr) \
-   lp_build_struct_get(_builder, _ptr, 1, "samplers")
-
 #define lp_jit_context_alpha_ref_value(_builder, _ptr) \
-   lp_build_struct_get(_builder, _ptr, 2, "alpha_ref_value")
+   lp_build_struct_get(_builder, _ptr, 1, "alpha_ref_value")
 
 #define lp_jit_context_blend_color(_builder, _ptr) \
-   lp_build_struct_get(_builder, _ptr, 3, "blend_color")
+   lp_build_struct_get(_builder, _ptr, 2, "blend_color")
 
-#define LP_JIT_CONTEXT_TEXTURES_INDEX 4
+#define LP_JIT_CONTEXT_TEXTURES_INDEX 3
 
 #define lp_jit_context_textures(_builder, _ptr) \
    lp_build_struct_get_ptr(_builder, _ptr, LP_JIT_CONTEXT_TEXTURES_INDEX, "textures")
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index 3e482cb904..7020da145f 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -56,7 +56,6 @@
 #define LP_NEW_QUERY         0x4000
 
 
-struct tgsi_sampler;
 struct vertex_info;
 struct pipe_context;
 struct llvmpipe_context;
-- 
cgit v1.2.3


From 152b3bd6ef70b74e2df50ff555cfacb5423ebf17 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 19:45:44 +0100
Subject: nv50: handle TGSI_OPCODE_F2I,F2U,I2F,U2F plus src mods

---
 src/gallium/drivers/nv50/nv50_program.c | 218 +++++++++++++++++++-------------
 1 file changed, 131 insertions(+), 87 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index af0759e503..6417367e0b 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -96,7 +96,11 @@ struct nv50_reg {
 
 #define NV50_MOD_NEG 1
 #define NV50_MOD_ABS 2
+#define NV50_MOD_NEG_ABS (NV50_MOD_NEG | NV50_MOD_ABS)
 #define NV50_MOD_SAT 4
+#define NV50_MOD_I32 8
+
+/* NV50_MOD_I32 is used to indicate integer mode for neg/abs */
 
 /* STACK: Conditionals and loops have to use the (per warp) stack.
  * Stack entries consist of an entry type (divergent path, join at),
@@ -1142,36 +1146,41 @@ emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 	emit(pc, e);
 }
 
-#define CVTOP_RN	0x01
-#define CVTOP_FLOOR	0x03
-#define CVTOP_CEIL	0x05
-#define CVTOP_TRUNC	0x07
-#define CVTOP_SAT	0x08
-#define CVTOP_ABS	0x10
+#define CVT_RN    (0x00 << 16)
+#define CVT_FLOOR (0x02 << 16)
+#define CVT_CEIL  (0x04 << 16)
+#define CVT_TRUNC (0x06 << 16)
+#define CVT_SAT   (0x08 << 16)
+#define CVT_ABS   (0x10 << 16)
+
+#define CVT_X32_X32 0x04004000
+#define CVT_X32_S32 0x04014000
+#define CVT_F32_F32 ((0xc0 << 24) | CVT_X32_X32)
+#define CVT_S32_F32 ((0x88 << 24) | CVT_X32_X32)
+#define CVT_U32_F32 ((0x80 << 24) | CVT_X32_X32)
+#define CVT_F32_S32 ((0x40 << 24) | CVT_X32_S32)
+#define CVT_F32_U32 ((0x40 << 24) | CVT_X32_X32)
+#define CVT_S32_S32 ((0x08 << 24) | CVT_X32_S32)
+#define CVT_S32_U32 ((0x08 << 24) | CVT_X32_X32)
+#define CVT_U32_S32 ((0x00 << 24) | CVT_X32_S32)
 
-/* 0x04 == 32 bit dst */
-/* 0x40 == dst is float */
-/* 0x80 == src is float */
-#define CVT_F32_F32 0xc4
-#define CVT_F32_S32 0x44
-#define CVT_S32_F32 0x8c
-#define CVT_S32_S32 0x0c
-#define CVT_NEG     0x20
-#define CVT_RI      0x08
+#define CVT_NEG 0x20000000
+#define CVT_RI  0x08000000
 
 static void
 emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
-	 int wp, unsigned cvn, unsigned fmt)
+	 int wp, uint32_t cvn)
 {
 	struct nv50_program_exec *e;
 
 	e = exec(pc);
-	set_long(pc, e);
 
-	e->inst[0] |= 0xa0000000;
-	e->inst[1] |= 0x00004000; /* 32 bit src */
-	e->inst[1] |= (cvn << 16);
-	e->inst[1] |= (fmt << 24);
+	if (src->mod & NV50_MOD_NEG) cvn |= CVT_NEG;
+	if (src->mod & NV50_MOD_ABS) cvn |= CVT_ABS;
+
+	e->inst[0] = 0xa0000000;
+	e->inst[1] = cvn;
+	set_long(pc, e);
 	set_src_0(pc, src, e);
 
 	if (wp >= 0)
@@ -1240,7 +1249,7 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
 
 	/* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
 	if (rdst)
-		emit_cvt(pc, rdst, dst, -1, CVTOP_ABS | CVTOP_RN, CVT_F32_S32);
+		emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32);
 	if (rdst && rdst != dst)
 		free_temp(pc, dst);
 }
@@ -1264,7 +1273,7 @@ map_tgsi_setop_cc(unsigned op)
 static INLINE void
 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	emit_cvt(pc, dst, src, -1, CVTOP_FLOOR, CVT_F32_F32 | CVT_RI);
+	emit_cvt(pc, dst, src, -1, CVT_FLOOR | CVT_F32_F32 | CVT_RI);
 }
 
 static void
@@ -1281,16 +1290,10 @@ emit_pow(struct nv50_pc *pc, struct nv50_reg *dst,
 	free_temp(pc, temp);
 }
 
-static INLINE void
-emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-	emit_cvt(pc, dst, src, -1, CVTOP_ABS, CVT_F32_F32);
-}
-
 static INLINE void
 emit_sat(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
-	emit_cvt(pc, dst, src, -1, CVTOP_SAT, CVT_F32_F32);
+	emit_cvt(pc, dst, src, -1, CVT_SAT | CVT_F32_F32);
 }
 
 static void
@@ -1347,12 +1350,6 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 	FREE(one);
 }
 
-static INLINE void
-emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
-{
-	emit_cvt(pc, dst, src, -1, CVTOP_RN, CVT_F32_F32 | CVT_NEG);
-}
-
 static void
 emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 {
@@ -1364,14 +1361,9 @@ emit_kil(struct nv50_pc *pc, struct nv50_reg *src)
 	set_long(pc, e); /* sets cond code to ALWAYS */
 
 	if (src) {
-		unsigned cvn = CVT_F32_F32;
-
 		set_pred(pc, 0x1 /* cc = LT */, r_pred, e);
-
-		if (src->mod & NV50_MOD_NEG)
-			cvn |= CVT_NEG;
-		/* write predicate reg */
-		emit_cvt(pc, NULL, src, r_pred, CVTOP_RN, cvn);
+		/* write to predicate reg */
+		emit_cvt(pc, NULL, src, r_pred, CVT_F32_F32);
 	}
 
 	emit(pc, e);
@@ -1814,8 +1806,8 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 }
 
 /* Some operations support an optional negation flag. */
-static boolean
-negate_supported(const struct tgsi_full_instruction *insn, int i)
+static int
+get_supported_mods(const struct tgsi_full_instruction *insn, int i)
 {
 	switch (insn->Instruction.Opcode) {
 	case TGSI_OPCODE_ADD:
@@ -1835,9 +1827,22 @@ negate_supported(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_SCS:
 	case TGSI_OPCODE_SIN:
 	case TGSI_OPCODE_SUB:
-		return TRUE;
+		return NV50_MOD_NEG;
+	case TGSI_OPCODE_MAX:
+	case TGSI_OPCODE_MIN:
+	case TGSI_OPCODE_INEG: /* tgsi src sign toggle/set would be stupid */
+		return NV50_MOD_ABS;
+	case TGSI_OPCODE_CEIL:
+	case TGSI_OPCODE_FLR:
+	case TGSI_OPCODE_TRUNC:
+		return NV50_MOD_NEG | NV50_MOD_ABS;
+	case TGSI_OPCODE_F2I:
+	case TGSI_OPCODE_F2U:
+	case TGSI_OPCODE_I2F:
+	case TGSI_OPCODE_U2F:
+		return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
 	default:
-		return FALSE;
+		return 0;
 	}
 }
 
@@ -1944,11 +1949,11 @@ tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst)
 
 static struct nv50_reg *
 tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
-	 boolean neg)
+	 int mod)
 {
 	struct nv50_reg *r = NULL;
-	struct nv50_reg *temp;
-	unsigned sgn, c, swz;
+	struct nv50_reg *temp = NULL;
+	unsigned sgn, c, swz, cvn;
 
 	if (src->Register.File != TGSI_FILE_CONSTANT)
 		assert(!src->Register.Indirect);
@@ -1988,7 +1993,7 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 			r = &pc->immd[src->Register.Index * 4 + c];
 			break;
 		case TGSI_FILE_SAMPLER:
-			break;
+			return NULL;
 		case TGSI_FILE_ADDRESS:
 			r = pc->addr[src->Register.Index * 4 + c];
 			assert(r);
@@ -2003,35 +2008,34 @@ tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src,
 		break;
 	}
 
+	cvn = (mod & NV50_MOD_I32) ? CVT_S32_S32 : CVT_F32_F32;
+
 	switch (sgn) {
-	case TGSI_UTIL_SIGN_KEEP:
-		break;
 	case TGSI_UTIL_SIGN_CLEAR:
-		temp = temp_temp(pc);
-		emit_abs(pc, temp, r);
-		r = temp;
-		break;
-	case TGSI_UTIL_SIGN_TOGGLE:
-		if (neg)
-			r->mod = NV50_MOD_NEG;
-		else {
-			temp = temp_temp(pc);
-			emit_neg(pc, temp, r);
-			r = temp;
-		}
+		r->mod = NV50_MOD_ABS;
 		break;
 	case TGSI_UTIL_SIGN_SET:
-		temp = temp_temp(pc);
-		emit_cvt(pc, temp, r, -1, CVTOP_ABS, CVT_F32_F32 | CVT_NEG);
-		r = temp;
+		r->mod = NV50_MOD_NEG_ABS;
+		break;
+	case TGSI_UTIL_SIGN_TOGGLE:
+		r->mod = NV50_MOD_NEG;
 		break;
 	default:
-		assert(0);
+		assert(!r->mod && sgn == TGSI_UTIL_SIGN_KEEP);
 		break;
 	}
 
-	if (r && r->acc >= 0 && r != temp)
-		return reg_instance(pc, r);
+	if ((r->mod & mod) != r->mod) {
+		temp = temp_temp(pc);
+		emit_cvt(pc, temp, r, -1, cvn);
+		r->mod = 0;
+		r = temp;
+	} else
+		r->mod |= mod & NV50_MOD_I32;
+
+	assert(r);
+	if (r->acc >= 0 && r != temp)
+		return reg_instance(pc, r); /* will clear r->mod */
 	return r;
 }
 
@@ -2195,17 +2199,17 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
 		const struct tgsi_full_src_register *fs = &inst->Src[i];
 		unsigned src_mask;
-		boolean neg_supp;
+		int mod_supp;
 
 		src_mask = nv50_tgsi_src_mask(inst, i);
-		neg_supp = negate_supported(inst, i);
+		mod_supp = get_supported_mods(inst, i);
 
 		if (fs->Register.File == TGSI_FILE_SAMPLER)
 			unit = fs->Register.Index;
 
 		for (c = 0; c < 4; c++)
 			if (src_mask & (1 << c))
-				src[i][c] = tgsi_src(pc, c, fs, neg_supp);
+				src[i][c] = tgsi_src(pc, c, fs, mod_supp);
 	}
 
 	brdc = temp = pc->r_brdc;
@@ -2230,7 +2234,8 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_abs(pc, dst[c], src[0][c]);
+			emit_cvt(pc, dst[c], src[0][c], -1,
+				 CVT_ABS | CVT_F32_F32);
 		}
 		break;
 	case TGSI_OPCODE_ADD:
@@ -2253,7 +2258,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_ARL:
 		assert(src[0][0]);
 		temp = temp_temp(pc);
-		emit_cvt(pc, temp, src[0][0], -1, CVTOP_FLOOR, CVT_S32_F32);
+		emit_cvt(pc, temp, src[0][0], -1, CVT_FLOOR | CVT_S32_F32);
 		emit_arl(pc, dst[0], temp, 4);
 		break;
 	case TGSI_OPCODE_BGNLOOP:
@@ -2282,7 +2287,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			if (!(mask & (1 << c)))
 				continue;
 			emit_cvt(pc, dst[c], src[0][c], -1,
-				 CVTOP_CEIL, CVT_F32_F32 | CVT_RI);
+				 CVT_CEIL | CVT_F32_F32 | CVT_RI);
 		}
 		break;
 	case TGSI_OPCODE_CMP:
@@ -2290,7 +2295,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_cvt(pc, NULL, src[0][c], 1, CVTOP_RN, CVT_F32_F32);
+			emit_cvt(pc, NULL, src[0][c], 1, CVT_F32_F32);
 			emit_mov(pc, dst[c], src[1][c]);
 			set_pred(pc, 0x1, 1, pc->p->exec_tail); /* @SF */
 			emit_mov(pc, dst[c], src[2][c]);
@@ -2419,6 +2424,22 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_mov_immdval(pc, dst[3], 1.0f);
 	}
 		break;
+	case TGSI_OPCODE_F2I:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, dst[c], src[0][c], -1,
+				 CVT_TRUNC | CVT_S32_F32);
+		}
+		break;
+	case TGSI_OPCODE_F2U:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, dst[c], src[0][c], -1,
+				 CVT_TRUNC | CVT_U32_F32);
+		}
+		break;
 	case TGSI_OPCODE_FLR:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
@@ -2435,14 +2456,28 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_sub(pc, dst[c], src[0][c], temp);
 		}
 		break;
+	case TGSI_OPCODE_I2F:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_S32);
+		}
+		break;
 	case TGSI_OPCODE_IF:
 		assert(pc->if_lvl < NV50_MAX_COND_NESTING);
-		emit_cvt(pc, NULL, src[0][0], 0, CVTOP_ABS | CVTOP_RN,
-			 CVT_F32_F32);
+		emit_cvt(pc, NULL, src[0][0], 0, CVT_ABS | CVT_F32_F32);
 		pc->if_join[pc->if_lvl] = emit_joinat(pc);
 		pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
 		terminate_mbb(pc);
 		break;
+	case TGSI_OPCODE_INEG:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, dst[c], src[0][c], -1,
+				 CVT_S32_S32 | CVT_NEG);
+		}
+		break;
 	case TGSI_OPCODE_KIL:
 		assert(src[0][0] && src[0][1] && src[0][2] && src[0][3]);
 		emit_kil(pc, src[0][0]);
@@ -2469,7 +2504,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		else
 			t[1] = t[0];
 
-		emit_abs(pc, t[0], src[0][0]);
+		emit_cvt(pc, t[0], src[0][0], -1, CVT_ABS | CVT_F32_F32);
 		emit_flop(pc, NV50_FLOP_LG2, t[1], t[0]);
 		if (mask & (1 << 2))
 			emit_mov(pc, dst[2], t[1]);
@@ -2612,7 +2647,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			if (!(mask & (1 << c)))
 				continue;
 			emit_cvt(pc, dst[c], src[0][c], -1,
-				 CVTOP_TRUNC, CVT_F32_F32 | CVT_RI);
+				 CVT_TRUNC | CVT_F32_F32 | CVT_RI);
+		}
+		break;
+	case TGSI_OPCODE_U2F:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
 		}
 		break;
 	case TGSI_OPCODE_XPD:
@@ -2814,7 +2856,7 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 
 	for (i = 0; i < insn->Instruction.NumSrcRegs; i++) {
 		unsigned chn, mask = nv50_tgsi_src_mask(insn, i);
-		boolean neg_supp = negate_supported(insn, i);
+		int ms = get_supported_mods(insn, i);
 
 		fs = &insn->Src[i];
 		if (fs->Register.File != fd->Register.File ||
@@ -2832,10 +2874,12 @@ nv50_tgsi_scan_swizzle(const struct tgsi_full_instruction *insn,
 			if (!(fd->Register.WriteMask & (1 << c)))
 				continue;
 
-			/* no danger if src is copied to TEMP first */
-			if ((s != TGSI_UTIL_SIGN_KEEP) &&
-			    (s != TGSI_UTIL_SIGN_TOGGLE || !neg_supp))
-				continue;
+			if (s == TGSI_UTIL_SIGN_TOGGLE && !(ms & NV50_MOD_NEG))
+					continue;
+			if (s == TGSI_UTIL_SIGN_CLEAR && !(ms & NV50_MOD_ABS))
+					continue;
+			if ((s == TGSI_UTIL_SIGN_SET) && ((ms & 3) != 3))
+					continue;
 
 			rdep[c] |= nv50_tgsi_dst_revdep(
 				insn->Instruction.Opcode, i, chn);
-- 
cgit v1.2.3


From ccc7d0cb7afdac3bca985b7326b53e5c8bf83b3a Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 17:48:41 +0100
Subject: nv50: handle TGSI_OPCODE_SHL,ISHR,USHR

---
 src/gallium/drivers/nv50/nv50_program.c | 42 +++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 6417367e0b..faf8448cba 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1042,6 +1042,34 @@ emit_bitop2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	emit(pc, e);
 }
 
+static void
+emit_shift(struct nv50_pc *pc, struct nv50_reg *dst,
+	   struct nv50_reg *src0, struct nv50_reg *src1, unsigned dir)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x30000000;
+	e->inst[1] = 0xc4000000;
+
+	set_long(pc, e);
+	set_dst(pc, dst, e);
+	set_src_0(pc, src0, e);
+
+	if (src1->type == P_IMMD) {
+		e->inst[1] |= (1 << 20);
+		e->inst[0] |= (pc->immd_buf[src1->hw] & 0x7f) << 16;
+	} else
+		set_src_1(pc, src1, e);
+
+	if (dir != TGSI_OPCODE_SHL)
+		e->inst[1] |= (1 << 29);
+
+	if (dir == TGSI_OPCODE_ISHR)
+		e->inst[1] |= (1 << 27);
+
+	emit(pc, e);
+}
+
 static void
 emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0,
 	 struct nv50_reg *src1, struct nv50_reg *src2)
@@ -1841,6 +1869,10 @@ get_supported_mods(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_I2F:
 	case TGSI_OPCODE_U2F:
 		return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
+	case TGSI_OPCODE_SHL:
+	case TGSI_OPCODE_ISHR:
+	case TGSI_OPCODE_USHR:
+		return NV50_MOD_I32;
 	default:
 		return 0;
 	}
@@ -2594,6 +2626,16 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		if (mask & (1 << 3))
 			emit_mov_immdval(pc, dst[3], 1.0);
 		break;
+	case TGSI_OPCODE_SHL:
+	case TGSI_OPCODE_ISHR:
+	case TGSI_OPCODE_USHR:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_shift(pc, dst[c], src[0][c], src[1][c],
+				   inst->Instruction.Opcode);
+		}
+		break;
 	case TGSI_OPCODE_SIN:
 		if (mask & 8) {
 			emit_precossin(pc, temp, src[0][3]);
-- 
cgit v1.2.3


From 607b9c2e09def36aca0b77e26826cd22e2573e66 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 19:48:50 +0100
Subject: nv50: handle integer SET operations

---
 src/gallium/drivers/nv50/nv50_program.c | 55 ++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 21 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index faf8448cba..d61229b65e 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1233,10 +1233,12 @@ emit_cvt(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
  *  0x6 = GE
  *  0x7 = set condition code ? (used before bra.lt/le/gt/ge)
  *  0x8 = unordered bit (allows NaN)
+ *
+ *  mode = 0x04 (u32), 0x0c (s32), 0x80 (f32)
  */
 static void
 emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
-	 struct nv50_reg *src0, struct nv50_reg *src1)
+	 struct nv50_reg *src0, struct nv50_reg *src1, uint8_t mode)
 {
 	static const unsigned cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 
@@ -1251,16 +1253,10 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
 	if (dst && dst->type != P_TEMP)
 		dst = alloc_temp(pc, NULL);
 
-	/* set.u32 */
 	set_long(pc, e);
-	e->inst[0] |= 0xb0000000;
+	e->inst[0] |= 0x30000000 | (mode << 24);
 	e->inst[1] |= 0x60000000 | (ccode << 14);
 
-	/* XXX: decuda will disasm as .u16 and use .lo/.hi regs, but
-	 * that doesn't seem to match what the hw actually does
-	e->inst[1] |= 0x04000000; << breaks things, u32 by default ?
-	 */
-
 	if (wp >= 0)
 		set_pred_wr(pc, 1, wp, e);
 	if (dst)
@@ -1275,26 +1271,32 @@ emit_set(struct nv50_pc *pc, unsigned ccode, struct nv50_reg *dst, int wp,
 
 	emit(pc, e);
 
-	/* cvt.f32.u32/s32 (?) if we didn't only write the predicate */
-	if (rdst)
+	if (rdst && mode == 0x80) /* convert to float ? */
 		emit_cvt(pc, rdst, dst, -1, CVT_ABS | CVT_F32_S32);
 	if (rdst && rdst != dst)
 		free_temp(pc, dst);
 }
 
-static INLINE unsigned
-map_tgsi_setop_cc(unsigned op)
+static INLINE void
+map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty)
 {
 	switch (op) {
-	case TGSI_OPCODE_SLT: return 0x1;
-	case TGSI_OPCODE_SGE: return 0x6;
-	case TGSI_OPCODE_SEQ: return 0x2;
-	case TGSI_OPCODE_SGT: return 0x4;
-	case TGSI_OPCODE_SLE: return 0x3;
-	case TGSI_OPCODE_SNE: return 0xd;
+	case TGSI_OPCODE_SLT: *cc = 0x1; *ty = 0x80; break;
+	case TGSI_OPCODE_SGE: *cc = 0x6; *ty = 0x80; break;
+	case TGSI_OPCODE_SEQ: *cc = 0x2; *ty = 0x80; break;
+	case TGSI_OPCODE_SGT: *cc = 0x4; *ty = 0x80; break;
+	case TGSI_OPCODE_SLE: *cc = 0x3; *ty = 0x80; break;
+	case TGSI_OPCODE_SNE: *cc = 0xd; *ty = 0x80; break;
+
+	case TGSI_OPCODE_ISLT: *cc = 0x1; *ty = 0x0c; break;
+	case TGSI_OPCODE_ISGE: *cc = 0x6; *ty = 0x0c; break;
+	case TGSI_OPCODE_USEQ: *cc = 0x2; *ty = 0x04; break;
+	case TGSI_OPCODE_USGE: *cc = 0x6; *ty = 0x04; break;
+	case TGSI_OPCODE_USLT: *cc = 0x1; *ty = 0x04; break;
+	case TGSI_OPCODE_USNE: *cc = 0x5; *ty = 0x04; break;
 	default:
 		assert(0);
-		return 0;
+		return;
 	}
 }
 
@@ -2654,12 +2656,23 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 	case TGSI_OPCODE_SGT:
 	case TGSI_OPCODE_SLE:
 	case TGSI_OPCODE_SNE:
-		i = map_tgsi_setop_cc(inst->Instruction.Opcode);
+	case TGSI_OPCODE_ISLT:
+	case TGSI_OPCODE_ISGE:
+	case TGSI_OPCODE_USEQ:
+	case TGSI_OPCODE_USGE:
+	case TGSI_OPCODE_USLT:
+	case TGSI_OPCODE_USNE:
+	{
+		uint8_t cc, ty;
+
+		map_tgsi_setop_hw(inst->Instruction.Opcode, &cc, &ty);
+
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_set(pc, i, dst[c], -1, src[0][c], src[1][c]);
+			emit_set(pc, cc, dst[c], -1, src[0][c], src[1][c], ty);
 		}
+	}
 		break;
 	case TGSI_OPCODE_SUB:
 		for (c = 0; c < 4; c++) {
-- 
cgit v1.2.3


From d550de2342bd2672a74707554171f9f9f1947baa Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 17:59:58 +0100
Subject: nv50: handle TGSI_OPCODE_IMAX,IMIN,UMAX,UMIN

---
 src/gallium/drivers/nv50/nv50_program.c | 59 +++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 10 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d61229b65e..c71592347d 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -971,6 +971,13 @@ emit_arl(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src,
 	emit(pc, e);
 }
 
+#define NV50_MAX_F32 0x880
+#define NV50_MAX_S32 0x08c
+#define NV50_MAX_U32 0x084
+#define NV50_MIN_F32 0x8a0
+#define NV50_MIN_S32 0x0ac
+#define NV50_MIN_U32 0x0a4
+
 static void
 emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 	    struct nv50_reg *src0, struct nv50_reg *src1)
@@ -978,8 +985,8 @@ emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst,
 	struct nv50_program_exec *e = exec(pc);
 
 	set_long(pc, e);
-	e->inst[0] |= 0xb0000000;
-	e->inst[1] |= (sub << 29);
+	e->inst[0] |= 0x30000000 | ((sub & 0x800) << 20);
+	e->inst[1] |= (sub << 24);
 
 	check_swap_src_0_1(pc, &src0, &src1);
 	set_dst(pc, dst, e);
@@ -1341,18 +1348,18 @@ emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask,
 
 	if (mask & (3 << 1)) {
 		tmp[0] = alloc_temp(pc, NULL);
-		emit_minmax(pc, 4, tmp[0], src[0], zero);
+		emit_minmax(pc, NV50_MAX_F32, tmp[0], src[0], zero);
 	}
 
 	if (mask & (1 << 2)) {
 		set_pred_wr(pc, 1, 0, pc->p->exec_tail);
 
 		tmp[1] = temp_temp(pc);
-		emit_minmax(pc, 4, tmp[1], src[1], zero);
+		emit_minmax(pc, NV50_MAX_F32, tmp[1], src[1], zero);
 
 		tmp[3] = temp_temp(pc);
-		emit_minmax(pc, 4, tmp[3], src[3], neg128);
-		emit_minmax(pc, 5, tmp[3], tmp[3], pos128);
+		emit_minmax(pc, NV50_MAX_F32, tmp[3], src[3], neg128);
+		emit_minmax(pc, NV50_MIN_F32, tmp[3], tmp[3], pos128);
 
 		emit_pow(pc, dst[2], tmp[1], tmp[3]);
 		emit_mov(pc, dst[2], zero);
@@ -1496,8 +1503,8 @@ load_cube_tex_coords(struct nv50_pc *pc, struct nv50_reg *t[4],
 	src[1]->mod |= NV50_MOD_ABS;
 	src[2]->mod |= NV50_MOD_ABS;
 
-	emit_minmax(pc, 4, t[2], src[0], src[1]);
-	emit_minmax(pc, 4, t[2], src[2], t[2]);
+	emit_minmax(pc, NV50_MAX_F32, t[2], src[0], src[1]);
+	emit_minmax(pc, NV50_MAX_F32, t[2], src[2], t[2]);
 
 	src[0]->mod = mod[0];
 	src[1]->mod = mod[1];
@@ -1872,7 +1879,11 @@ get_supported_mods(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_U2F:
 		return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
 	case TGSI_OPCODE_SHL:
+	case TGSI_OPCODE_IMAX:
+	case TGSI_OPCODE_IMIN:
 	case TGSI_OPCODE_ISHR:
+	case TGSI_OPCODE_UMAX:
+	case TGSI_OPCODE_UMIN:
 	case TGSI_OPCODE_USHR:
 		return NV50_MOD_I32;
 	default:
@@ -2504,6 +2515,20 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		pc->if_insn[pc->if_lvl++] = emit_branch(pc, 0, 2);;
 		terminate_mbb(pc);
 		break;
+	case TGSI_OPCODE_IMAX:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 0x08c, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_IMIN:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 0x0ac, dst[c], src[0][c], src[1][c]);
+		}
+		break;
 	case TGSI_OPCODE_INEG:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
@@ -2576,14 +2601,14 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]);
+			emit_minmax(pc, 0x880, dst[c], src[0][c], src[1][c]);
 		}
 		break;
 	case TGSI_OPCODE_MIN:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
 				continue;
-			emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]);
+			emit_minmax(pc, 0x8a0, dst[c], src[0][c], src[1][c]);
 		}
 		break;
 	case TGSI_OPCODE_MOV:
@@ -2712,6 +2737,20 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
 		}
 		break;
+	case TGSI_OPCODE_UMAX:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 0x084, dst[c], src[0][c], src[1][c]);
+		}
+		break;
+	case TGSI_OPCODE_UMIN:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_minmax(pc, 0x0a4, dst[c], src[0][c], src[1][c]);
+		}
+		break;
 	case TGSI_OPCODE_XPD:
 		temp = temp_temp(pc);
 		if (mask & (1 << 0)) {
-- 
cgit v1.2.3


From a009fa430597d6c31b623a3989e95e0812c495e9 Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 18:23:04 +0100
Subject: nv50: handle TGSI_OPCODE_SAD,UADD

---
 src/gallium/drivers/nv50/nv50_program.c | 96 +++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index c71592347d..b1bac447f2 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -1307,6 +1307,74 @@ map_tgsi_setop_hw(unsigned op, uint8_t *cc, uint8_t *ty)
 	}
 }
 
+static void
+emit_add_b32(struct nv50_pc *pc, struct nv50_reg *dst,
+	     struct nv50_reg *src0, struct nv50_reg *rsrc1)
+{
+	struct nv50_program_exec *e = exec(pc);
+	struct nv50_reg *src1;
+
+	e->inst[0] = 0x20000000;
+
+	alloc_reg(pc, rsrc1);
+	check_swap_src_0_1(pc, &src0, &rsrc1);
+
+	src1 = rsrc1;
+	if (src0->mod & rsrc1->mod & NV50_MOD_NEG) {
+		src1 = alloc_temp(pc, NULL);
+		emit_cvt(pc, src1, rsrc1, -1, CVT_S32_S32);
+	}
+
+	if (!pc->allow32 || src1->hw > 63 ||
+	    (src1->type != P_TEMP && src1->type != P_IMMD))
+		set_long(pc, e);
+
+	set_dst(pc, dst, e);
+	set_src_0(pc, src0, e);
+
+	if (is_long(e)) {
+		e->inst[1] |= 1 << 26;
+		set_src_2(pc, src1, e);
+	} else {
+		e->inst[0] |= 0x8000;
+		if (src1->type == P_IMMD)
+			set_immd(pc, src1, e);
+		else
+			set_src_1(pc, src1, e);
+	}
+
+	if (src0->mod & NV50_MOD_NEG)
+		e->inst[0] |= 1 << 28;
+	else
+	if (src1->mod & NV50_MOD_NEG)
+		e->inst[0] |= 1 << 22;
+
+	emit(pc, e);
+
+	if (src1 != rsrc1)
+		free_temp(pc, src1);
+}
+
+static void
+emit_sad(struct nv50_pc *pc, struct nv50_reg *dst,
+	 struct nv50_reg *src0, struct nv50_reg *src1, struct nv50_reg *src2)
+{
+	struct nv50_program_exec *e = exec(pc);
+
+	e->inst[0] = 0x50000000;
+	set_dst(pc, dst, e);
+	set_src_0(pc, src0, e);
+	set_src_1(pc, src1, e);
+	alloc_reg(pc, src2);
+	if (is_long(e) || (src2->type != dst->type) || (src2->hw != dst->hw))
+		set_src_2(pc, src2, e);
+
+	if (is_long(e))
+		e->inst[1] |= 0x0c << 24;
+	else
+		e->inst[0] |= 0x81 << 8;
+}
+
 static INLINE void
 emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src)
 {
@@ -1807,6 +1875,17 @@ convert_to_long(struct nv50_pc *pc, struct nv50_program_exec *e)
 		q = 0x0403c000;
 		m = 0xffff7fff;
 		break;
+	case 0x2:
+	case 0x3:
+		/* ADD, SUB, SUBR b32 */
+		m = ~(0x8000 | (127 << 16));
+		q = ((e->inst[0] & (~m)) >> 2) | (1 << 26);
+		break;
+	case 0x5:
+		/* SAD */
+		m = ~(0x81 << 8);
+		q = 0x0c << 24;
+		break;
 	case 0x8:
 		/* INTERP (move centroid, perspective and flat bits) */
 		m = ~0x03000100;
@@ -1878,6 +1957,9 @@ get_supported_mods(const struct tgsi_full_instruction *insn, int i)
 	case TGSI_OPCODE_I2F:
 	case TGSI_OPCODE_U2F:
 		return NV50_MOD_NEG | NV50_MOD_ABS | NV50_MOD_I32;
+	case TGSI_OPCODE_UADD:
+		return NV50_MOD_NEG | NV50_MOD_I32;
+	case TGSI_OPCODE_SAD:
 	case TGSI_OPCODE_SHL:
 	case TGSI_OPCODE_IMAX:
 	case TGSI_OPCODE_IMIN:
@@ -2640,6 +2722,13 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 		src[0][0]->mod |= NV50_MOD_ABS;
 		emit_flop(pc, NV50_FLOP_RSQ, brdc, src[0][0]);
 		break;
+	case TGSI_OPCODE_SAD:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_sad(pc, dst[c], src[0][c], src[1][c], src[2][c]);
+		}
+		break;
 	case TGSI_OPCODE_SCS:
 		temp = temp_temp(pc);
 		if (mask & 3)
@@ -2737,6 +2826,13 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 			emit_cvt(pc, dst[c], src[0][c], -1, CVT_F32_U32);
 		}
 		break;
+	case TGSI_OPCODE_UADD:
+		for (c = 0; c < 4; c++) {
+			if (!(mask & (1 << c)))
+				continue;
+			emit_add_b32(pc, dst[c], src[0][c], src[1][c]);
+		}
+		break;
 	case TGSI_OPCODE_UMAX:
 		for (c = 0; c < 4; c++) {
 			if (!(mask & (1 << c)))
-- 
cgit v1.2.3


From 7fc5fcada5600b401d23a29a4a3d1a09e3492d1c Mon Sep 17 00:00:00 2001
From: Christoph Bumiller <e0425955@student.tuwien.ac.at>
Date: Thu, 7 Jan 2010 21:17:13 +0100
Subject: nv50: preallocate TEMPs written first time in a subroutine

Otherwise we risk overwriting them with temporary GPRs if
they're not used immediately after the CALL.
---
 src/gallium/drivers/nv50/nv50_program.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index b1bac447f2..53f9f0adf3 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2911,7 +2911,7 @@ nv50_program_tx_insn(struct nv50_pc *pc,
 static void
 prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 {
-	struct nv50_reg *reg = NULL;
+	struct nv50_reg *r, *reg = NULL;
 	const struct tgsi_full_src_register *src;
 	const struct tgsi_dst_register *dst;
 	unsigned i, c, k, mask;
@@ -2957,7 +2957,15 @@ prep_inspect_insn(struct nv50_pc *pc, const struct tgsi_full_instruction *insn)
 				continue;
 			k = tgsi_util_get_full_src_register_swizzle(src, c);
 
-			reg[src->Register.Index * 4 + k].acc = pc->insn_nr;
+			r = &reg[src->Register.Index * 4 + k];
+
+			/* If used before written, pre-allocate the reg,
+			 * lest we overwrite results from a subroutine.
+			 */
+			if (!r->acc && r->type == P_TEMP)
+				alloc_reg(pc, r);
+
+			r->acc = pc->insn_nr;
 		}
 	}
 }
-- 
cgit v1.2.3


From ba33ef00118d1c6017585af1498b89e99fe045be Mon Sep 17 00:00:00 2001
From: José Fonseca <jfonseca@vmware.com>
Date: Fri, 8 Jan 2010 01:15:17 +0000
Subject: llvmpipe: Initialize all coordinates.

Fixes assertion failure with fp-incomplete-tex (fdo 24298).
---
 src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
index 61b033c9fc..fb1eda4423 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_tgsi_soa.c
@@ -361,6 +361,9 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
       if (projected)
          coords[i] = lp_build_mul(&bld->base, coords[i], oow);
    }
+   for (i = num_coords; i < 3; i++) {
+      coords[i] = bld->base.undef;
+   }
 
    bld->sampler->emit_fetch_texel(bld->sampler,
                                   bld->base.builder,
-- 
cgit v1.2.3


From 95f603a5f3d897c5a6cf12fb13ea035f2e0867d9 Mon Sep 17 00:00:00 2001
From: Francisco Jerez <currojerez@riseup.net>
Date: Fri, 8 Jan 2010 04:42:28 +0100
Subject: nv20: Fix build for the latest nouveau_class.h changes.

---
 src/gallium/drivers/nv20/nv20_context.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'src/gallium/drivers')

diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c
index 1dba724887..5b80af2d22 100644
--- a/src/gallium/drivers/nv20/nv20_context.c
+++ b/src/gallium/drivers/nv20/nv20_context.c
@@ -323,8 +323,8 @@ static void nv20_init_hwctx(struct nv20_context *nv20)
 	OUT_RINGf (chan, -0.090168);		/* NV20TCL.FOG_EQUATION_LINEAR */
 	OUT_RINGf (chan, 0.0);		/* NV20TCL.FOG_EQUATION_QUADRATIC */
 	BEGIN_RING(chan, kelvin, NV20TCL_FOG_MODE, 2);
-	OUT_RING  (chan, NV20TCL_FOG_MODE_EXP_2);
-	OUT_RING  (chan, NV20TCL_FOG_COORD_DIST_COORD_FOG);
+	OUT_RING  (chan, NV20TCL_FOG_MODE_EXP_SIGNED);
+	OUT_RING  (chan, NV20TCL_FOG_COORD_FOG);
 	BEGIN_RING(chan, kelvin, NV20TCL_FOG_ENABLE, 2);
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, 0);			/* NV20TCL.FOG_COLOR */
-- 
cgit v1.2.3